diff --git a/src/Android.mk b/src/Android.mk
index cd6be9d..4a54afd 100644
--- a/src/Android.mk
+++ b/src/Android.mk
@@ -2,17 +2,7 @@ LOCAL_PATH := $(call my-dir)
 include $(CLEAR_VARS)
 LOCAL_MODULE := aEdax # should be renamed to lib..aEdax..so afterwords
 LOCAL_CFLAGS += -DUNICODE
-<<<<<<< HEAD
-<<<<<<< HEAD
 LOCAL_SRC_FILES := all.c board_sse.c.neon eval_sse.c.neon flip_neon_bitscan.c.neon android/cpu-features.c
 LOCAL_ARM_NEON := false
-=======
-LOCAL_SRC_FILES := all.c
-# LOCAL_ARM_NEON := true
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-LOCAL_SRC_FILES := all.c board_sse.c.neon eval_sse.c.neon flip_neon_bitscan.c.neon android/cpu-features.c
-LOCAL_ARM_NEON := false
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 # cmd-strip :=
 include $(BUILD_EXECUTABLE)
diff --git a/src/Makefile b/src/Makefile
index cdedf8f..4f6f51c 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,27 +1,7 @@
 #
 # makefile to Compile Edax
 #
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 # Copyright 1998 - 2024
-=======
-# Copyright 1998 - 2018
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
-# Copyright 1998 - 2022
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
-# Copyright 1998 - 2022
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
-=======
-# Copyright 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
-=======
-# Copyright 1998 - 2024
->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build)
 # Richard Delorme
 # Version 4.5
 #
@@ -70,11 +50,6 @@ ifeq ($(COMP),gcc)
 	endif
 
 	ifeq ($(ARCH),x64-modern)
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 		CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT
 	endif
 	ifeq ($(ARCH),x64-avx512)
@@ -85,30 +60,6 @@ ifeq ($(COMP),gcc)
 	endif
 	ifeq ($(ARCH),x64-k10)
 		CFLAGS += -m64 -march=amdfam10 -DUSE_GAS_X64 -DPOPCOUNT -DMOVE_GENERATOR=MOVE_GENERATOR_BITSCAN
-=======
-		CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
-		CFLAGS += -m64 -march=core-avx2 -mno-bmi2 -DUSE_GAS_X64 -DPOPCOUNT
->>>>>>> 93110ce (Use computation or optional pdep to unpack A1_A8)
-=======
-		CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
-		CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
-=======
-		CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
-	endif
-	ifeq ($(ARCH),x64-avx512)
-		CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT
-	endif
-	ifeq ($(ARCH),x64-popcnt)
-		CFLAGS += -m64 -mpopcnt -mtune=generic -DUSE_GAS_X64 -DPOPCOUNT
-	endif
-	ifeq ($(ARCH),x64-k10)
-		CFLAGS += -m64 -march=amdfam10 -DUSE_GAS_X64 -DPOPCOUNT -DMOVE_GENERATOR=MOVE_GENERATOR_BITSCAN
 	endif
 	ifeq ($(ARCH),x32-modern)
 		CFLAGS += -mx32 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT
@@ -119,39 +70,8 @@ ifeq ($(COMP),gcc)
 	ifeq ($(ARCH),x32)
 		CFLAGS += -mx32 -mtune=generic -DUSE_GAS_X64
 	endif
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	ifeq ($(ARCH),x86-modern)
-		CFLAGS += -m32 -march=core-avx2 -DUSE_GAS_X86 -DUSE_GAS_MMX -DhasSSE2 -DPOPCOUNT -DLASTFLIP_HIGHCUT
-		ifeq ($(BUILD),optimize)
-			CFLAGS += -fomit-frame-pointer
-		endif
->>>>>>> 0b8fa13 (More HBOARD hash functions)
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	ifeq ($(ARCH),x86-sse)
 		CFLAGS += -m32 -march=pentium-m -mfpmath=sse -DUSE_GAS_X86 -DUSE_GAS_MMX -DhasSSE2
-=======
-	ifeq ($(ARCH),x86-modern)
-		CFLAGS += -m32 -march=native -mfpmath=sse -DUSE_GAS_X86 -DUSE_GAS_MMX -DhasSSE2 -DPOPCOUNT
-=======
-	ifeq ($(ARCH),x86-sse)
-		CFLAGS += -m32 -march=pentium-m -mfpmath=sse -DUSE_GAS_X86 -DUSE_GAS_MMX -DhasSSE2
-		ifeq ($(BUILD),optimize)
-			CFLAGS += -fomit-frame-pointer
-		endif
->>>>>>> 46e3559 (fix gcc x86 build; add x86-sse build to makefile)
-	endif
-	ifeq ($(ARCH),x86)
-<<<<<<< HEAD
-		CFLAGS += -m32 -mtune=generic -DUSE_GAS_X86 -DUSE_GAS_MMX
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
-		CFLAGS += -m32 -march=i386 -mtune=generic -DUSE_GAS_X86 -DUSE_GAS_MMX
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
 		ifeq ($(BUILD),optimize)
 			CFLAGS += -fomit-frame-pointer
 		endif
@@ -167,24 +87,14 @@ ifeq ($(COMP),gcc)
 			CFLAGS += -fomit-frame-pointer
 		endif
 	endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 	ifeq ($(ARCH),armv7)
 		CFLAGS += -march=armv7-a
 		ifeq ($(BUILD),optimize)
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	ifeq ($(ARCH),armv7)
-		CFLAGS += -march=armv7-a
-		ifeq ($(BUILD),optimize)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 			CFLAGS += -fomit-frame-pointer
 		endif
 	endif
 	ifeq ($(ARCH),arm-neon)
 		CFLAGS += -march=armv7-a+simd -mfloat-abi=softfp
-<<<<<<< HEAD
 		ifeq ($(BUILD),optimize)
 			CFLAGS += -fomit-frame-pointer
 		endif
@@ -193,23 +103,6 @@ ifeq ($(COMP),gcc)
 		CFLAGS += -march=armv8.2-a+sve
 		ifeq ($(BUILD),optimize)
 			CFLAGS += -fomit-frame-pointer
-<<<<<<< HEAD
-=======
-			CFLAGS += -fomit-frame-pointer -march=armv7-a -mfpu=neon
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-			CFLAGS += -fomit-frame-pointer -march=armv7-a -mfloat-abi=softfp -mfpu=neon -DhasNeon
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-=======
-	ifeq ($(ARCH),ARMv7)
-		CFLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=neon -DhasNeon
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-		ifeq ($(BUILD),optimize)
-			CFLAGS += -fomit-frame-pointer
->>>>>>> 46e3559 (fix gcc x86 build; add x86-sse build to makefile)
-=======
->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build)
 		endif
 	endif
 
@@ -221,30 +114,13 @@ ifeq ($(COMP),gcc)
 		ifeq ($(ARCH),arm)
 			CFLAGS += -march=armv8.3-a
 		endif
-<<<<<<< HEAD
-	endif
-	ifeq ($(OS),android)
-		CFLAGS += -DANDROID=1
-=======
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 	endif
 	ifeq ($(OS),android)
 		CFLAGS += -DANDROID=1
 	endif
 	ifeq ($(OS),windows)
 		CFLAGS += -D__USE_MINGW_ANSI_STDIO -DWINVER=0x0501
-<<<<<<< HEAD
-<<<<<<< HEAD
 		ifneq (,$(findstring x86,$(ARCH)))
-=======
-		ifeq ($(ARCH),x86-modern)
-			CFLAGS += -DUSE_PTHREAD
-		endif
-		ifeq ($(ARCH),x86)
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
-		ifneq (,$(findstring x86,$(ARCH)))
->>>>>>> 11e7bb7 (filp_sse_bitscan.c (experimental) added; Makefile modified.)
 			CFLAGS += -DUSE_PTHREAD
 		endif
 	endif
@@ -345,23 +221,7 @@ endif
 
 #icc
 ifeq ($(COMP),icc)
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	CFLAGS = -std=c99 -Wall -Wcheck -wd2259 -D_GNU_SOURCE=1 -DUNICODE -Qoption,cpp,--unicode_source_kind,"UTF-8"
-=======
-	CFLAGS = -std=c99 -Wall -Wcheck -wd2259 -wd913 -D_GNU_SOURCE=1 -DUNICODE
->>>>>>> 4cba71a (Use utf-8 for french/degree/micro chars; consistent capitalize in opening names for string-pooling)
-=======
-	CFLAGS = -std=c99 -Wall -Wcheck -wd2259 -wd913 -DUNICODE
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	CFLAGS = -std=c99 -Wall -Wcheck -wd2259 -wd913 -D_GNU_SOURCE=1 -DUNICODE
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	CFLAGS = -std=c99 -Wall -Wcheck -wd2259 -D_GNU_SOURCE=1 -DUNICODE -Qoption,cpp,--unicode_source_kind,"UTF-8"
->>>>>>> 0b9d604 (Add more AVX512 builds; fix modern compiler's warnings)
 	PGO_GEN = -prof_gen
 	PGO_USE = -prof_use -wd11505
 
@@ -375,15 +235,7 @@ ifeq ($(COMP),icc)
 		CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT
 	endif
 	ifeq ($(ARCH),x64-avx512)
-<<<<<<< HEAD
-<<<<<<< HEAD
 		CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT
-=======
-		CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT
->>>>>>> 0b9d604 (Add more AVX512 builds; fix modern compiler's warnings)
-=======
-		CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 	endif
 	ifeq ($(ARCH),x64)
 		CFLAGS += -m64 -DUSE_GAS_X64
@@ -419,68 +271,20 @@ endif
 
 #clang
 ifeq ($(COMP),clang)
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	CFLAGS = -std=c99 -pedantic -W -Wall -D_GNU_SOURCE=1 -DUNICODE
-=======
-	CFLAGS = -std=c99 -pedantic -W -Wall -D_GNU_SOURCE=1 -DUNICODE -Wno-invalid-source-encoding
->>>>>>> 4cba71a (Use utf-8 for french/degree/micro chars; consistent capitalize in opening names for string-pooling)
-=======
-	CFLAGS = -std=c99 -pedantic -W -Wall -DUNICODE -Wno-invalid-source-encoding
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	CFLAGS = -std=c99 -pedantic -W -Wall -D_GNU_SOURCE=1 -DUNICODE -Wno-invalid-source-encoding
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	CFLAGS = -std=c99 -pedantic -W -Wall -D_GNU_SOURCE=1 -DUNICODE
->>>>>>> 0b9d604 (Add more AVX512 builds; fix modern compiler's warnings)
 	PGO_GEN = -fprofile-instr-generate
 	PGO_USE = -fprofile-instr-use=edax.profdata
 	PGO = llvm-profdata merge -output=edax.profdata $(BIN)/*.profraw
 
 	ifeq ($(BUILD),optimize)
-<<<<<<< HEAD
-<<<<<<< HEAD
 		CFLAGS += -O3 -ffast-math -fomit-frame-pointer -DNDEBUG
 		LTOFLAG = -flto
-=======
-		CFLAGS += -O3 -flto -ffast-math -fomit-frame-pointer -DNDEBUG
->>>>>>> ea39994 (Improve clang compatibility)
-=======
-		CFLAGS += -O3 -ffast-math -fomit-frame-pointer -DNDEBUG
-		LTOFLAG = -flto
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	else
 		CFLAGS += -O0 -g -DDEBUG
 	endif
 
 	ifeq ($(ARCH),x64-modern)
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT
-	endif
-	ifeq ($(ARCH),x64-avx512)
-		CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT
-=======
-		CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT
->>>>>>> 11e7bb7 (filp_sse_bitscan.c (experimental) added; Makefile modified.)
-=======
-		CFLAGS += -m64 -march=core-avx2 -mno-bmi2 -DUSE_GAS_X64 -DPOPCOUNT
->>>>>>> 93110ce (Use computation or optional pdep to unpack A1_A8)
-=======
-		CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
-		CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
-=======
 		CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 	endif
 	ifeq ($(ARCH),x64-avx512)
 		CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT
@@ -514,39 +318,12 @@ endif
 ifeq ($(OS),windows)
 	EXE = wEdax-$(ARCH).exe
 	LIBS += -lws2_32
-<<<<<<< HEAD
-<<<<<<< HEAD
 	ifneq (,$(findstring x86,$(ARCH)))
 		LIBS += -Bstatic -Wl,-Bstatic,-lpthread
-<<<<<<< HEAD
-=======
-	ifeq ($(ARCH),x86-modern)
-		LIBS += -lpthread
-=======
->>>>>>> a03a9c9 (Static link to pthread on MSYS2 x86 build)
-	endif
-	ifeq ($(ARCH),x86)
-=======
-	ifneq (,$(findstring x86,$(ARCH)))
->>>>>>> 11e7bb7 (filp_sse_bitscan.c (experimental) added; Makefile modified.)
-		LIBS += -lpthread
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
 	endif
 endif
 ifeq ($(OS),osx)
-<<<<<<< HEAD
-<<<<<<< HEAD
 	EXE = mEdax-$(ARCH)
-=======
-	ifeq ($(ARCH),x64)
-		EXE = mEdax
-	else
-		EXE = mEdax-$(ARCH)
-	endif
->>>>>>> e558fdb (Some cleanups for clang / android build)
-=======
-	EXE = mEdax-$(ARCH)
->>>>>>> ffdc063 (makefile for macuniversal)
 	LIBS += -lpthread
 endif
 
@@ -602,12 +379,6 @@ help:
 build:
 	@echo "building edax..."
 	$(CC) $(CFLAGS) $(LTOFLAG) all.c -s -o $(BIN)/$(EXE) $(LIBS)
-<<<<<<< HEAD
-
-source:
-	$(CC) $(CFLAGS) -S all.c
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 
 source:
 	$(CC) $(CFLAGS) -S all.c
@@ -638,33 +409,12 @@ release:
 	$(MAKE) pgo-build ARCH=x64 OS=linux COMP=gcc 
 	$(MAKE) build ARCH=x64 OS=windows COMP=gcc CC='x86_64-w64-mingw32-gcc'
 	$(MAKE) build ARCH=x86 OS=windows COMP=gcc CC='i686-w64-mingw32-gcc'
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	$(MAKE) build ARCH=armv7 OS=android COMP=gcc CC='arm-linux-androideabi-gcc --sysroot=$SYSROOT'
-=======
-	$(MAKE) build ARCH=ARMv7 OS=android COMP=gcc CC='arm-linux-androideabi-gcc'
->>>>>>> e558fdb (Some cleanups for clang / android build)
-=======
-	$(MAKE) build ARCH=ARMv7 OS=android COMP=gcc CC='arm-linux-androideabi-gcc --sysroot=$SYSROOT'
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
 	$(MAKE) build ARCH=armv7 OS=android COMP=gcc CC='arm-linux-androideabi-gcc --sysroot=$SYSROOT'
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 	$(MAKE) clean
 	$(MAKE) build ARCH=x64 OS=osx COMP=gcc-old CC=i686-apple-darwin10-gcc
 
 android:
 	ndk-build NDK_PROJECT_PATH=. NDK_APPLICATION_MK=./Application.mk # NDK_DEBUG=1
-<<<<<<< HEAD
-
-macuniversal:
-	$(MAKE) build ARCH=x86 OS=osx COMP=clang
-	$(MAKE) build ARCH=x64 OS=osx COMP=clang
-	lipo -create -arch i686 ../bin/mEdax-x86 -arch x86_64 ../bin/mEdax-x64 -arch arm64 ../bin/mEdax-arm -output ../bin/mEdax
-	rm -f ../bin/mEdax-x86 ../bin/mEdax-x64
-=======
->>>>>>> f2da03e (Refine arm builds adding neon support.)
 
 macuniversal:
 	$(MAKE) build ARCH=x86 OS=osx COMP=clang
diff --git a/src/NMakefile b/src/NMakefile
index 258d496..ba1a68e 100644
--- a/src/NMakefile
+++ b/src/NMakefile
@@ -8,8 +8,6 @@
 #
 
 # Microsoft Visual C++ 2008 or better for Windows (7 or Vista).
-<<<<<<< HEAD
-<<<<<<< HEAD
 # VC_FLAGS = /source-charset:.1252 /execution-charset:.1252\
 VC_FLAGS = /D UNICODE /utf-8 /D _CRT_SECURE_NO_DEPRECATE /I"..\include" /O2 /fp:fast /GS- /D NDEBUG /MT
 
@@ -103,146 +101,6 @@ vc-pgo-w64:
 	link all.obj  ws2_32.lib /out:..\bin\wEdax-w64.exe /ltcg:pgo /VERSION:4.5
 	del *.pgc ..\bin\*.pgd
 
-icc-pgo-w64-modern:
-	icl $(VC_FLAGS) /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT /Qprof-gen all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5
-	cd ..\bin
-	wEdax-w64-modern -l 60 -solve ..\problem\fforum-20-39.obf
-	wEdax-w64-modern -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo
-	del book.pgo book.pgo.store
-	cd ..\src
-	icl $(VC_FLAGS) /GL /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT /Qprof-use /Qip all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5
-=======
-VC_FLAGS = /I"..\include" /O2 /Oi /GL /fp:fast /source-charset:.1252 /execution-charset:.1252\
-           /D "NDEBUG" /D "inline=__inline" /D "__func__=__FUNCTION__"\
-=======
-# VC_FLAGS = /source-charset:.1252 /execution-charset:.1252\
-<<<<<<< HEAD
-VC_FLAGS = /D UNICODE /utf-8\
-<<<<<<< HEAD
-           /I"..\include" /O2 /Oi /GL /GF /fp:fast /D NDEBUG /D inline=__inline /D __func__=__FUNCTION__\
->>>>>>> 4cba71a (Use utf-8 for french/degree/micro chars; consistent capitalize in opening names for string-pooling)
-=======
-=======
-VC_FLAGS = /D UNICODE /utf-8 /D _CRT_SECURE_NO_DEPRECATE\
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 6506166 (More SSE optimizations)
-           /I"..\include" /O2 /GL /fp:fast /D NDEBUG /D inline=__forceinline /D __func__=__FUNCTION__\
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-           /I"..\include" /O2 /fp:fast /GS- /D NDEBUG /D inline=__forceinline /D __func__=__FUNCTION__\
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-           /MT
-=======
-           /I"..\include" /O2 /fp:fast /D NDEBUG /D inline=__forceinline /D __func__=__FUNCTION__\
-<<<<<<< HEAD
-           /MT /GL
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-           /MT
->>>>>>> 59f61a8 (Drop /GL from clang build)
-
-vc-w64-modern:
-	cl $(VC_FLAGS) /GL /D HAS_CPU_64 /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5
-
-vc-w64-avx512:
-	cl $(VC_FLAGS) /GL /D HAS_CPU_64 /arch:AVX512 /D POPCOUNT /D LASTFLIP_HIGHCUT all.c ws2_32.lib /Fe..\bin\wEdax-w64-avx512.exe /link /VERSION:4.5
-
-vc-w64-popcnt:
-	cl $(VC_FLAGS) /GL /D HAS_CPU_64 /D POPCOUNT all.c ws2_32.lib /Fe..\bin\wEdax-w64-popcnt.exe /link /VERSION:4.5
-
-vc-w64-k10:
-	cl $(VC_FLAGS) /GL /D HAS_CPU_64 /D POPCOUNT /D __LZCNT__ /D MOVE_GENERATOR=MOVE_GENERATOR_BITSCAN /favor:AMD64 all.c ws2_32.lib /Fe..\bin\wEdax-w64-k10.exe /link /VERSION:4.5
-
-vc-w64:
-	cl $(VC_FLAGS) /GL /D HAS_CPU_64 all.c ws2_32.lib /Fe..\bin\wEdax-w64.exe /link /VERSION:4.5
-
-vc-w32-modern:
-	cl $(VC_FLAGS) /GL /D hasSSE2 /arch:AVX2 /D POPCOUNT all.c ws2_32.lib /Fe..\bin\wEdax-w32-modern.exe
-
-vc-w32-sse:
-	cl $(VC_FLAGS) /GL /D hasSSE2 all.c ws2_32.lib /Fe..\bin\wEdax-w32-sse.exe
-
-vc-w32-mmx:
-	cl $(VC_FLAGS) /GL /arch:IA32 /D hasMMX all.c ws2_32.lib /Fe..\bin\wEdax-w32-mmx.exe
-
-vc-w32:
-	cl $(VC_FLAGS) /GL /arch:IA32 all.c ws2_32.lib /Fe..\bin\wEdax-w32.exe
-
-vc-a64:
-#	vcvarsamd64_arm64.bat
-	cl $(VC_FLAGS) /GL /D HAS_CPU_64 all.c ws2_32.lib /Fe..\bin\wEdax-a64.exe /link /VERSION:4.5
-
-vc-a32:
-#	vcvarsamd64_arm.bat
-	cl $(VC_FLAGS) /GL all.c ws2_32.lib /Fe..\bin\wEdax-a32.exe
-
-icc-w64-modern:
-	icl $(VC_FLAGS) /GL /D HAS_CPU_64 /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5
-
-icc-w64-avx512:
-	icl $(VC_FLAGS) /GL /D HAS_CPU_64 /arch:CORE-AVX512 /D POPCOUNT /D LASTFLIP_HIGHCUT all.c ws2_32.lib /Fe..\bin\wEdax-w64-avx512.exe /link /VERSION:4.5
-
-icc-w64:
-	icl $(VC_FLAGS) /GL /D HAS_CPU_64 all.c ws2_32.lib /Fe..\bin\wEdax-w64.exe /link /VERSION:4.5
-
-icc-w32:
-	icl $(VC_FLAGS) /GL /arch:IA32 all.c ws2_32.lib /Fe..\bin\wEdax-w32.exe
-
-clang-w64-modern:
-	clang-cl $(VC_FLAGS) /D HAS_CPU_64 /arch:AVX2 /D POPCOUNT all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5
-
-clang-w64:
-	clang-cl $(VC_FLAGS) /D HAS_CPU_64 all.c ws2_32.lib /Fe..\bin\wEdax-w64.exe /link /VERSION:4.5
-
-clang-w32:
-	clang-cl $(VC_FLAGS) /arch:IA32 all.c ws2_32.lib /Fe..\bin\wEdax-w32.exe
-
-vc-pgo-w64-modern:
-	set VCPROFILE_PATH=..\src
-	cl $(VC_FLAGS) /GL /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /ltcg:pgi /VERSION:4.5
-	cd ..\bin
-	wEdax-w64-modern -l 60 -solve ..\problem\fforum-20-39.obf
-	wEdax-w64-modern -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo
-	del book.pgo book.pgo.store
-	cd ..\src
-	link all.obj  ws2_32.lib /out:..\bin\wEdax-w64-modern.exe /ltcg:pgo /VERSION:4.5
-	del *.pgc ..\bin\*.pgd
-
-vc-pgo-w64-k10:
-	set VCPROFILE_PATH=..\src
-	cl $(VC_FLAGS) /GL /D POPCOUNT /D __LZCNT__ /D MOVE_GENERATOR=MOVE_GENERATOR_BITSCAN all.c ws2_32.lib /Fe..\bin\wEdax-w64-k10.exe /link /ltcg:pgi /VERSION:4.5
-	cd ..\bin
-	wEdax-w64-k10 -l 60 -solve ..\problem\fforum-20-39.obf
-	wEdax-w64-k10 -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo
-	del book.pgo book.pgo.store
-	cd ..\src
-	link all.obj  ws2_32.lib /out:..\bin\wEdax-w64-k10.exe /ltcg:pgo /VERSION:4.5
-	del *.pgc ..\bin\*.pgd
-
-vc-pgo-w64:
-	set VCPROFILE_PATH=..\src
-	cl $(VC_FLAGS) /GL all.c ws2_32.lib /Fe..\bin\wEdax-w64.exe /link /ltcg:pgi /VERSION:4.5
-	cd ..\bin
-	wEdax-w64 -l 60 -solve ..\problem\fforum-20-39.obf
-	wEdax-w64 -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo
-	del book.pgo book.pgo.store
-	cd ..\src
-<<<<<<< HEAD
-<<<<<<< HEAD
-	link all.obj  ws2_32.lib /out:..\bin\wEdax-w64.exe /ltcg:pgo /machine:x64 /VERSION:4.4
-<<<<<<< HEAD
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-=======
-	link all.obj  ws2_32.lib /out:..\bin\wEdax-w64.exe /ltcg:pgo /VERSION:4.4
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-	link all.obj  ws2_32.lib /out:..\bin\wEdax-w64.exe /ltcg:pgo /VERSION:4.5
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-	del *.pgc ..\bin\*.pgd
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-
 icc-pgo-w64-modern:
 	icl $(VC_FLAGS) /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT /Qprof-gen all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5
 	cd ..\bin
diff --git a/src/base.c b/src/base.c
index c6c6528..aef0f86 100644
--- a/src/base.c
+++ b/src/base.c
@@ -481,15 +481,7 @@ static void wthorgame_get_board(WthorGame *game, const int n_empties, Board *boa
 		if (board_is_pass(board)) {
 			board_pass(board); *player ^= 1;
 		}
-<<<<<<< HEAD
-<<<<<<< HEAD
 		board_get_move_flip(board, move_from_wthor(game->x[i]), &move);
-=======
-		board_get_move(board, move_from_wthor(game->x[i]), &move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		board_get_move_flip(board, move_from_wthor(game->x[i]), &move);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 		if (board_check_move(board, &move)) {
 			board_update(board, &move); *player ^= 1;
 		} else {
diff --git a/src/bench.c b/src/bench.c
index f9b1a82..7b6fad9 100644
--- a/src/bench.c
+++ b/src/bench.c
@@ -1,27 +1,9 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
 /**
  * @file bench.c
  *
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2017
->>>>>>> b3f048d (copyright changes)
  * @author Richard Delorme
  * @version 4.5
-=======
-/**
- * @file bench.c
- *
- * @date 1998 - 2023
- * @author Richard Delorme
-<<<<<<< HEAD
- * @version 4.4
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
- * @version 4.5
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
  */
 
 #include "bit.h"
@@ -38,15 +20,7 @@
  *
  * @return a CPU clock tick.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static unsigned long long click(void)
-=======
-static unsigned long long click()
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 static unsigned long long click(void)
->>>>>>> 0b8fa13 (More HBOARD hash functions)
 {
 #if defined(USE_GAS_X64)
 
@@ -60,15 +34,7 @@ static unsigned long long click(void)
 	__asm__ volatile (
 		"rdtsc" : "=A" (a));
 	return a;
-<<<<<<< HEAD
-<<<<<<< HEAD
-#elif defined(_WIN32) && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64))
-=======
-#elif defined(_WIN32)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 #elif defined(_WIN32) && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64))
->>>>>>> f2da03e (Refine arm builds adding neon support.)
 	return __rdtsc();
 #else
 	return cpu_clock();
@@ -78,15 +44,7 @@ static unsigned long long click(void)
 /*
  * @brief Move generator performance test.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 static void bench_move_generator(void)
-=======
-static void bench_move_generator()
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static void bench_move_generator(void)
->>>>>>> 0b8fa13 (More HBOARD hash functions)
 {
 	const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O";
 	char m[4];
@@ -124,29 +82,13 @@ static void bench_move_generator(void)
 		
 		c = -click();
 		for (i = 0; i < N_WARMUP; ++i) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-			v += board_get_move_flip(&board, x, &move);
-=======
-			v += board_get_move(&board, x, &move);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 			v += board_get_move_flip(&board, x, &move);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 		}
 		c += click();
 
 		c = -click();
 		for (i = 0; i < N_REPEAT; ++i) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 			v += board_get_move_flip(&board, x, &move);
-=======
-			v += board_get_move(&board, x, &move);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-			v += board_get_move_flip(&board, x, &move);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 		}
 		c += click();
 
@@ -156,44 +98,20 @@ static void bench_move_generator(void)
 		if (t < t_min) t_min = t;
 		if (t > t_max) t_max = t;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-		if (options.verbosity >= 2) printf("board_get_move_flip: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t);
-=======
-		if (options.verbosity >= 2) printf("board_get_move: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 		if (options.verbosity >= 2) printf("board_get_move_flip: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 
 	}
 
 	t_mean /= x;
 	t_var = t_var / x - (t_mean * t_mean);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	printf("board_get_move_flip:  %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max);
-=======
-	printf("board_get_move:  %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	printf("board_get_move_flip:  %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 }
 
 /*
  * @brief Last Move performance test.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 static void bench_count_last_flip(void)
-=======
-static void bench_count_last_flip()
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static void bench_count_last_flip(void)
->>>>>>> 0b8fa13 (More HBOARD hash functions)
 {
 	const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O";
 	char m[4];
@@ -260,15 +178,7 @@ static void bench_count_last_flip(void)
 /*
  * @brief Scoring performance test.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static void bench_board_score_1(void)
-=======
-static void bench_board_score_1()
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 static void bench_board_score_1(void)
->>>>>>> 0b8fa13 (More HBOARD hash functions)
 {
 	const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O";
 	char m[4];
@@ -307,37 +217,13 @@ static void bench_board_score_1(void)
 
 		c = -click();
 		for (i = 0; i < N_WARMUP; ++i) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 			v += board_score_1(board.player, SCORE_MAX - 1, x);
-=======
-			v += board_score_1(&board, SCORE_MAX, x);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-			v += board_score_1(board.player, SCORE_MAX, x);
->>>>>>> 26dad03 (Use player bits only in board_score_1)
-=======
-			v += board_score_1(board.player, SCORE_MAX - 1, x);
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
 		}
 		c += click();
 
 		c = -click();
 		for (i = 0; i < N_REPEAT; ++i) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-			v += board_score_1(board.player, SCORE_MAX - 1, x);
-=======
-			v += board_score_1(&board, SCORE_MAX, x);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-			v += board_score_1(board.player, SCORE_MAX, x);
->>>>>>> 26dad03 (Use player bits only in board_score_1)
-=======
 			v += board_score_1(board.player, SCORE_MAX - 1, x);
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
 		}
 		c += click();
 
@@ -360,15 +246,7 @@ static void bench_board_score_1(void)
 /*
  * @brief Mobility performance test.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 static void bench_mobility(void)
-=======
-static void bench_mobility()
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static void bench_mobility(void)
->>>>>>> 0b8fa13 (More HBOARD hash functions)
 {
 	const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O";
 	char m[4];
@@ -446,15 +324,7 @@ static void bench_mobility(void)
 /*
  * @brief Stability performance test.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static void bench_stability(void)
-=======
-static void bench_stability()
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 static void bench_stability(void)
->>>>>>> 0b8fa13 (More HBOARD hash functions)
 {
 	const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O";
 	char m[4];
@@ -542,422 +412,3 @@ void bench(void)
 
 
 
-<<<<<<< HEAD
-=======
-/**
- * @file bench.c
- *
- * @date 1998 - 2020
- * @author Richard Delorme
- * @version 4.4
- */
-
-#include "bit.h"
-#include "board.h"
-#include "move.h"
-#include "options.h"
-#include "search.h"
-#include "util.h"
-
-#include <math.h>
-
-/*
- * @brief return a CPU clock tick.
- *
- * @return a CPU clock tick.
- */
-static unsigned long long click()
-{
-#if defined(USE_GAS_X64)
-
-	unsigned int a, d;
-	__asm__ volatile (
-		"rdtsc" : "=a" (a), "=d" (d));
-	return a | (((unsigned long long)d) << 32);
-
-#elif defined(USE_GAS_X86)
-	unsigned long long a;
-	__asm__ volatile (
-		"rdtsc" : "=A" (a));
-	return a;
-#elif defined(_WIN32)
-	return __rdtsc();
-#else
-	return cpu_clock();
-#endif
-}
-
-/*
- * @brief Move generator performance test.
- */
-static void bench_move_generator()
-{
-	const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O";
-	char m[4];
-	Board board;
-	Move move;
-	int i, x;
-	volatile int v;
-	const int N_WARMUP = 1000;
-	const int N_REPEAT = 1000000;
-	unsigned long long c, overhead;
-	double t, t_mean, t_var, t_min, t_max;
-
-	v = 0;
-	c = -click();
-	for (i = 0; i < N_WARMUP; ++i) {
-		v += i;
-	}
-	c += click();
-
-	c = -click();
-	for (i = 0; i < N_REPEAT; ++i) {
-		v += i;
-	}
-	c += click();
-	overhead = c;
-
-	t_mean = t_var = 0.0;
-	t_max = 0;
-	t_min = 1e30;
-
-	for (x = A1; x < PASS; ++x) {
-		board_set(&board, b);
-		board.player &= ~x_to_bit(x);
-		board.opponent &= ~x_to_bit(x);
-		
-		c = -click();
-		for (i = 0; i < N_WARMUP; ++i) {
-			v += board_get_move(&board, x, &move);
-		}
-		c += click();
-
-		c = -click();
-		for (i = 0; i < N_REPEAT; ++i) {
-			v += board_get_move(&board, x, &move);
-		}
-		c += click();
-
-		t = ((double)(c - overhead)) / N_REPEAT;
-		t_mean += t;
-		t_var += t * t;
-		if (t < t_min) t_min = t;
-		if (t > t_max) t_max = t;
-
-		if (options.verbosity >= 2) printf("board_get_move: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t);
-
-	}
-
-	t_mean /= x;
-	t_var = t_var / x - (t_mean * t_mean);
-
-	printf("board_get_move:  %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max);
-}
-
-/*
- * @brief Last Move performance test.
- */
-static void bench_count_last_flip()
-{
-	const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O";
-	char m[4];
-	Board board;
-	int i, x;
-	volatile int v;
-	const int N_WARMUP = 1000;
-	const int N_REPEAT = 1000000;
-	unsigned long long c, overhead;
-	double t, t_mean, t_var, t_min, t_max;
-
-	v = 0;
-
-	c = -click();
-	for (i = 0; i < N_WARMUP; ++i) {
-		v += i;
-	}
-	c += click();
-
-	c = -click();
-	for (i = 0; i < N_REPEAT; ++i) {
-		v += i;
-	}
-	c += click();
-	overhead = c;
-
-	t_mean = t_var = 0.0;
-	t_max = 0;
-	t_min = 1e30;
-
-	for (x = A1; x < PASS; ++x) {
-		board_set(&board, b);
-		board.player &= ~x_to_bit(x);
-		// board.opponent &= ~x_to_bit(x);
-
-		c = -click();
-		for (i = 0; i < N_WARMUP; ++i) {
-			v += last_flip(x, board.player & ~i);
-		}
-		c += click();
-
-		c = -click();
-		for (i = 0; i < N_REPEAT; ++i) {
-			v += last_flip(x, board.player& ~i);
-		}
-		c += click();
-
-		t = ((double)(c - overhead)) / N_REPEAT;
-		t_mean += t;
-		t_var += t * t;
-		if (t < t_min) t_min = t;
-		if (t > t_max) t_max = t;
-
-		if (options.verbosity >= 2) printf("count_last_flip: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t);
-
-	}
-
-	t_mean /= x;
-	t_var = t_var / x - (t_mean * t_mean);
-
-	printf("count_last_flip:  %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max);
-}
-
-/*
- * @brief Scoring performance test.
- */
-static void bench_board_score_1()
-{
-	const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O";
-	char m[4];
-	Board board;
-	int i, x;
-	volatile int v;
-	const int N_WARMUP = 1000;
-	const int N_REPEAT = 1000000;
-	unsigned long long c, overhead;
-	double t, t_mean, t_var, t_min, t_max;
-
-	board_set(&board, b);
-	v = 0;
-
-	c = -click();
-	for (i = 0; i < N_WARMUP; ++i) {
-		v += i;
-	}
-	c += click();
-
-	c = -click();
-	for (i = 0; i < N_REPEAT; ++i) {
-		v += i;
-	}
-	c += click();
-	overhead = c;
-
-	t_mean = t_var = 0.0;
-	t_max = 0;
-	t_min = 1e30;
-
-	for (x = A1; x < PASS; ++x) {
-		board_set(&board, b);
-		board.player &= ~x_to_bit(x);
-		board.opponent &= ~x_to_bit(x);
-
-		c = -click();
-		for (i = 0; i < N_WARMUP; ++i) {
-			v += board_score_1(&board, SCORE_MAX, x);
-		}
-		c += click();
-
-		c = -click();
-		for (i = 0; i < N_REPEAT; ++i) {
-			v += board_score_1(&board, SCORE_MAX, x);
-		}
-		c += click();
-
-		t = ((double)(c - overhead)) / N_REPEAT;
-		t_mean += t;
-		t_var += t * t;
-		if (t < t_min) t_min = t;
-		if (t > t_max) t_max = t;
-
-		if (options.verbosity >= 2) printf("board_score_1: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t);
-
-	}
-
-	t_mean /= x;
-	t_var = t_var / x - (t_mean * t_mean);
-
-	printf("board_score_1:  %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max);
-}
-
-/*
- * @brief Mobility performance test.
- */
-static void bench_mobility()
-{
-	const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O";
-	char m[4];
-	Board board;
-	int i, x;
-	volatile int v;
-	const int N_WARMUP = 1000;
-	const int N_REPEAT = 1000000;
-	unsigned long long c, overhead;
-	double t, t_mean, t_var, t_min, t_max;
-
-	board_set(&board, b);
-	v = 0;
-	c = -click();
-	for (i = 0; i < N_WARMUP; ++i) {
-		board.player &= ~i;
-		board.opponent &= ~i;
-		v += i;
-	}
-	c += click();
-
-	board_set(&board, b);
-	c = -click();
-	for (i = 0; i < N_REPEAT; ++i) {
-		board.player &= ~i;
-		board.opponent &= ~i;
-		v += i;
-	}
-	c += click();
-	overhead = 0;
-
-	t_mean = t_var = 0.0;
-	t_max = 0;
-	t_min = 1e30;
-
-	for (x = A1; x < PASS; ++x) {
-		board_set(&board, b);
-
-		v = 0;
-		c = -click();
-		for (i = 0; i < N_WARMUP; ++i) {
-			board.player &= ~i;
-			board.opponent &= ~i;
-			v += get_mobility(board.player, board.opponent);
-			v -= get_mobility(board.opponent, board.player);
-		}
-		c += click();
-
-		board_set(&board, b);
-		c = -click();
-		for (i = 0; i < N_REPEAT; ++i) {
-			board.player &= ~i;
-			board.opponent &= ~i;
-			v += get_mobility(board.player, board.opponent);
-			v -= get_mobility(board.opponent, board.player);
-		}
-		c += click();
-
-		t = ((double)(c - overhead)) / N_REPEAT / 2;
-		t_mean += t;
-		t_var += t * t;
-		if (t < t_min) t_min = t;
-		if (t > t_max) t_max = t;
-
-		if (options.verbosity >= 2) printf("v = %d\n", v);
-		if (options.verbosity >= 2) printf("mobility: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t);
-	}
-
-	t_mean /= x;
-	t_var = t_var / x - (t_mean * t_mean);
-
-	printf("mobility:  %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max);
-}
-
-/*
- * @brief Stability performance test.
- */
-static void bench_stability()
-{
-	const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O";
-	char m[4];
-	Board board;
-	int i, x;
-	volatile int v;
-	const int N_WARMUP = 1000;
-	const int N_REPEAT = 1000000;
-	unsigned long long c, overhead;
-	double t, t_mean, t_var, t_min, t_max;
-
-	board_init(&board);
-
-	v = 0;
-	x = A1;
-	c = -click();
-	for (i = 0; i < N_WARMUP; ++i) {
-		board.player &= ~x_to_bit(x);
-		board.opponent &= ~x_to_bit(x);
-	}
-	c += click();
-
-	board_set(&board, b);
-	c = -click();
-	for (i = 0; i < N_REPEAT; ++i) {
-		board.player &= ~x_to_bit(x);
-		board.opponent &= ~x_to_bit(x);
-	}
-	c += click();
-	overhead = c;
-
-	t_mean = t_var = 0.0;
-	t_max = 0;
-	t_min = 1e30;
-
-	for (x = A1; x < PASS; ++x) {
-		board_set(&board, b);
-
-		v = 0;
-		c = -click();
-		for (i = 0; i < N_WARMUP; ++i) {
-			board.player &= ~x_to_bit(x);
-			board.opponent &= ~x_to_bit(x);
-			v += get_stability(board.player, board.opponent);
-		}
-		c += click();
-
-		board_set(&board, b);
-		c = -click();
-		for (i = 0; i < N_REPEAT; ++i) {
-			board.player &= ~x_to_bit(x);
-			board.opponent &= ~x_to_bit(x);
-			v += get_stability(board.player, board.opponent);
-		}
-		c += click();
-
-		t = ((double)(c - overhead)) / N_REPEAT;
-		t_mean += t;
-		t_var += t * t;
-		if (t < t_min) t_min = t;
-		if (t > t_max) t_max = t;
-
-		if (options.verbosity >= 2) printf("v = %d\n", v);
-		if (options.verbosity >= 2) printf("stability: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t);
-	}
-
-	t_mean /= x;
-	t_var = t_var / x - (t_mean * t_mean);
-	
-	printf("stability:  %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max);
-}
-
-/**
- * @brief perform various performance tests.
- */
-void bench(void)
-{
-	printf("The unit of the results is CPU cycles\n");
-	bench_move_generator();
-	bench_count_last_flip();
-	bench_board_score_1();
-	bench_mobility();
-	bench_stability();
-}
-
-
-
->>>>>>> c04475d (Fix microbench not to be optimized out)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/bit.c b/src/bit.c
index 218a399..de9f903 100644
--- a/src/bit.c
+++ b/src/bit.c
@@ -6,31 +6,7 @@
  * a macro needs to be defined to chose between different flavors of the
  * algorithm.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2017
->>>>>>> b3f048d (copyright changes)
-=======
- * @date 1998 - 2018
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
- * @date 1998 - 2020
->>>>>>> 22be102 (table lookup bit_count for non-POPCOUNT from stockfish)
-=======
- * @date 1998 - 2021
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
- * @date 1998 - 2022
->>>>>>> dc7c79c (Omit unpack from get_edge_stability)
-=======
- * @date 1998 - 2023
->>>>>>> a9633d5 (Initial 4.5.2; some reformats)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -38,22 +14,11 @@
 #include "bit.h"
 #include "util.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
 /** Table for a 32-bits-at-a-time software CRC-32C calculation.
  * This tablehas built into it the pre and post bit inversion of the CRC. */
 #ifndef crc32c_u64
 static unsigned int crc32c_table[4][256];
 #endif
-<<<<<<< HEAD
-=======
-/** coordinate to bit table converter */
-unsigned long long X_TO_BIT[66];
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
 
 /** coordinate to bit table converter */
 unsigned long long X_TO_BIT[66];
@@ -94,28 +59,15 @@ const unsigned long long NEIGHBOUR[] = {
  * @return the number of bits set.
  */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef POPCOUNT
-  #if 0
-=======
-#if 0 // ndef POPCOUNT
->>>>>>> 22be102 (table lookup bit_count for non-POPCOUNT from stockfish)
-=======
 #ifndef POPCOUNT
   #if 0
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 int bit_count(unsigned long long b)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	int	c;
 
 	b  = b - ((b >> 1) & 0x5555555555555555ULL);
 	b  = ((b >> 2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL);
     #ifdef HAS_CPU_64
-<<<<<<< HEAD
 	b = (b + (b >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
 	c = (b * 0x0101010101010101ULL) >> 56;
     #else
@@ -125,42 +77,6 @@ int bit_count(unsigned long long b)
     #endif
 	return c;
 }
-=======
-	register unsigned long long c;
-=======
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-	#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-=======
-	int	c;
-<<<<<<< HEAD
-	#if 0 // defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-	static const unsigned long long M55 = 0x5555555555555555ULL;
-	static const unsigned long long M33 = 0x3333333333333333ULL;
-	static const unsigned long long M0F = 0x0F0F0F0F0F0F0F0FULL;
-	#endif
-
-// MMX does not help much here :-(
-	#if 0 // def USE_MSVC_X86
-	__m64	m;
-
-	if (hasSSE2) {
-		m = *(__m64 *) &b;
-		m = _m_psubd(m, _m_pand(_m_psrlqi(m, 1), *(__m64 *) &M55));
-		m = _m_paddd(_m_pand(m, *(__m64 *) &M33), _m_pand(_m_psrlqi(m, 2), *(__m64 *) &M33));
-		m = _m_pand(_m_paddd(m, _m_psrlqi(m, 4)), *(__m64 *) &M0F);
-		c = _m_to_int(_m_psadbw(m, _mm_setzero_si64()));
-		_mm_empty();
-
-		return c;
-	}
-
-<<<<<<< HEAD
-	#elif defined(USE_GAS_MMX)
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	#elif 0 // defined(USE_GAS_MMX)
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 
   #else
 // https://github.com/official-stockfish/Stockfish/pull/620/files
@@ -187,7 +103,6 @@ void bit_init(void)
 #ifndef crc32c_u64
 	unsigned int	k, crc;
 
-<<<<<<< HEAD
 	// http://stackoverflow.com/a/17646775/1821055
 	// https://github.com/baruch/crcbench
 	// Generate byte-wise table.
@@ -211,126 +126,19 @@ void bit_init(void)
 	for (n = 0; n < 66; ++n) {	// X_TO_BIT[64] = X_TO_BIT[65] = 0 for passing move & nomove
 		X_TO_BIT[n] = ll;
 		ll <<= 1;
-=======
-			"pxor  %%mm2, %%mm2\n\t"
-			"psadbw %%mm2, %%mm0\n\t"	// SSE2
-			"movd	%%mm0, %0\n\t"
-			"emms"
-		: "=a" (c)
-		: "rm" (b), "m" (M55), "m" (M33), "m" (M0F), "m" (((unsigned int *) &b)[1]));
-
-		return c;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	}
 
-<<<<<<< HEAD
 #ifndef POPCOUNT
 	for (n = 0; n < (1 << 16); ++n)
 		PopCnt16[n] = bit_count_32_SWAR(n);
-=======
-	#endif
-=======
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-
-	b  = b - ((b >> 1) & 0x5555555555555555ULL);
-	b  = ((b >> 2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL);
-#ifdef HAS_CPU_64
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-	b = (b + (b >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
-	c = (b * 0x0101010101010101ULL) >> 56;
-    #else
-	c = (b >> 32) + b;
-	c = (c & 0x0F0F0F0F) + ((c >> 4) & 0x0F0F0F0F);
-	c = (c * 0x01010101) >> 24;
-    #endif
-	return c;
-}
-<<<<<<< HEAD
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
 #endif
 
-<<<<<<< HEAD
 #if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)) && !defined(hasSSE2)
 	init_mmx();
 #endif
 #if defined(ANDROID) && !defined(__ARM_NEON) && !defined(hasSSE2)
 	init_neon();
 #endif
-=======
-#ifndef POPCOUNT
-=======
-
-  #else
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-// https://github.com/official-stockfish/Stockfish/pull/620/files
-// 2% faster than SWAR bit_count for 32 & 64 non-POPCOUNT build
-unsigned char PopCnt16[1 << 16];
-
-static int bit_count_32_SWAR(unsigned int b)
-{
-	b = b - ((b >> 1) & 0x55555555);
-	b = ((b >> 2) & 0x333333333) + (b & 0x33333333);
-	b = ((b >> 4) + b) & 0x0F0F0F0F;
-	return (b * 0x01010101) >> 24;
-}
-  #endif
-#endif
-
-/**
- * @brief initialize PopCnt16 table and check MMX/SSE availability.
- */
-void bit_init(void)
-{
-	unsigned int	n;
-	unsigned long long	ll;
-#ifndef crc32c_u64
-	unsigned int	k, crc;
-
-	// http://stackoverflow.com/a/17646775/1821055
-	// https://github.com/baruch/crcbench
-	// Generate byte-wise table.
-	for (n = 0; n < 256; n++) {
-		crc = ~n;
-		for (k = 0; k < 8; k++)
-			crc = (crc >> 1) ^ (-(int)(crc & 1) & 0x82f63b78);
-		crc32c_table[0][n] = ~crc;
-	}
-	// Use byte-wise table to generate word-wise table.
-	for (n = 0; n < 256; n++) {
-		crc = ~crc32c_table[0][n];
-		for (k = 1; k < 4; k++) {
-			crc = crc32c_table[0][crc & 0xff] ^ (crc >> 8);
-			crc32c_table[k][n] = ~crc;
-		}
-	}
-#endif
-
-	ll = 1;
-	for (n = 0; n < 66; ++n) {	// X_TO_BIT[64] = X_TO_BIT[65] = 0 for passing move & nomove
-		X_TO_BIT[n] = ll;
-		ll <<= 1;
-	}
-
-#ifndef POPCOUNT
-	for (n = 0; n < (1 << 16); ++n)
-		PopCnt16[n] = bit_count_32_SWAR(n);
-#endif
-
-#if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)) && !defined(hasSSE2)
-	init_mmx();
-#endif
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 22be102 (table lookup bit_count for non-POPCOUNT from stockfish)
-=======
-#if defined(ANDROID) && !defined(hasNeon) && !defined(hasSSE2)
-=======
-#if defined(ANDROID) && !defined(__ARM_NEON) && !defined(hasSSE2)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-	init_neon();
-#endif
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 }
 
 /**
@@ -343,17 +151,8 @@ void bit_init(void)
  * @param v 64-bit integer to count bits of.
  * @return the number of bit set, counting the corners twice.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if !defined(__AVX2__) && defined(hasSSE2) && !defined(POPCOUNT)
-__m128i bit_weighted_count_sse(unsigned long long Q0, unsigned long long Q1)
-=======
-int bit_weighted_count(unsigned long long v)
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
 #if !defined(__AVX2__) && defined(hasSSE2) && !defined(POPCOUNT)
 __m128i bit_weighted_count_sse(unsigned long long Q0, unsigned long long Q1)
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
 {
 	static const V2DI mask15 = {{ 0x1555555555555515, 0x1555555555555515 }};
 	static const V2DI mask01 = {{ 0x0100000000000001, 0x0100000000000001 }};
@@ -367,15 +166,7 @@ __m128i bit_weighted_count_sse(unsigned long long Q0, unsigned long long Q1)
 	return _mm_sad_epu8(v, _mm_setzero_si128());
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #elif defined(__ARM_NEON)
-=======
-#elif defined(hasNeon)
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
-=======
-#elif defined(__ARM_NEON)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 uint64x2_t bit_weighted_count_neon(unsigned long long Q0, unsigned long long Q1)
 {
 	uint64x2_t v = vcombine_u64(vcreate_u64(Q0), vcreate_u64(Q1));
@@ -383,39 +174,13 @@ uint64x2_t bit_weighted_count_neon(unsigned long long Q0, unsigned long long Q1)
 		vcntq_u8(vreinterpretq_u8_u64(vandq_u64(v, vdupq_n_u64(0x8100000000000081))))))));
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #elif 0	// SWAR, for record
 int bit_weighted_count(unsigned long long v)
 {
-<<<<<<< HEAD
-=======
-#if defined(POPCOUNT)
-  	unsigned int P2187 = (v >> 48) | (v << 16);	// ror 48
-	return bit_count(v) + bit_count_32(P2187 & 0x00818100);
-
-#else
->>>>>>> 867c81c (Omit restore board/parity in search_shallow; tweak NWS_STABILITY)
-=======
-#else
-int bit_weighted_count(unsigned long long v)
-{
-  #if defined(POPCOUNT)
-  	unsigned int P2187 = (v >> 48) | (v << 16);	// ror 48
-	return bit_count(v) + bit_count_32(P2187 & 0x00818100);
-
-  #else
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
-=======
-#elif 0	// SWAR, for record
-int bit_weighted_count(unsigned long long v)
-{
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	int	c;
 
 	v  = v - ((v >> 1) & 0x1555555555555515) + (v & 0x0100000000000001);
 	v  = ((v >> 2) & 0x3333333333333333) + (v & 0x3333333333333333);
-<<<<<<< HEAD
 	c = (v >> 32) + v;
 	c = (c & 0x0F0F0F0F) + ((c >> 4) & 0x0F0F0F0F);
 	c = (c * 0x01010101) >> 24;
@@ -423,8 +188,6 @@ int bit_weighted_count(unsigned long long v)
 }
 
 #else
-<<<<<<< HEAD
-<<<<<<< HEAD
 int bit_weighted_count(unsigned long long v)
 {
   	unsigned int AH18 = ((v >> 56) | (v << 8)) & 0x8181;	// ror 56
@@ -433,42 +196,6 @@ int bit_weighted_count(unsigned long long v)
   #else
   	return bit_count(v) + PopCnt16[AH18];
   #endif
-=======
-=======
-	int	c;
-
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-	v  = v - ((v >> 1) & 0x1555555555555515ULL) + (v & 0x0100000000000001ULL);
-	v  = ((v >> 2) & 0x3333333333333333ULL) + (v & 0x3333333333333333ULL);
-    #ifdef HAS_CPU_64
-	v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
-	c = (v * 0x0101010101010101ULL) >> 56;
-    #else
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-	c = (v >> 32) + v;
-	c = (c & 0x0F0F0F0F) + ((c >> 4) & 0x0F0F0F0F);
-	c = (c * 0x01010101) >> 24;
-	return c;
-<<<<<<< HEAD
-<<<<<<< HEAD
-#endif
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
-=======
-}
-
-#else
-int bit_weighted_count(unsigned long long v)
-{
-  	unsigned int AH18 = ((v >> 56) | (v << 8)) & 0x8181;	// ror 56
-  #ifdef POPCOUNT
-	return bit_count(v) + bit_count_32(AH18);
-  #else
-  	return bit_count(v) + PopCnt16[AH18];
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-  #endif
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
 }
 #endif
 
@@ -485,25 +212,6 @@ int bit_weighted_count(unsigned long long v)
  */
 #if !defined(first_bit_32) && !defined(HAS_CPU_64)
 int first_bit_32(unsigned int b)
-<<<<<<< HEAD
-{
-  #if defined(_MSC_VER)
-	unsigned long index;
-	_BitScanForward(&index, b);
-	return (int) index;
-
-  #elif defined(USE_GAS_X64) || defined(USE_GAS_X86)
-	__asm__("rep; bsf	%1, %0" : "=r" (b) : "rm" (b));	// tzcnt on BMI CPUs, bsf otherwise
-	return (int) b;
-
-  #elif defined(USE_MSVC_X86)
-	__asm {
-		bsf	eax, word ptr b
-	}
-
-  #elif defined(USE_GCC_ARM)
-	return  __builtin_clz(b & -b) ^ 31;
-=======
 {
   #if defined(_MSC_VER)
 	unsigned long index;
@@ -533,41 +241,6 @@ int first_bit_32(unsigned int b)
 }
 #endif // first_bit_32
 
-#ifndef first_bit
-int first_bit(unsigned long long b)
-{
-  #if defined(USE_GAS_X64)
-	__asm__("rep; bsfq	%1, %0" : "=r" (b) : "rm" (b));	// tzcntq on BMI CPUs
-	return (int) b;
-
-  #elif defined(USE_GAS_X86)
-	int 	x;
-	__asm__ ("bsf	%2, %0\n\t"	// (ZF differs from tzcnt)
-		"jnz	1f\n\t"
-		"bsf	%1, %0\n\t"
-		"addl	$32, %0\n"
-	"1:" : "=&q" (x) : "g" ((int) (b >> 32)), "g" ((int) b));
-	return x;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #else
-	static const unsigned char magic[32] = {
-		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 
-		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
-	};
-=======
-#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM))
-<<<<<<< HEAD
->>>>>>> 1dc032e (Improve visual c compatibility)
-
-	return magic[((b & (-b)) * 0x077CB531U) >> 27];
-  #endif
-}
-#endif // first_bit_32
-
 #ifndef first_bit
 int first_bit(unsigned long long b)
 {
@@ -585,31 +258,11 @@ int first_bit(unsigned long long b)
 	return x;
 
   #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-  #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	unsigned long index;
 	_BitScanForward64(&index, b);
 	return (int) index;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #elif defined(USE_MSVC_X86)
-=======
-#elif defined(USE_MASM_X86)
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
-#elif defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
   #elif defined(USE_MSVC_X86)
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	__asm {
 		bsf	eax, dword ptr b
 		jnz	l1
@@ -618,15 +271,7 @@ int first_bit(unsigned long long b)
 	l1:
 	}
 
-<<<<<<< HEAD
-<<<<<<< HEAD
   #elif defined(HAS_CPU_64)
-=======
-#elif defined(HAS_CPU_64)
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-  #elif defined(HAS_CPU_64)
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	static const unsigned char magic[64] = {
 		63, 0, 58, 1, 59, 47, 53, 2,
 		60, 39, 48, 27, 54, 33, 42, 3,
@@ -640,41 +285,17 @@ int first_bit(unsigned long long b)
 
 	return magic[((b & (-b)) * 0x07EDD5E59A4E28C2ULL) >> 58];
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #else
-=======
-#else
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
   #else
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	const unsigned int lb = (unsigned int) b;
 	if (lb) {
 		return first_bit_32(lb);
 	} else {
 		return 32 + first_bit_32(b >> 32);
 	}
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #endif
-=======
-#endif
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
   #endif
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 }
-<<<<<<< HEAD
-<<<<<<< HEAD
-#endif // first_bit
-
-=======
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
 #endif // first_bit
 
->>>>>>> ea39994 (Improve clang compatibility)
 #if 0
 /**
  * @brief Search the next bit set.
@@ -704,56 +325,16 @@ int next_bit(unsigned long long *b)
  */
 int last_bit(unsigned long long b)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
   #if defined(USE_GAS_X64)
-=======
-#if defined(USE_GAS_X64)
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	__asm__("bsrq	%1, %0" :"=r" (b) :"rm" (b));
 	return b;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
   #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
-=======
-#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM))
-<<<<<<< HEAD
-
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-  #if defined(USE_GAS_X64)
-	__asm__("bsrq	%1, %0" :"=r" (b) :"rm" (b));
-	return b;
-
-  #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	unsigned long index;
 	_BitScanReverse64(&index, b);
 	return (int) index;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #elif defined(USE_GAS_X86)
-	int	x;
-	__asm__ ("bsr	%1, %0\n\t"
-		"leal	32(%0), %0\n\t"
-		"jnz	1f\n\t"
-		"bsr	%2, %0\n\t"
-        "1:" : "=&q" (x) : "g" ((int) (b >> 32)), "g" ((int) b));
-	return x;
-
-  #elif 0 // defined(USE_GCC_ARM)
-=======
-#elif defined(USE_GAS_X86)
-=======
   #elif defined(USE_GAS_X86)
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	int	x;
 	__asm__ ("bsr	%1, %0\n\t"
 		"leal	32(%0), %0\n\t"
@@ -762,16 +343,7 @@ int last_bit(unsigned long long b)
         "1:" : "=&q" (x) : "g" ((int) (b >> 32)), "g" ((int) b));
 	return x;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#elif defined(USE_GCC_ARM)
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
-#elif 0 // defined(USE_GCC_ARM)
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
   #elif 0 // defined(USE_GCC_ARM)
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	const unsigned int hb = b >> 32;
 	if (hb) {
 		return 63 - __builtin_clz(hb);
@@ -779,19 +351,7 @@ int last_bit(unsigned long long b)
 		return 31 - __builtin_clz((int) b);
 	}
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
   #elif defined(USE_MSVC_X86)
-=======
-
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-#elif defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-  #elif defined(USE_MSVC_X86)
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	__asm {
 		bsr	eax, dword ptr b+4
 		lea	eax, [eax+32]
@@ -800,10 +360,6 @@ int last_bit(unsigned long long b)
 	l1:
 	}
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
   #elif defined(HAS_CPU_64)
 	// https://www.chessprogramming.org/BitScan#De_Bruijn_Multiplication_2
 	static const unsigned char magic[64] = {
@@ -815,30 +371,6 @@ int last_bit(unsigned long long b)
 		34, 51, 20, 43, 31, 22, 10, 45,
 		25, 39, 14, 33, 19, 30,  9, 24,
 		13, 18,  8, 12,  7,  6,  5, 63
-=======
-#elif defined(HAS_CPU_64)
-	// https://www.chessprogramming.org/BitScan#De_Bruijn_Multiplication_2
-	static const unsigned char magic[64] = {
-<<<<<<< HEAD
-		63, 0, 58, 1, 59, 47, 53, 2,
-		60, 39, 48, 27, 54, 33, 42, 3,
-		61, 51, 37, 40, 49, 18, 28, 20,
-		55, 30, 34, 11, 43, 14, 22, 4,
-		62, 57, 46, 52, 38, 26, 32, 41,
-		50, 36, 17, 19, 29, 10, 13, 21,
-		56, 45, 25, 31, 35, 16, 9, 12,
-		44, 24, 15, 8, 23, 7, 6, 5
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-		 0, 47,  1, 56, 48, 27,  2, 60,
-		57, 49, 41, 37, 28, 16,  3, 61,
-		54, 58, 35, 52, 50, 42, 21, 44,
-		38, 32, 29, 23, 17, 11,  4, 62,
-		46, 55, 26, 59, 40, 36, 15, 53,
-		34, 51, 20, 43, 31, 22, 10, 45,
-		25, 39, 14, 33, 19, 30,  9, 24,
-		13, 18,  8, 12,  7,  6,  5, 63
->>>>>>> 13d6004 (Update last_bit from chessprogramming wiki)
 	};
 
 	b |= b >> 1;
@@ -850,16 +382,7 @@ int last_bit(unsigned long long b)
 
 	return magic[(b * 0x03f79d71b4cb0a89) >> 58];
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #else
-=======
-#else
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
   #else
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	static const unsigned char clz_table_4bit[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
 	int	n = 63;
 	unsigned int	x;
@@ -871,39 +394,10 @@ int last_bit(unsigned long long b)
 	if ((x & 0xF0000000) == 0) { n -=  4; x <<=  4; }
 	n -= clz_table_4bit[x >> (32 - 4)];
 	return n;
-<<<<<<< HEAD
-<<<<<<< HEAD
   #endif
-=======
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-#endif
-=======
-  #endif
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-}
-#endif // last_bit
-
-<<<<<<< HEAD
-#ifndef bswap_short
-/**
- * @brief Swap bytes of a short (little <-> big endian).
- * @param s An unsigned short.
- * @return The mirrored short.
- */
-unsigned short bswap_short(unsigned short s)
-{
-	return (unsigned short) ((s >> 8) & 0x00FF) | ((s & 0x00FF) <<  8);
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
 }
-<<<<<<< HEAD
 #endif // last_bit
-=======
-#endif
->>>>>>> ea39994 (Improve clang compatibility)
 
-=======
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
 #ifndef bswap_int
 /**
  * @brief Mirror the unsigned int (little <-> big endian).
@@ -924,26 +418,9 @@ unsigned int bswap_int(unsigned int i)
  */
 unsigned long long vertical_mirror(unsigned long long b)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	return bswap_int((unsigned int)(b >> 32)) | ((unsigned long long) bswap_int((unsigned int) b) << 32);
 }
 #endif // bswap_int
-=======
-	b = ((b >>  8) & 0x00FF00FF00FF00FFULL) | ((b & 0x00FF00FF00FF00FFULL) <<  8);
-	b = ((b >> 16) & 0x0000FFFF0000FFFFULL) | ((b & 0x0000FFFF0000FFFFULL) << 16);
-	b = (b >> 32) | (b << 32);
-	return b;
-=======
-	return bswap_int((unsigned int)(b >> 32)) | ((unsigned long long) bswap_int((unsigned int) b) << 32);
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-}
-<<<<<<< HEAD
-#endif
->>>>>>> dbeab1c (reduce asm and inline which sometimes breaks debug build)
-=======
-#endif // bswap_int
->>>>>>> ea39994 (Improve clang compatibility)
 
 /**
  * @brief Mirror the unsigned long long (exchange the line 1 - 8, 2 - 7, 3 - 6 & 4 - 5).
@@ -952,43 +429,19 @@ unsigned long long vertical_mirror(unsigned long long b)
  */
 unsigned int horizontal_mirror_32(unsigned int b)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifdef __ARM_ACLE
-	return __rev(__rbit(b));
-#else
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 #ifdef __ARM_ACLE
 	return __rev(__rbit(b));
 #else
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	b = ((b >> 1) & 0x55555555U) +  2 * (b & 0x55555555U);
 	b = ((b >> 2) & 0x33333333U) +  4 * (b & 0x33333333U);
 	b = ((b >> 4) & 0x0F0F0F0FU) + 16 * (b & 0x0F0F0F0FU);
 	return b;
-<<<<<<< HEAD
-<<<<<<< HEAD
 #endif
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-#endif
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 }
 
 unsigned long long horizontal_mirror(unsigned long long b)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(HAS_CPU_64) && !defined(__ARM_ACLE)
-=======
-#ifdef HAS_CPU_64
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 #if defined(HAS_CPU_64) && !defined(__ARM_ACLE)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	b = ((b >> 1) & 0x5555555555555555ULL) | ((b & 0x5555555555555555ULL) << 1);
 	b = ((b >> 2) & 0x3333333333333333ULL) | ((b & 0x3333333333333333ULL) << 2);
 	b = ((b >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((b & 0x0F0F0F0F0F0F0F0FULL) << 4);
@@ -1004,38 +457,10 @@ unsigned long long horizontal_mirror(unsigned long long b)
  * @param b An unsigned long long
  * @return The transposed unsigned long long.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifdef __AVX2__
-<<<<<<< HEAD
-unsigned long long transpose(unsigned long long b)
-{
-	__m256i	v = _mm256_sllv_epi64(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(b)), _mm256_set_epi64x(0, 1, 2, 3));
-=======
-#include <x86intrin.h>
-unsigned long long transpose(unsigned long long b)
-{
-	static const __v4di s3210 = { 3, 2, 1, 0 };
-<<<<<<< HEAD
-	__v4di	v = _mm256_sllv_epi64(_mm256_broadcastq_epi64(_mm_set_epi64x(0, b)), s3210);
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
-	__v4di	v = _mm256_sllv_epi64(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(b)), s3210);
->>>>>>> dbeab1c (reduce asm and inline which sometimes breaks debug build)
-=======
-#if defined(__AVX2__) && (defined(__x86_64__) || defined(_M_X64))
-=======
 #ifdef __AVX2__
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
 unsigned long long transpose(unsigned long long b)
 {
-<<<<<<< HEAD
-	static const V4DI s3210 = {{ 3, 2, 1, 0 }};
-	__m256i	v = _mm256_sllv_epi64(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(b)), s3210.v4);
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
 	__m256i	v = _mm256_sllv_epi64(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(b)), _mm256_set_epi64x(0, 1, 2, 3));
->>>>>>> 4303b09 (Returns all full lines in full[4])
 	return ((unsigned long long) _mm256_movemask_epi8(v) << 32)
 		| (unsigned int) _mm256_movemask_epi8(_mm256_slli_epi64(v, 4));
 }
@@ -1054,51 +479,9 @@ unsigned long long transpose(unsigned long long b)
 
 	return b;
 }
-<<<<<<< HEAD
-<<<<<<< HEAD
 #endif // __AVX2__
 
 #ifndef crc32c_u64
-<<<<<<< HEAD
-/**
- * @brief Caliculate crc32c checksum for 8 bytes data
- * @param crc Initial crc from previous data.
- * @param data Data to accumulate.
- * @return Resulting crc.
- */
-unsigned int crc32c_u64(unsigned int crc, unsigned long long data)
-{
-	crc ^= (unsigned int) data;
-	crc =	crc32c_table[3][crc & 0xff] ^
-		crc32c_table[2][(crc >> 8) & 0xff] ^
-		crc32c_table[1][(crc >> 16) & 0xff] ^
-		crc32c_table[0][crc >> 24];
-	crc ^= (unsigned int) (data >> 32);
-	return	crc32c_table[3][crc & 0xff] ^
-		crc32c_table[2][(crc >> 8) & 0xff] ^
-		crc32c_table[1][(crc >> 16) & 0xff] ^
-		crc32c_table[0][crc >> 24];
-}
-
-/**
- * @brief Caliculate crc32c checksum for a byte
- * @param crc Initial crc from previous data.
- * @param data Data to accumulate.
- * @return Resulting crc.
- */
-unsigned int crc32c_u8(unsigned int crc, unsigned int data)
-{
-	return	crc32c_table[0][(crc ^ data) & 0xff] ^ (crc >> 8);
-}
-=======
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-#endif
-=======
-#endif // __AVX2__
->>>>>>> ea39994 (Improve clang compatibility)
-
-=======
->>>>>>> f33d573 (Fix 'nboard pass not parsed' bug, crc32c for game hash too)
 /**
  * @brief Caliculate crc32c checksum for 8 bytes data
  * @param crc Initial crc from previous data.
diff --git a/src/bit.h b/src/bit.h
index c615fd2..c08243e 100644
--- a/src/bit.h
+++ b/src/bit.h
@@ -3,39 +3,7 @@
  *
  * Bitwise operations header file.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2017
->>>>>>> b3f048d (copyright changes)
-=======
- * @date 1998 - 2018
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
- * @date 1998 - 2020
->>>>>>> 9ad160e (4.4.7 AVX/shuffle optimization in endgame_sse.c)
-=======
- * @date 1998 - 2021
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
- * @date 1998 - 2022
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
- * @date 1998 - 2020
->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line)
-=======
- * @date 1998 - 2022
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
- * @date 1998 - 2023
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -53,71 +21,14 @@
 struct Random;
 
 /* declaration */
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-void bit_init(void);
-<<<<<<< HEAD
-// int next_bit(unsigned long long*);
-void bitboard_write(unsigned long long, FILE*);
-=======
-int bit_weighted_count(const unsigned long long);
-// int next_bit(unsigned long long*);
-void bitboard_write(const unsigned long long, FILE*);
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
-=======
 void bit_init(void);
->>>>>>> 22be102 (table lookup bit_count for non-POPCOUNT from stockfish)
-int bit_weighted_count(unsigned long long);
-=======
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
 // int next_bit(unsigned long long*);
 void bitboard_write(unsigned long long, FILE*);
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
 unsigned long long transpose(unsigned long long);
-<<<<<<< HEAD
-<<<<<<< HEAD
-unsigned int horizontal_mirror_32(unsigned int b);
-=======
->>>>>>> dbeab1c (reduce asm and inline which sometimes breaks debug build)
-=======
 unsigned int horizontal_mirror_32(unsigned int b);
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 unsigned long long horizontal_mirror(unsigned long long);
 int get_rand_bit(unsigned long long, struct Random*);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if !defined(__AVX2__) && defined(hasSSE2) && !defined(POPCOUNT)
-	__m128i bit_weighted_count_sse(unsigned long long, unsigned long long);
-#elif defined (__ARM_NEON)
-	uint64x2_t bit_weighted_count_neon(unsigned long long, unsigned long long);
-=======
-#ifdef __GNUC__
-#define	bswap_short(x)	__builtin_bswap16(x)
-#define	bswap_int(x)	__builtin_bswap32(x)
-#define	vertical_mirror(x)	__builtin_bswap64(x)
-#elif defined(_MSC_VER)
-#define	bswap_short(x)	_byteswap_ushort(x)
-#define	bswap_int(x)	_byteswap_ulong(x)
-#define	vertical_mirror(x)	_byteswap_uint64(x)
-#else
-unsigned short bswap_short(unsigned short);
-unsigned int bswap_int(unsigned int);
-unsigned long long vertical_mirror(unsigned long long);
-=======
-=======
-=======
-extern const unsigned long long X_TO_BIT[];
-/** Return a bitboard with bit x set. */
-#define x_to_bit(x) X_TO_BIT[x]
-=======
-=======
 #if !defined(__AVX2__) && defined(hasSSE2) && !defined(POPCOUNT)
 	__m128i bit_weighted_count_sse(unsigned long long, unsigned long long);
 #elif defined (__ARM_NEON)
@@ -126,179 +37,6 @@ extern const unsigned long long X_TO_BIT[];
 	int bit_weighted_count(unsigned long long);
 #endif
 
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
-extern unsigned long long X_TO_BIT[];
-extern const unsigned long long NEIGHBOUR[];
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-
-/** Return a bitboard with bit x set. */
-// https://eukaryote.hateblo.jp/entry/2020/04/12/054905
-#ifdef HAS_CPU_64 // 1% slower on Sandy Bridge
-	#define x_to_bit(x) (1ULL << (x))
-#else
-	#define x_to_bit(x) X_TO_BIT[x]
-#endif
-
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-#ifndef __has_builtin
-	#define __has_builtin(x) 0  // Compatibility with non-clang compilers.
-#endif
-
-// mirror byte
-#if defined(_M_ARM) // || defined(_M_ARM64) // https://developercommunity.visualstudio.com/content/problem/498995/arm64-missing-rbit-intrinsics.html
-#define mirror_byte(b)	(_arm_rbit(b) >> 24)
-#elif defined(__ARM_ACLE)
-#include <arm_acle.h>
-#define mirror_byte(b)	(__rbit(b) >> 24)
-#elif defined(HAS_CPU_64)
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-// http://graphics.stanford.edu/~seander/bithacks.html
-#define mirror_byte(b)	(unsigned char)((((b) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32)
-#else
-static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x200802) & 0x4422110) + ((b << 7) & 0x880)) * 0x01010101 >> 24); }
-#endif
-
-<<<<<<< HEAD
->>>>>>> 0ee9c1c (mirror_byte added for 1 byte bit reverse)
-#ifndef __has_builtin
-	#define __has_builtin(x) 0  // Compatibility with non-clang compilers.
->>>>>>> ea39994 (Improve clang compatibility)
-#endif
-
-=======
-// rotl8
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-#if __has_builtin(__builtin_rotateleft8)
-	#define rotl8(x,y)	__builtin_rotateleft8((x),(y))
-#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) && (defined(__x86_64__) || defined(__i386__))
-	#define rotl8(x,y)	__builtin_ia32_rolqi((x),(y))
-#elif defined(_MSC_VER)
-	#define	rotl8(x,y)	_rotl8((x),(y))
-#else	// may not compile into 8-bit rotate
-	#define	rotl8(x,y)	((unsigned char)(((x)<<(y))|((unsigned)(x)>>(8-(y)))))
-#endif
-
-// bswap
-#ifdef _MSC_VER
-	#define	bswap_short(x)	_byteswap_ushort(x)
-	#define	bswap_int(x)	_byteswap_ulong(x)
-	#define	vertical_mirror(x)	_byteswap_uint64(x)
-#else
-	#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) || __has_builtin(__builtin_bswap16)
-		#define	bswap_short(x)	__builtin_bswap16(x)
-	#else
-		#define bswap_short(x)	(((unsigned short) (x) >> 8) | ((unsigned short) (x) << 8))
-	#endif
-	#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_builtin(__builtin_bswap64)
-		#define	bswap_int(x)	__builtin_bswap32(x)
-		#define	vertical_mirror(x)	__builtin_bswap64(x)
-	#else
-		unsigned int bswap_int(unsigned int);
-		unsigned long long vertical_mirror(unsigned long long);
-	#endif
-#endif
-
-// ctz / clz
-=======
-/** Loop over each bit set. */
-<<<<<<< HEAD
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-#if (defined(__GNUC__) && __GNUC__ >= 4) || __has_builtin(__builtin_ctzll)
-	#define	first_bit(x)	__builtin_ctzll(x)
-	#define	last_bit(x)	(63 - __builtin_clzll(x))
-#elif defined(tzcnt_u64)
-=======
-#if defined(tzcnt_u64)
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
-	#define	first_bit(x)	tzcnt_u64(x)
-	#define	last_bit(x)	(63 - lzcnt_u64(x))
-#elif ((defined(__GNUC__) && (__GNUC__ >= 4)) || __has_builtin(__builtin_ctzll)) && !defined(__INTEL_COMPILER)
-	#define	first_bit(x)	__builtin_ctzll(x)
-	#define	last_bit(x)	(63 - __builtin_clzll(x))
-#else
-	int first_bit(unsigned long long);
-	int last_bit(unsigned long long);
-#endif
-
-#if defined(HAS_CPU_64) || !defined(__STDC_HOSTED__)	// __STDC_HOSTED__ (C99) to declare var in for statement
-	#define foreach_bit(i, b)	for (i = first_bit(b); b; i = first_bit(b &= (b - 1)))
-#else
-  #ifdef tzcnt_u32
-	#define	first_bit_32(x)	tzcnt_u32(x)
-  #else
-	int first_bit_32(unsigned int);
-  #endif
-	#define foreach_bit(i, b)	(void) i; for (unsigned int _j = 0; _j < sizeof(b) * CHAR_BIT; _j += sizeof(int) * CHAR_BIT) \
-		for (int _r = (b >> _j), i = first_bit_32(_r) + _j; _r; i = first_bit_32(_r &= (_r - 1)) + _j)
-#endif
-
-// popcount
-#ifdef hasNeon
-  #ifdef HAS_CPU_64
-	#define bit_count(x)	vaddv_u8(vcnt_u8(vcreate_u8(x)))
-	#define bit_count_32(x)	vaddv_u8(vcnt_u8(vcreate_u8((unsigned int) x)))
-  #else
-	#define bit_count(x)	vget_lane_u32(vreinterpret_u32_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(vcnt_u8(vcreate_u8(x)))))), 0)
-	#define bit_count_32(x)	vget_lane_u32(vpaddl_u16(vpaddl_u8(vcnt_u8(vcreate_u8(x)))), 0)
-  #endif
-
-#elif defined(POPCOUNT)
-<<<<<<< HEAD
-	/*
-	#if defined (USE_GAS_X64)
-		static inline int bit_count (unsigned long long x) {
-			long long	y;
-			__asm__ ( "popcntq %1,%0" : "=r" (y) : "rm" (x));
-			return y;
-		}
-	#elif defined (USE_GAS_X86)
-		static inline int bit_count (unsigned long long x) {
-			unsigned int	y0, y1;
-			__asm__ ( "popcntl %2,%0\n\t"
-				"popcntl %3,%1"
-				: "=&r" (y0), "=&r" (y1)
-				: "rm" ((unsigned int) x), "rm" ((unsigned int) (x >> 32)));
-			return y0 + y1;
-		}
-	*/
-	#ifdef _MSC_VER
-		#if defined(_M_ARM) || defined(_M_ARM64)
-			#define bit_count(x)	_CountOneBits64(x)
-			#define bit_count_32(x)	_CountOneBits(x)
-		#elif defined(_M_X64)
-			#define bit_count(x)	((int) __popcnt64(x))
-			#define bit_count_32(x)	__popcnt(x)
-		#else
-			#define bit_count(x)	(__popcnt((unsigned int) (x)) + __popcnt((unsigned int) ((x) >> 32)))
-			#define bit_count_32(x)	__popcnt(x)
-		#endif
-	#else
-		#define bit_count(x)	__builtin_popcountll(x)
-		#define bit_count_32(x)	__builtin_popcount(x)
-	#endif
-<<<<<<< HEAD
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-#else
-<<<<<<< HEAD
-	int bit_weighted_count(unsigned long long);
-=======
-	extern unsigned char PopCnt16[1 << 16];
-	static inline int bit_count(unsigned long long b) {
-		union { unsigned long long bb; unsigned short u[4]; } v = { b };
-		return (unsigned char)(PopCnt16[v.u[0]] + PopCnt16[v.u[1]] + PopCnt16[v.u[2]] + PopCnt16[v.u[3]]);
-	}
->>>>>>> 22be102 (table lookup bit_count for non-POPCOUNT from stockfish)
-#endif
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 extern unsigned long long X_TO_BIT[];
 extern const unsigned long long NEIGHBOUR[];
 
@@ -345,8 +83,6 @@ extern const unsigned long long NEIGHBOUR[];
   #endif
 
 #elif defined(POPCOUNT)
-=======
->>>>>>> 0835dae (Reformat #if's)
   /*
   #if defined (USE_GAS_X64)
 	static inline int bit_count (unsigned long long x) {
@@ -379,11 +115,6 @@ extern const unsigned long long NEIGHBOUR[];
 	#define bit_count(x)	__builtin_popcountll(x)
 	#define bit_count_32(x)	__builtin_popcount(x)
   #endif
-<<<<<<< HEAD
-=======
->>>>>>> 4fac39f (get_spreaded_mobility for SSE/32, bit_count_si64 for SSE2)
-=======
->>>>>>> 0835dae (Reformat #if's)
 	#define bit_count_si64(x)	bit_count(_mm_cvtsi128_si64(x))
 
 #else
@@ -396,14 +127,7 @@ extern const unsigned long long NEIGHBOUR[];
 		union { unsigned int bb; unsigned short u[2]; } v = { b };
 		return (unsigned char)(PopCnt16[v.u[0]] + PopCnt16[v.u[1]]);
 	}
-<<<<<<< HEAD
-<<<<<<< HEAD
 	#define bit_count_si64(x)	((unsigned char)(PopCnt16[_mm_extract_epi16((x), 0)] + PopCnt16[_mm_extract_epi16((x), 1)] + PopCnt16[_mm_extract_epi16((x), 2)] + PopCnt16[_mm_extract_epi16((x), 3)]))
-=======
->>>>>>> dc7c79c (Omit unpack from get_edge_stability)
-=======
-	#define bit_count_si64(x)	((unsigned char)(PopCnt16[_mm_extract_epi16((x), 0)] + PopCnt16[_mm_extract_epi16((x), 1)] + PopCnt16[_mm_extract_epi16((x), 2)] + PopCnt16[_mm_extract_epi16((x), 3)]))
->>>>>>> 4fac39f (get_spreaded_mobility for SSE/32, bit_count_si64 for SSE2)
 #endif
 
 #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
@@ -413,7 +137,6 @@ extern const unsigned long long NEIGHBOUR[];
   #ifndef hasMMX
 	extern bool	hasMMX;
   #endif
-<<<<<<< HEAD
 #endif
 
 #if defined(ANDROID) && ((defined(__arm__) && !defined(__ARM_NEON)) || (defined(__i386__) && !defined(hasSSE2)))
@@ -434,85 +157,12 @@ typedef union {
 	__m128i	v2;
 	__m128d	d2;	// used in flip_carry_sse_32.c
   #endif
-=======
-#if defined(__x86_64__) || defined(_M_X64)
-=======
-#if defined(__x86_64__) || defined(_M_X64) || defined(__AVX2__)
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
-#if defined(__SSE2__) || defined(_M_X64)
->>>>>>> bc93772 (Avoid modern compliler warnings)
-	#define hasSSE2	1
-#endif
-
-#ifdef _MSC_VER
-	#include <intrin.h>
-	#ifdef _M_IX86
-		#define	USE_MSVC_X86	1
-	#endif
-#elif defined(hasSSE2)
-	#include <x86intrin.h>
-#endif
-
-#ifdef hasSSE2
-	#define	hasMMX	1
-#endif
-
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-	#ifndef hasSSE2
-=======
-#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(ANDROID)
-	#if !defined(hasSSE2) && !defined(hasNeon)
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
-#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-	#ifndef hasSSE2
->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line)
-		extern bool	hasSSE2;
-	#endif
-	#ifndef hasMMX
-		extern bool	hasMMX;
-	#endif
-=======
->>>>>>> 0835dae (Reformat #if's)
-#endif
-
-#if defined(ANDROID) && ((defined(__arm__) && !defined(__ARM_NEON)) || (defined(__i386__) && !defined(hasSSE2)))
-extern bool	hasSSE2;
-#endif
-
-/** Board : board representation */
-typedef struct Board {
-	unsigned long long player, opponent;     /**< bitboard representation */
-} Board;
-
-typedef union {
-	unsigned long long	ull[2];
-	Board	board;	// for vboard optimization in search
-  #ifdef __ARM_NEON
-	uint64x2_t	v2;
-  #elif defined(hasSSE2) || defined(USE_MSVC_X86)
-	__m128i	v2;
-	__m128d	d2;	// used in flip_carry_sse_32.c
-<<<<<<< HEAD
-#endif
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-  #endif
->>>>>>> e22b052 (_mm_cvtsi64_si128 x86 sim using loadl, requires lvalue)
 }
 #if defined(__GNUC__) && !defined(hasSSE2)
 __attribute__ ((aligned (16)))
 #endif
 V2DI;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 typedef union {
 	unsigned long long	ull[4];
   #ifdef __AVX2__
@@ -529,117 +179,6 @@ typedef union {
 typedef union {
 	unsigned long long	ull[8];
   #ifdef __AVX512VL__
-<<<<<<< HEAD
-	__m512i	v8;
-  #endif
-  #ifdef __AVX2__
-	__m256i	v4[2];
-  #endif
-} V8DI;
-
-/* Define function attributes directive when available */
-
-#if (defined(_MSC_VER) || defined(__clang__)) && defined(hasSSE2)
-	#define	vectorcall	__vectorcall
-#elif defined(__GNUC__) && defined(__i386__)
-	#define	vectorcall	__attribute__((sseregparm))
-#elif 0 // defined(__GNUC__)	// erroreous result on pgo-build
-	#define	vectorcall	__attribute__((sysv_abi))
-#else
-	#define	vectorcall
-#endif
-
-// X64 compatibility sims for X86
-#if !defined(HAS_CPU_64) && (defined(hasSSE2) || defined(USE_MSVC_X86))
-	// static inline __m128i _mm_cvtsi64_si128(const unsigned long long x) {
-	//	return _mm_unpacklo_epi32(_mm_cvtsi32_si128(x), _mm_cvtsi32_si128(x >> 32));
-	// }
-		// better code but requires lvalue
-	#define	_mm_cvtsi64_si128(x)	_mm_loadl_epi64((__m128i *) &(x))
-	static inline unsigned long long vectorcall _mm_cvtsi128_si64(__m128i x) {
-		return *(unsigned long long *) &x;
-	}
-	static inline unsigned long long vectorcall _mm_extract_epi64(__m128i x, int i) {
-		return ((unsigned long long *) &x)[i];
-	}
-
-  #if defined(_MSC_VER) && _MSC_VER<1900
-	static inline __m128i _mm_set_epi64x(unsigned long long b, unsigned long long a) {
-		return _mm_unpacklo_epi64(_mm_cvtsi64_si128(b), _mm_cvtsi64_si128(a));
-	}
-	static inline __m128i _mm_set1_epi64x(unsigned long long x) {
-		__m128i t = _mm_cvtsi64_si128(x);
-		return _mm_unpacklo_epi64(t, t);
-	}
-  #endif
-#endif // !HAS_CPU_64
-
-#if __clang_major__ == 3	// undefined reference to `llvm.x86.avx.storeu.dq.256'
-	#define	_mm_storeu_si128(a,b)	*(__m128i *)(a) = (b)
-	#define	_mm256_storeu_si256(a,b)	*(__m256i *)(a) = (b)
-=======
-#ifdef __AVX2__
-=======
-#ifdef hasSSE2
->>>>>>> 9ad160e (4.4.7 AVX/shuffle optimization in endgame_sse.c)
-typedef union {
-	unsigned long long	ull[4];
-<<<<<<< HEAD
-	#ifdef __AVX2__
-		__m256i	v4;
-	#endif
-	__m128i	v2[2];
-} V4DI;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-typedef union {
-	unsigned long long	ull[4];
-#ifdef __AVX2__
-	__m256i	v4;
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-#endif
-#ifdef hasSSE2
-=======
-#ifdef hasSSE2
-=======
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-typedef union {
-	unsigned long long	ull[4];
-	#ifdef __AVX2__
-		__m256i	v4;
-	#endif
-<<<<<<< HEAD
->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line)
-=======
-	#ifdef hasSSE2
-<<<<<<< HEAD
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-	__m128i	v2[2];
-=======
-		__m128i	v2[2];
-	#endif
-	#ifdef USE_MSVC_X86
-		__m64	v1[4];
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-	#endif
-=======
-  #ifdef __AVX2__
-	__m256i	v4;
-  #endif
-  #ifdef hasSSE2
-	__m128i	v2[2];
-  #endif
-  #ifdef USE_MSVC_X86
-	__m64	v1[4];
-  #endif
->>>>>>> e22b052 (_mm_cvtsi64_si128 x86 sim using loadl, requires lvalue)
-} V4DI;
-
-typedef union {
-	unsigned long long	ull[8];
-  #ifdef __AVX512F__
-=======
->>>>>>> b1cae3c (Rewrite AVX512 LASTFLIP_HIGHCUT not to use kortest)
 	__m512i	v8;
   #endif
   #ifdef __AVX2__
diff --git a/src/bit_intrinsics.h b/src/bit_intrinsics.h
index 6625411..3ddd483 100644
--- a/src/bit_intrinsics.h
+++ b/src/bit_intrinsics.h
@@ -1,25 +1,9 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
 /**
  * @file bit_intrinsics.h
  *
  * CPU dependent bit operation intrinsics.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 2020 - 2024
-=======
- * @date 2020 - 2022
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
- * @date 2020 - 2023
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-=======
- * @date 2020 - 2024
->>>>>>> a09308f (Renew version string and copyright year)
  * @author Richard Delorme
  * @author Toshihiko Okuhara
  * @version 4.5
@@ -28,27 +12,11 @@
 #ifndef EDAX_BIT_INTRINSICS_H
 #define EDAX_BIT_INTRINSICS_H
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64)
-	#define	HAS_CPU_64	1
-#endif
-
-#if defined(__SSE2__) || defined(__AVX__) || defined(_M_X64)
-=======
-#if !defined(HAS_CPU_64) && (defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64))
-	#define	HAS_CPU_64	1
-#endif
-
-#if defined(__SSE2__) || defined(_M_X64)
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
 #if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64)
 	#define	HAS_CPU_64	1
 #endif
 
 #if defined(__SSE2__) || defined(__AVX__) || defined(_M_X64)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 	#define hasSSE2	1
 #endif
 
@@ -56,60 +24,24 @@
 	#define	hasMMX	1
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 #if defined(ANDROID) && defined(__arm__)
   #if __ANDROID_API__ < 21
 	#define	DISPATCH_NEON	1
   #else
 	#define	__ARM_NEON	1
-<<<<<<< HEAD
-  #endif
-#elif defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
-	#define	__ARM_NEON	1
-#endif
-#ifdef __ARM_NEON
-	#include "arm_neon.h"
-=======
-#if defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
-	#define hasNeon	1
-  #ifndef __ARM_NEON__
-	#define	__ARM_NEON__	1
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
   #endif
 #elif defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
 	#define	__ARM_NEON	1
 #endif
-<<<<<<< HEAD
-#ifdef __ARM_NEON__
-#include "arm_neon.h"
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
 #ifdef __ARM_NEON
 	#include "arm_neon.h"
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 #endif
 
 #ifdef _MSC_VER
 	#include <intrin.h>
-<<<<<<< HEAD
-<<<<<<< HEAD
   #ifdef _M_IX86
 	#define	USE_MSVC_X86	1
   #endif
-=======
-	#ifdef _M_IX86
-		#define	USE_MSVC_X86	1
-	#endif
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
-  #ifdef _M_IX86
-	#define	USE_MSVC_X86	1
-  #endif
->>>>>>> 0835dae (Reformat #if's)
 #elif defined(hasSSE2)
 	#include <x86intrin.h>
 #endif
@@ -119,16 +51,7 @@
 #endif
 
 // mirror byte
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(_M_ARM) // || (defined(_M_ARM64) && _MSC_VER >= 1922)	// https://developercommunity.visualstudio.com/t/ARM64-still-missing-RBIT-intrinsics/10547420
-=======
-#if defined(_M_ARM) || (defined(_M_ARM64) && _MSC_VER >= 1922)	// https://developercommunity.visualstudio.com/content/problem/498995/arm64-missing-rbit-intrinsics.html
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
-=======
-#if defined(_M_ARM) // || (defined(_M_ARM64) && _MSC_VER >= 1922)	// https://developercommunity.visualstudio.com/t/ARM64-still-missing-RBIT-intrinsics/10547420
->>>>>>> 66e8cab (MSC ARM64 still missing _arm_rbit)
 	#define mirror_byte(b)	(_arm_rbit(b) >> 24)
 #elif defined(__ARM_ACLE)
 	#include <arm_acle.h>
@@ -138,22 +61,6 @@
 	#define mirror_byte(b)	(unsigned char)((((b) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32)
 #else
 	static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x200802) & 0x4422110) + ((b << 7) & 0x880)) * 0x01010101 >> 24); }
-=======
-#if defined(_M_ARM) // || defined(_M_ARM64) // https://developercommunity.visualstudio.com/content/problem/498995/arm64-missing-rbit-intrinsics.html
-	#define mirror_byte(b)	(_arm_rbit(b) >> 24)
-#elif defined(__ARM_ACLE)
-	#include <arm_acle.h>
-	#define mirror_byte(b)	(__rbit(b) >> 24)
-#elif defined(HAS_CPU_64)
-	// http://graphics.stanford.edu/~seander/bithacks.html
-	#define mirror_byte(b)	(unsigned char)((((b) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32)
-#else
-<<<<<<< HEAD
-static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x200802) & 0x4422110) + ((b << 7) & 0x880)) * 0x01010101 >> 24); }
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
-	static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x200802) & 0x4422110) + ((b << 7) & 0x880)) * 0x01010101 >> 24); }
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 #endif
 
 // rotl8
@@ -173,10 +80,6 @@ static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x20080
 	#define	bswap_int(x)	_byteswap_ulong(x)
 	#define	vertical_mirror(x)	_byteswap_uint64(x)
 #else
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 0835dae (Reformat #if's)
   #if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) || __has_builtin(__builtin_bswap16)
 	#define	bswap_short(x)	__builtin_bswap16(x)
   #else
@@ -189,23 +92,6 @@ static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x20080
 	unsigned int bswap_int(unsigned int);
 	unsigned long long vertical_mirror(unsigned long long);
   #endif
-<<<<<<< HEAD
-=======
-	#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) || __has_builtin(__builtin_bswap16)
-		#define	bswap_short(x)	__builtin_bswap16(x)
-	#else
-		#define bswap_short(x)	(((unsigned short) (x) >> 8) | ((unsigned short) (x) << 8))
-	#endif
-	#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_builtin(__builtin_bswap64)
-		#define	bswap_int(x)	__builtin_bswap32(x)
-		#define	vertical_mirror(x)	__builtin_bswap64(x)
-	#else
-		unsigned int bswap_int(unsigned int);
-		unsigned long long vertical_mirror(unsigned long long);
-	#endif
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
->>>>>>> 0835dae (Reformat #if's)
 #endif
 
 // lzcnt / tzcnt (0 allowed)
@@ -265,23 +151,11 @@ static inline int _tzcnt_u64(unsigned long long x) {
 
 #elif defined(_MSC_VER)
 	static inline int lzcnt_u32(unsigned int n) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 		unsigned long i;
-=======
-		unsigned int i;
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
-		unsigned long i;
->>>>>>> 77ab3e9 (Experimental branchless AVX512 lastflip in endgame_sse.c)
 		if (!_BitScanReverse(&i, n))
 			i = 32 ^ 31;
 		return i ^ 31;
 	}
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 0835dae (Reformat #if's)
   #ifdef _M_X64
 	static inline int lzcnt_u64(unsigned long long n) {
 		unsigned long i;
@@ -299,7 +173,6 @@ static inline int _tzcnt_u64(unsigned long long x) {
 		return i ^ 63;
 	}
   #endif
-<<<<<<< HEAD
 
 #elif defined(__ARM_FEATURE_CLZ)
   #if __ARM_ACLE >= 110
@@ -309,45 +182,6 @@ static inline int _tzcnt_u64(unsigned long long x) {
 	#define	lzcnt_u32(x)	__builtin_clz(x)
 	#define	lzcnt_u64(x)	__builtin_clzll(x)
   #endif
-=======
-	#ifdef _M_X64
-		static inline int lzcnt_u64(unsigned long long n) {
-			unsigned long i;
-			if (!_BitScanReverse64(&i, n))
-				i = 64 ^ 63;
-			return i ^ 63;
-		}
-	#else
-		static inline int lzcnt_u64(unsigned long long n) {
-			unsigned long i;
-			if (_BitScanReverse(&i, n >> 32))
-				return i ^ 31;
-			if (!_BitScanReverse(&i, (unsigned int) n))
-				i = 64 ^ 63;
-			return i ^ 63;
-		}
-	#endif
-
-#elif defined(__ARM_FEATURE_CLZ)
-	#if __ARM_ACLE >= 110
-		#define	lzcnt_u32(x)	__clz(x)
-		#define	lzcnt_u64(x)	__clzll(x)
-	#else // strictly-incorrect patch
-		#define	lzcnt_u32(x)	__builtin_clz(x)
-		#define	lzcnt_u64(x)	__builtin_clzll(x)
-	#endif
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
-
-#elif defined(__ARM_FEATURE_CLZ)
-  #if __ARM_ACLE >= 110
-	#define	lzcnt_u32(x)	__clz(x)
-	#define	lzcnt_u64(x)	__clzll(x)
-  #else // strictly-incorrect patch
-	#define	lzcnt_u32(x)	__builtin_clz(x)
-	#define	lzcnt_u64(x)	__builtin_clzll(x)
-  #endif
->>>>>>> 0835dae (Reformat #if's)
 
 #else
 	static inline int lzcnt_u32(unsigned long x) { return (x ? __builtin_clz(x) : 32); }
@@ -359,261 +193,11 @@ static inline int _tzcnt_u64(unsigned long long x) {
 	#define	tzcnt_u64(x)	_tzcnt_u64(x)
 
 #elif defined(__ARM_FEATURE_CLZ)
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 0835dae (Reformat #if's)
   #ifdef _M_ARM
 	#define	tzcnt_u32(x)	_arm_clz(_arm_rbit(x))
   #elif __has_builtin(__rbit) // (__ARM_ARCH >= 6 && __ARM_ISA_THUMB >= 2) || __ARM_ARCH >= 7	// not for gcc
 	#define	tzcnt_u32(x)	__clz(__rbit(x))
   #endif
-<<<<<<< HEAD
-#endif
-
-#if defined(__SSE4_2__) || defined(__AVX__)
-  #ifdef HAS_CPU_64
-	#define	crc32c_u64(crc,d)	_mm_crc32_u64((crc),(d))
-  #else
-	#define	crc32c_u64(crc,d)	_mm_crc32_u32(_mm_crc32_u32((crc),(d)),((d)>>32))
-  #endif
-	#define	crc32c_u8(crc,d)	_mm_crc32_u8((crc),(d))
-
-#elif defined(__ARM_FEATURE_CRC32)
-	#include "arm_acle.h"
-	#define	crc32c_u64(crc,d)	__crc32cd((crc),(d))
-	#define crc32c_u8(crc,d)	__crc32cb((crc),(d))
-
-#else
-	unsigned int crc32c_u64(unsigned int crc, unsigned long long data);
-	unsigned int crc32c_u8(unsigned int crc, unsigned int data);
-#endif
-
-#endif // EDAX_BIT_INTRINSICS_H
-=======
-/**
- * @file bit_intrinsics.h
- *
- * CPU dependent bit operation intrinsics.
- *
- * @date 2020 - 2021
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.5
- */
-
-#ifndef EDAX_BIT_INTRINSICS_H
-#define EDAX_BIT_INTRINSICS_H
-
-#if !defined(HAS_CPU_64) && (defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64))
-	#define	HAS_CPU_64	1
-#endif
-
-#if defined(__SSE2__) || defined(_M_X64)
-	#define hasSSE2	1
-#endif
-
-#ifdef hasSSE2
-	#define	hasMMX	1
-#endif
-
-#if defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
-	#define hasNeon	1
-	#ifndef __ARM_NEON__
-		#define	__ARM_NEON__	1
-	#endif
-#endif
-#ifdef __ARM_NEON__
-#include "arm_neon.h"
-#endif
-
-#ifdef _MSC_VER
-	#include <intrin.h>
-	#ifdef _M_IX86
-		#define	USE_MSVC_X86	1
-	#endif
-#elif defined(hasSSE2)
-	#include <x86intrin.h>
-#endif
-
-#ifndef __has_builtin
-	#define __has_builtin(x) 0  // Compatibility with non-clang compilers.
-#endif
-
-// mirror byte
-#if defined(_M_ARM) // || defined(_M_ARM64) // https://developercommunity.visualstudio.com/content/problem/498995/arm64-missing-rbit-intrinsics.html
-#define mirror_byte(b)	(_arm_rbit(b) >> 24)
-#elif defined(__ARM_ACLE)
-#include <arm_acle.h>
-#define mirror_byte(b)	(__rbit(b) >> 24)
-#elif defined(HAS_CPU_64)
-// http://graphics.stanford.edu/~seander/bithacks.html
-#define mirror_byte(b)	(unsigned char)((((b) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32)
-#else
-static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x200802) & 0x4422110) + ((b << 7) & 0x880)) * 0x01010101 >> 24); }
-#endif
-
-// rotl8
-#if __has_builtin(__builtin_rotateleft8)
-	#define rotl8(x,y)	__builtin_rotateleft8((x),(y))
-#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) && (defined(__x86_64__) || defined(__i386__))
-	#define rotl8(x,y)	__builtin_ia32_rolqi((x),(y))
-#elif defined(_MSC_VER)
-	#define	rotl8(x,y)	_rotl8((x),(y))
-#else	// may not compile into 8-bit rotate
-	#define	rotl8(x,y)	((unsigned char)(((x)<<(y))|((unsigned char)(x)>>(8-(y)))))
-#endif
-
-// bswap
-#ifdef _MSC_VER
-	#define	bswap_short(x)	_byteswap_ushort(x)
-	#define	bswap_int(x)	_byteswap_ulong(x)
-	#define	vertical_mirror(x)	_byteswap_uint64(x)
-#else
-	#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) || __has_builtin(__builtin_bswap16)
-		#define	bswap_short(x)	__builtin_bswap16(x)
-	#else
-		#define bswap_short(x)	(((unsigned short) (x) >> 8) | ((unsigned short) (x) << 8))
-	#endif
-	#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_builtin(__builtin_bswap64)
-		#define	bswap_int(x)	__builtin_bswap32(x)
-		#define	vertical_mirror(x)	__builtin_bswap64(x)
-	#else
-		unsigned int bswap_int(unsigned int);
-		unsigned long long vertical_mirror(unsigned long long);
-	#endif
-#endif
-
-// lzcnt / tzcnt (0 allowed)
-
-#ifdef USE_GAS_X86
-#ifdef __LZCNT__
-static inline int _lzcnt_u64(unsigned long long x) {
-	int	y;
-	__asm__ (
-		"lzcntl	%1, %0\n\t"
-		"lzcntl	%2, %2\n\t"
-		"leal	(%0, %2), %0\n\t"
-		"cmovnc	%2, %0"
-	: "=&r" (y) : "0" ((unsigned int) x), "r" ((unsigned int) (x >> 32)) );
-	return y;
-}
-#endif
-#ifdef __BMI__
-static inline int _tzcnt_u64(unsigned long long x) {
-	int	y;
-	__asm__ (
-		"tzcntl	%1, %0\n\t"
-		"tzcntl	%2, %2\n\t"
-		"leal	(%0, %2), %0\n\t"
-		"cmovnc	%2, %0"
-	: "=&r" (y) : "0" ((unsigned int) (x >> 32)), "r" ((unsigned int) x) );
-	return y;
-}
-#endif
-#elif defined(USE_MSVC_X86) && (defined(__AVX2__) || defined(__LZCNT__))
-static inline int _lzcnt_u64(unsigned long long x) {
-	__asm {
-		lzcnt	eax, dword ptr x
-		lzcnt	edx, dword ptr x+4
-		lea	eax, [eax+edx]
-		cmovnc	eax, edx
-	}
-}
-
-static inline int _tzcnt_u64(unsigned long long x) {
-	__asm {
-		tzcnt	eax, dword ptr x+4
-		tzcnt	edx, dword ptr x
-		lea	eax, [eax+edx]
-		cmovnc	eax, edx
-	}
-}
-#endif
-
-#if defined(__AVX2__) || defined(__LZCNT__)
-	#define	lzcnt_u32(x)	_lzcnt_u32(x)
-	#define	lzcnt_u64(x)	_lzcnt_u64(x)
-
-#elif defined(_M_ARM) || defined(_M_ARM64)
-	#define lzcnt_u32(x)	_CountLeadingZeros(x)
-	#define lzcnt_u64(x)	_CountLeadingZeros64(x)
-
-#elif defined(_MSC_VER)
-	static inline int lzcnt_u32(unsigned int n) {
-		unsigned int i;
-		if (!_BitScanReverse(&i, n))
-			i = 32 ^ 31;
-		return i ^ 31;
-	}
-	#ifdef _M_X64
-		static inline int lzcnt_u64(unsigned long long n) {
-			unsigned long i;
-			if (!_BitScanReverse64(&i, n))
-				i = 64 ^ 63;
-			return i ^ 63;
-		}
-	#else
-		static inline int lzcnt_u64(unsigned long long n) {
-			unsigned long i;
-			if (_BitScanReverse(&i, n >> 32))
-				return i ^ 31;
-			if (!_BitScanReverse(&i, (unsigned int) n))
-				i = 64 ^ 63;
-			return i ^ 63;
-		}
-	#endif
-
-#elif defined(__ARM_FEATURE_CLZ)
-	#if __ARM_ACLE >= 110
-		#define	lzcnt_u32(x)	__clz(x)
-		#define	lzcnt_u64(x)	__clzll(x)
-	#else // strictly-incorrect patch
-		#define	lzcnt_u32(x)	__builtin_clz(x)
-		#define	lzcnt_u64(x)	__builtin_clzll(x)
-	#endif
-
-#else
-	static inline int lzcnt_u32(unsigned long x) { return (x ? __builtin_clz(x) : 32); }
-	static inline int lzcnt_u64(unsigned long x) { return (x ? __builtin_clzll(x) : 64); }
-#endif
-
-#if defined(__BMI__) || defined(__AVX2__)
-	#define	tzcnt_u32(x)	_tzcnt_u32(x)
-	#define	tzcnt_u64(x)	_tzcnt_u64(x)
-
-#elif defined(__ARM_FEATURE_CLZ)
-	#ifdef _M_ARM
-		#define	tzcnt_u32(x)	_arm_clz(_arm_rbit(x))
-	#elif __ARM_ACLE >= 110
-		#define	tzcnt_u32(x)	__clz(__rbit(x))
-	// #elif defined(__GNUC__)
-	//	#define	tzcnt_u32(x)	__builtin_ctz(x)	// '& 0x07' optimized out assuming x != 0
-	#endif
-#endif
-
-#if defined(__SSE4_2__) || defined(__AVX__)
-	#ifdef HAS_CPU_64
-		#define	crc32c_u64(crc,d)	_mm_crc32_u64((crc),(d))
-	#else
-		#define	crc32c_u64(crc,d)	_mm_crc32_u32(_mm_crc32_u32((crc),(d)),((d)>>32))
-	#endif
-#elif defined(__ARM_FEATURE_CRC32)
-	#define	crc32c_u64(crc,d)	__crc32cd((crc),(d))
-#else
-	unsigned int crc32c_u64(unsigned int crc, unsigned long long data);
-#endif
-
-#endif // EDAX_BIT_INTRINSICS_H
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	#ifdef _M_ARM
-		#define	tzcnt_u32(x)	_arm_clz(_arm_rbit(x))
-	#elif __has_builtin(__rbit) // (__ARM_ARCH >= 6 && __ARM_ISA_THUMB >= 2) || __ARM_ARCH >= 7	// not for gcc
-		#define	tzcnt_u32(x)	__clz(__rbit(x))
-	#endif
-=======
->>>>>>> 0835dae (Reformat #if's)
 #endif
 
 #if defined(__SSE4_2__) || defined(__AVX__)
@@ -635,4 +219,3 @@ static inline int _tzcnt_u64(unsigned long long x) {
 #endif
 
 #endif // EDAX_BIT_INTRINSICS_H
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
diff --git a/src/board.c b/src/board.c
index e017836..e4f31fb 100644
--- a/src/board.c
+++ b/src/board.c
@@ -11,35 +11,7 @@
  * some board properties. Most of the functions are optimized to be as fast as
  * possible, while remaining readable.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2024
-=======
- * @date 1998 - 2017
->>>>>>> b3f048d (copyright changes)
-=======
- * @date 1998 - 2018
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
- * @date 1998 - 2020
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
- * @date 1998 - 2021
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
- * @date 1998 - 2022
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
- * @date 1998 - 2023
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-=======
- * @date 1998 - 2024
->>>>>>> b4fb773 (AVX optimized board_unique)
  * @author Richard Delorme
  * @author Toshihiko Okuhara
  * @version 4.5
@@ -62,27 +34,12 @@
 #elif MOVE_GENERATOR == MOVE_GENERATOR_SSE
 	#include "flip_sse.c"
 #elif MOVE_GENERATOR == MOVE_GENERATOR_BITSCAN
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
   #ifdef __ARM_NEON
 	#define	flip_neon	flip
 	#include "flip_neon_bitscan.c"
   #else
 	#include "flip_bitscan.c"
   #endif
-<<<<<<< HEAD
-=======
-	#ifdef hasNeon
-		#define	flip_neon	flip
-		#include "flip_neon_bitscan.c"
-	#else
-		#include "flip_bitscan.c"
-	#endif
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 #elif MOVE_GENERATOR == MOVE_GENERATOR_ROXANE
 	#include "flip_roxane.c"
 #elif MOVE_GENERATOR == MOVE_GENERATOR_32
@@ -91,107 +48,29 @@
 	#include "flip_sse_bswap.c"
 #elif MOVE_GENERATOR == MOVE_GENERATOR_AVX
 	#include "flip_avx_ppfill.c"
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #elif MOVE_GENERATOR == MOVE_GENERATOR_AVX512
 	#include "flip_avx512cd.c"
 #elif MOVE_GENERATOR == MOVE_GENERATOR_NEON
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
   #ifdef __aarch64__
 	#include "flip_neon_rbit.c"
   #else
 	#include "flip_neon_lzcnt.c"
   #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
-#elif MOVE_GENERATOR == MOVE_GENERATOR_SVE
-	#include "flip_sve_lzcnt.c"
-=======
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
-=======
-#elif MOVE_GENERATOR == MOVE_GENERATOR_AVX512
-	#include "flip_avx512cd.c"
->>>>>>> 393b667 (Experimental AVX512VL/CD version of move generator)
-#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON
-	#include "flip_neon_lzcnt.c"
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-	#ifdef __aarch64__
-		#include "flip_neon_rbit.c"
-	#else
-		#include "flip_neon_lzcnt.c"
-	#endif
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-=======
 #elif MOVE_GENERATOR == MOVE_GENERATOR_SVE
 	#include "flip_sve_lzcnt.c"
->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build)
 #else // MOVE_GENERATOR == MOVE_GENERATOR_KINDERGARTEN
 	#include "flip_kindergarten.c"
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if LAST_FLIP_COUNTER == COUNT_LAST_FLIP_CARRY
-	#include "count_last_flip_carry_64.c"
-#elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE
-	#include "count_last_flip_sse.c"
-#elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BITSCAN
-	#include "count_last_flip_bitscan.c"
-#elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_PLAIN
-	#include "count_last_flip_plain.c"
-#elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_32
-	#include "count_last_flip_32.c"
-#elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2
-	#include "count_last_flip_bmi2.c"
-#else // LAST_FLIP_COUNTER == COUNT_LAST_FLIP_KINDERGARTEN
-	#include "count_last_flip_kindergarten.c"
-#endif
-
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
->>>>>>> 6506166 (More SSE optimizations)
 
 /** edge stability global data */
 unsigned char edge_stability[256 * 256];
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)) && !defined(hasSSE2)
 	#include "board_mmx.c"
-<<<<<<< HEAD
 #endif
 #if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(hasSSE2) || defined(__ARM_NEON)) && !defined(ANDROID)
 	#include "board_sse.c"
-=======
-/** conversion from an 8-bit line to the A1-A8 line */
-// unsigned long long A1_A8[256];
-
-=======
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-=======
-#if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)) && !defined(hasSSE2)
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-#include "board_mmx.c"
-#endif
-#if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(hasSSE2) || defined(hasNeon)) && !defined(ANDROID)
-#include "board_sse.c"
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-#endif
-#if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(hasSSE2) || defined(__ARM_NEON)) && !defined(ANDROID)
-	#include "board_sse.c"
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 #endif
 
 
@@ -360,34 +239,8 @@ bool board_lesser(const Board *b1, const Board *b2)
 	if (b1->player != b2->player)
 		return (b1->player < b2->player);
 	else	return (b1->opponent < b2->opponent);
-<<<<<<< HEAD
-=======
-}
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-/**
- * @brief Compare two board for equality
- *
- * @param b1 first board
- * @param b2 second board
- * @return true if both board are equal
- */
-bool board_equal(const Board *b1, const Board *b2)
-{
-	return (b1->player == b2->player && b1->opponent == b2->opponent);
->>>>>>> 8a7e354 (Exclude hash init time from count games; other minor size opts)
 }
 
-=======
->>>>>>> de58f52 (AVX2 board_equal; delayed hash lock code)
-#if !defined(hasSSE2) && !defined(hasNeon)	// SSE version in board_sse.c
-=======
-#if !defined(hasSSE2) && !defined(__ARM_NEON)	// SSE version in board_sse.c
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-=======
->>>>>>> 6bc747d (Split board_flip_* from board_symetry)
 /**
  * @brief symetric board
  *
@@ -413,21 +266,9 @@ void board_transpose(const Board *board, Board *sym)
 	sym->player = transpose(board->player);
 	sym->opponent = transpose(board->opponent);
 }
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-void board_symetry(const Board *board, const int s, Board *sym)
-{
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#endif
-=======
->>>>>>> a23c3d4 (SSE optimized board_symetry again)
 
 void board_symetry(const Board *board, const int s, Board *sym)
 {
->>>>>>> 6bc747d (Split board_flip_* from board_symetry)
 	*sym = *board;
 	if (s & 1)
 		board_horizontal_mirror(sym, sym);
@@ -435,34 +276,6 @@ void board_symetry(const Board *board, const int s, Board *sym)
 		board_vertical_mirror(sym, sym);
 	if (s & 4)
 		board_transpose(sym, sym);
-<<<<<<< HEAD
-=======
-	register unsigned long long player, opponent;
-=======
-	unsigned long long player, opponent;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-
-	player = board->player;
-	opponent = board->opponent;
-
-	if (s & 1) {
-		player = horizontal_mirror(player);
-		opponent = horizontal_mirror(opponent);
-	}
-	if (s & 2) {
-		player = vertical_mirror(player);
-		opponent = vertical_mirror(opponent);
-	}
-	if (s & 4) {
-		player = transpose(player);
-		opponent = transpose(opponent);
-	}
-
-	sym->player = player;
-	sym->opponent = opponent;
->>>>>>> dbeab1c (reduce asm and inline which sometimes breaks debug build)
-=======
->>>>>>> 6bc747d (Split board_flip_* from board_symetry)
 
 	board_check(sym);
 }
@@ -527,15 +340,7 @@ void board_rand(Board *board, int n_ply, Random *r)
 				break;
 			}
 		}
-<<<<<<< HEAD
-<<<<<<< HEAD
-		board_get_move_flip(board, get_rand_bit(moves, r), &move);
-=======
-		board_get_move(board, get_rand_bit(moves, r), &move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		board_get_move_flip(board, get_rand_bit(moves, r), &move);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 		board_update(board, &move);
 	}
 }
@@ -553,13 +358,6 @@ void board_rand(Board *board, int n_ply, Random *r)
  */
 unsigned long long board_get_move_flip(const Board *board, const int x, Move *move)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	move->flipped = board_flip(board, x);
->>>>>>> 6506166 (More SSE optimizations)
-=======
->>>>>>> 542ee82 (Change store order to reduce register saving)
 	move->x = x;
 	move->flipped = board_flip(board, x);
 	return move->flipped;
@@ -580,17 +378,6 @@ bool board_check_move(const Board *board, Move *move)
 	else return true;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if !(defined(hasMMX) && (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)))
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-#if !(defined(hasMMX) && (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)))	// 32bit MMX/SSE version in board_mmx.c
->>>>>>> 6506166 (More SSE optimizations)
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 /**
  * @brief Update a board.
  *
@@ -602,29 +389,12 @@ bool board_check_move(const Board *board, Move *move)
  */
 void board_update(Board *board, const Move *move)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) && (defined(HAS_CPU_64) || !defined(__3dNOW__))	// 3DNow CPU has fast emms, and possibly slow SSE
-=======
-#if defined(hasSSE2) && (defined(HAS_CPU_64) || !defined(__3dNOW__))
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-=======
-#if defined(hasSSE2) && (defined(HAS_CPU_64) || !defined(__3dNOW__))	// 3DNow CPU has fast emms, and possibly slow SSE
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 	__m128i	OP = _mm_loadu_si128((__m128i *) board);
 	OP = _mm_xor_si128(OP, _mm_or_si128(_mm_set1_epi64x(move->flipped), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x])));
 	_mm_storeu_si128((__m128i *) board, _mm_shuffle_epi32(OP, 0x4e));
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#elif defined(hasMMX)
-=======
-#elif defined(hasMMX)	// 3DNow CPU has fast emms
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-=======
 #elif defined(hasMMX)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 	__m64	F = *(__m64 *) &move->flipped;
 	__m64	P = _m_pxor(*(__m64 *) &board->player, _m_por(F, *(__m64 *) &X_TO_BIT[move->x]));
 	__m64	O = _m_pxor(*(__m64 *) &board->opponent, F);
@@ -637,14 +407,6 @@ void board_update(Board *board, const Move *move)
 	board->opponent = board->player ^ (move->flipped | X_TO_BIT[move->x]);
 	board->player = O ^ move->flipped;
 #endif
-<<<<<<< HEAD
-=======
-	unsigned long long O = board->opponent;
-	board->opponent = board->player ^ (move->flipped | X_TO_BIT[move->x]);
-	board->player = O ^ move->flipped;
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	board_check(board);
 }
 
@@ -659,10 +421,6 @@ void board_update(Board *board, const Move *move)
  */
 void board_restore(Board *board, const Move *move)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 #if defined(hasSSE2) && (defined(HAS_CPU_64) || !defined(__3dNOW__))
 	__m128i	OP = _mm_shuffle_epi32(_mm_loadu_si128((__m128i *) board), 0x4e);
 	OP = _mm_xor_si128(OP, _mm_or_si128(_mm_set1_epi64x(move->flipped), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x])));
@@ -681,22 +439,8 @@ void board_restore(Board *board, const Move *move)
 	board->player = board->opponent ^ (move->flipped | X_TO_BIT[move->x]);
 	board->opponent = P ^ move->flipped;
 #endif
-<<<<<<< HEAD
-=======
-	unsigned long long P = board->player;
-	board->player = board->opponent ^ (move->flipped | X_TO_BIT[move->x]);
-	board->opponent = P ^ move->flipped;
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-	board_check(board);
-}
-<<<<<<< HEAD
-=======
-#endif // hasMMX
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
 	board_check(board);
 }
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 
 /**
  * @brief Passing move
@@ -712,19 +456,7 @@ void board_pass(Board *board)
 	board_check(board);
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if (MOVE_GENERATOR != MOVE_GENERATOR_AVX) && (MOVE_GENERATOR != MOVE_GENERATOR_AVX512) && (MOVE_GENERATOR != MOVE_GENERATOR_SSE) && (MOVE_GENERATOR != MOVE_GENERATOR_NEON)	// SSE version in board_sse.c
-=======
-#if !(defined(hasSSE2) && ((MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)))	// SSE version in endgame_sse.c
->>>>>>> 6506166 (More SSE optimizations)
-=======
-#if (MOVE_GENERATOR != MOVE_GENERATOR_AVX) && (MOVE_GENERATOR != MOVE_GENERATOR_SSE) && (MOVE_GENERATOR != MOVE_GENERATOR_NEON)	// SSE version in board_sse.c
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-#if (MOVE_GENERATOR != MOVE_GENERATOR_AVX) && (MOVE_GENERATOR != MOVE_GENERATOR_AVX512) && (MOVE_GENERATOR != MOVE_GENERATOR_SSE) && (MOVE_GENERATOR != MOVE_GENERATOR_NEON)	// SSE version in board_sse.c
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
 /**
  * @brief Compute a board resulting of a move played on a previous board.
  *
@@ -743,53 +475,9 @@ unsigned long long board_next(const Board *board, const int x, Board *next)
 
 	return flipped;
 }
-<<<<<<< HEAD
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if !defined(hasSSE2) && !defined(__ARM_NEON)	// SSE version in board_sse.c
-=======
-/**
- * @brief Compute a board resulting of an opponent move played on a previous board.
- *
- * Compute the board after passing and playing a move.
- *
- * @param board board to play the move on.
- * @param x opponent move to play.
- * @param next resulting board.
- * @return flipped discs.
- */
-unsigned long long board_pass_next(const Board *board, const int x, Board *next)
-{
-	const unsigned long long flipped = Flip(x, board->opponent, board->player);
-
-	next->opponent = board->opponent ^ (flipped | x_to_bit(x));
-	next->player = board->player ^ flipped;
-
-	return flipped;
-}
-=======
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
-#endif
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if !defined(__x86_64__) && !defined(_M_X64) && !defined(__AVX2__)
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-#if !defined(__x86_64__) && !defined(_M_X64) && !defined(__AVX2__)	// sse version in board_sse.c
->>>>>>> 6506166 (More SSE optimizations)
-=======
-#if !defined(hasSSE2) && !defined(hasNeon)	// sse version in board_sse.c
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-#if !defined(hasSSE2) && !defined(hasNeon)	// SSE version in board_sse.c
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
-=======
-#if !defined(hasSSE2) && !defined(__ARM_NEON)	// SSE version in board_sse.c
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 /**
  * @brief Get a part of the moves.
  *
@@ -869,46 +557,13 @@ static inline unsigned long long get_some_moves(const unsigned long long P, cons
  * @param O bitboard with opponent's discs.
  * @return all legal moves in a 64-bit unsigned integer.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if !defined(__x86_64__) && !defined(_M_X64)
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 unsigned long long get_moves(const unsigned long long P, const unsigned long long O)
 {
 	unsigned long long moves, OM;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(DISPATCH_NEON)
-=======
-	#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
 	if (hasSSE2)
 		return get_moves_sse(P, O);
-<<<<<<< HEAD
-	#endif
-	#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-	if (hasMMX)
-=======
-=======
-	#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(ANDROID)
-=======
-	#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(DISPATCH_NEON)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-	if (hasSSE2)
-		return get_moves_sse(P, O);
-<<<<<<< HEAD
-	#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	else if (hasMMX)
->>>>>>> 0f2fb39 (Chage 32-bit get_moves_mmx/sse parameters to 64 bits)
-		return get_moves_mmx(P, O);
-=======
->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line)
 	#endif
 	#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 	if (hasMMX)
@@ -923,15 +578,7 @@ unsigned long long get_moves(const unsigned long long P, const unsigned long lon
 
 	return moves & ~(P|O);	// mask with empties
 }
-<<<<<<< HEAD
-<<<<<<< HEAD
-#endif // hasSSE2/__ARM_NEON
-=======
-#endif // hasSSE2/hasNeon
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
 #endif // hasSSE2/__ARM_NEON
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 
 /**
  * @brief Get legal moves on a 6x6 board.
@@ -944,19 +591,9 @@ unsigned long long get_moves(const unsigned long long P, const unsigned long lon
  */
 unsigned long long get_moves_6x6(const unsigned long long P, const unsigned long long O)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	unsigned long long PM = P & 0x007E7E7E7E7E7E00;
-	unsigned long long OM = O & 0x007E7E7E7E7E7E00;
-	return get_moves(PM, OM) & 0x007E7E7E7E7E7E00;
-=======
-	return get_moves(P & 0x007E7E7E7E7E7E00, O & 0x007E7E7E7E7E7E00) & 0x007E7E7E7E7E7E00;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	unsigned long long PM = P & 0x007E7E7E7E7E7E00;
 	unsigned long long OM = O & 0x007E7E7E7E7E7E00;
 	return get_moves(PM, OM) & 0x007E7E7E7E7E7E00;
->>>>>>> e22b052 (_mm_cvtsi64_si128 x86 sim using loadl, requires lvalue)
 }
 
 /**
@@ -968,23 +605,7 @@ unsigned long long get_moves_6x6(const unsigned long long P, const unsigned long
  */
 bool can_move(const unsigned long long P, const unsigned long long O)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasMMX) || defined(__ARM_NEON)
-=======
-#if defined(USE_GAS_MMX) || defined(__x86_64__) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-#if defined(__x86_64__) || defined(_M_X64) || defined(hasMMX)
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-#if defined(hasMMX) || defined(hasNeon)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-#if defined(hasMMX) || defined(__ARM_NEON)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 	return get_moves(P, O) != 0;
 
 #else
@@ -1024,27 +645,7 @@ int get_mobility(const unsigned long long P, const unsigned long long O)
 	return bit_count(get_moves(P, O));
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef __AVX2__	// AVX2 version in board_sse.c
-=======
-int get_weighted_mobility(const unsigned long long P, const unsigned long long O)
-{
-	return bit_weighted_count(get_moves(P, O));
-}
-
-<<<<<<< HEAD
-#ifndef __AVX2__
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
-=======
->>>>>>> 6a997c5 (new get_moves_and_potential for AVX2)
-=======
-#ifndef __AVX2__
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
-=======
 #ifndef __AVX2__	// AVX2 version in board_sse.c
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 /**
  * @brief Get some potential moves.
  *
@@ -1074,44 +675,7 @@ unsigned long long get_potential_moves(const unsigned long long P, const unsigne
 		| get_some_potential_moves(O & 0x007E7E7E7E7E7E00, 9))
 		& ~(P|O); // mask with empties
 }
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #endif // AVX2
-=======
-=======
-#endif // AVX2
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
-=======
->>>>>>> 6a997c5 (new get_moves_and_potential for AVX2)
-
-  #if !(defined(hasSSE2) && !defined(POPCOUNT)) && !defined(hasNeon)
-/**
- * @brief Get potential mobility.
- *
- * Count the list of empty squares in contact of a player square.
- *
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return a count of potential moves.
- */
-int get_potential_mobility(const unsigned long long P, const unsigned long long O)
-{
-    #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-	if (hasMMX)
-		return get_potential_mobility_mmx(P, O);
-    #endif
-	return bit_weighted_count(get_potential_moves(P, O));
-}
-<<<<<<< HEAD
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-  #endif
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-#endif // AVX2
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
 
 /**
  * @brief search stable edge patterns.
@@ -1134,10 +698,6 @@ static int find_edge_stable(const int old_P, const int old_O, int stable)
 		if (E & X) { // is x an empty square ?
 			O = old_O;
 			P = old_P | X; // player plays on it
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 			if (X > 0x02) { // flip left discs (using parallel prefix)
 				F  = O & (X >> 1);
 				F |= O & (F >> 1);
@@ -1145,7 +705,6 @@ static int find_edge_stable(const int old_P, const int old_O, int stable)
 				F |= O2 & (F >> 2);
 				F |= O2 & (F >> 2);
 				F &= -(P & (F >> 1));
-<<<<<<< HEAD
 				O ^= F;
 				P ^= F;
 			}
@@ -1154,45 +713,12 @@ static int find_edge_stable(const int old_P, const int old_O, int stable)
 				F -= (X + X) & -(int)(F != 0);
 				O ^= F;
 				P ^= F;
-=======
-			// if (X > 0x02) { // flip left discs (using parallel prefix)
-			F  = O & (X >> 1);
-			F |= O & (F >> 1);
-			Y  = O & (O >> 1);
-			F |= Y & (F >> 2);
-			F |= Y & (F >> 2);
-			F &= -(P & (F >> 1));
-			O ^= F;
-			P ^= F;
-			// }
-			// if (X < 0x40) { // flip right discs (using carry propagation)
-			F = (O + X + X) & P;
-			if (F) {
-				F -= X + X;
-				O ^= F;
-				P ^= F;
-			}
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
-				O ^= F;
-				P ^= F;
-			}
-			// if (X < 0x40) { // flip right discs (using carry propagation)
-				F = (O + X + X) & P;
-				F -= (X + X) & -(int)(F != 0);
-				O ^= F;
-				P ^= F;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 			// }
 			stable = find_edge_stable(P, O, stable); // next move
 			if (!stable) return stable;
 
 			P = old_P;
 			O = old_O | X; // opponent plays on it
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 			if (X > 0x02) { // flip left discs (using parallel prefix)
 				F  = P & (X >> 1);
 				F |= P & (F >> 1);
@@ -1200,7 +726,6 @@ static int find_edge_stable(const int old_P, const int old_O, int stable)
 				F |= O2 & (F >> 2);
 				F |= O2 & (F >> 2);
 				F &= -(O & (F >> 1));
-<<<<<<< HEAD
 				O ^= F;
 				P ^= F;
 			}
@@ -1209,35 +734,6 @@ static int find_edge_stable(const int old_P, const int old_O, int stable)
 				F -= (X + X) & -(int)(F != 0);
 				O ^= F;
 				P ^= F;
-=======
-			// if (X > 0x02) { // flip left discs (using parallel prefix)
-			F  = P & (X >> 1);
-			F |= P & (F >> 1);
-			Y  = P & (P >> 1);
-			F |= Y & (F >> 2);
-			F |= Y & (F >> 2);
-			F &= -(O & (F >> 1));
-			O ^= F;
-			P ^= F;
-			// }
-			// if (X < 0x40) { // flip right discs (using carry propagation)
- 			F = (P + X + X) & O;
-			if (F) {
-				F -= X + X;
-				O ^= F;
-				P ^= F;
-			}
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
-				O ^= F;
-				P ^= F;
-			}
-			// if (X < 0x40) { // flip right discs (using carry propagation)
-	 			F = (P + X + X) & O;
-				F -= (X + X) & -(int)(F != 0);
-				O ^= F;
-				P ^= F;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 			// }
 			stable = find_edge_stable(P, O, stable); // next move
 			if (!stable) return stable;
@@ -1248,15 +744,7 @@ static int find_edge_stable(const int old_P, const int old_O, int stable)
 }
 
 /**
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @brief Initialize the edge stability table.
-=======
- * @brief Initialize the edge stability and A1_A8 tables.
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
- * @brief Initialize the edge stability table.
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
  */
 void edge_stability_init(void)
 {
@@ -1271,46 +759,12 @@ void edge_stability_init(void)
 		} else {
 			rPO = horizontal_mirror_32(PO);
 			if (PO > rPO)
-<<<<<<< HEAD
-<<<<<<< HEAD
-				edge_stability[PO] = mirror_byte(edge_stability[rPO]);
-=======
-				edge_stability[PO] = horizontal_mirror_32(edge_stability[rPO]);
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
 				edge_stability[PO] = mirror_byte(edge_stability[rPO]);
->>>>>>> 0ee9c1c (mirror_byte added for 1 byte bit reverse)
 			else
 				edge_stability[PO] = find_edge_stable(P, O, P);
 		}
 	}
 	// printf("edge_stability_init: %d\n", (int)(cpu_clock() - t));
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-
-#if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)) && !defined(hasSSE2)
-	init_mmx();
-#endif
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
-
-	/* Q = 0;
-	for (P = 0; P < 256; ++P) {
-		A1_A8[P] = Q;
-		Q = ((Q | ~0x0101010101010101) + 1) & 0x0101010101010101;
-<<<<<<< HEAD
-	}
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-	} */
->>>>>>> 93110ce (Use computation or optional pdep to unpack A1_A8)
-=======
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
 }
 
 #ifdef HAS_CPU_64
@@ -1321,180 +775,8 @@ void edge_stability_init(void)
 #define	packH1H8(X)	(((((unsigned int)((X) >> 32) & 0x80808080) + (((unsigned int)(X) & 0x80808080) >> 4)) * 0x00204081) >> 24)
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if !defined(hasSSE2) && !defined(__ARM_NEON)
-=======
-#if !defined(__x86_64__) && !defined(_M_X64)
-=======
-#ifndef HAS_CPU_64
-=======
-#ifndef __AVX2__
-<<<<<<< HEAD
-#if !(defined(__aarch64__) || defined(_M_ARM64) || defined(hasSSE2))
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
-#if !defined(hasNeon) && !defined(hasSSE2)
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-=======
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-=======
-#if !defined(__AVX2__) && !defined(hasNeon) && !defined(hasSSE2)
->>>>>>> dc7c79c (Omit unpack from get_edge_stability)
-=======
-#if !defined(__AVX2__) && !defined(__ARM_NEON) && !defined(hasSSE2)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-=======
 #if !defined(hasSSE2) && !defined(__ARM_NEON)
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
-/**
- * @brief Get stable edge.
- *
- * Compute the exact stable edges from precomputed tables.
- *
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return a bitboard with (some of) player's stable discs.
- *
- */
-unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O)
-{	// compute the exact stable edges (from precomputed tables)
-	return edge_stability[((unsigned int) P & 0xff) * 256 + ((unsigned int) O & 0xff)]
-	    |  (unsigned long long) edge_stability[(unsigned int) (P >> 56) * 256 + (unsigned int) (O >> 56)] << 56
-	    |  unpackA2A7(edge_stability[packA1A8(P) * 256 + packA1A8(O)])
-	    |  unpackH2H7(edge_stability[packH1H8(P) * 256 + packH1H8(O)]);
-}
-
-/**
- * @brief Estimate the stability of edges.
- *
- * Count the number (in fact a lower estimate) of stable discs on the edges.
- *
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return the number of stable discs on the edges.
- */
-int get_edge_stability(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int packedstable = edge_stability[((unsigned int) P & 0xff) * 256 + ((unsigned int) O & 0xff)]
-	  | edge_stability[(unsigned int) (P >> 56) * 256 + (unsigned int) (O >> 56)] << 8
-	  | edge_stability[packA1A8(P) * 256 + packA1A8(O)] << 16
-	  | edge_stability[packH1H8(P) * 256 + packH1H8(O)] << 24;
-	return bit_count_32(packedstable & 0xffff7e7e);
-}
-<<<<<<< HEAD
-<<<<<<< HEAD
-#endif
-<<<<<<< HEAD
-=======
-#endif
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-
-#if !defined(HAS_CPU_64) && !(defined(ANDROID) && (defined(hasNeon) || defined(hasSSE2)))
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-#if !defined(hasNeon) && !defined(hasSSE2)
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
-
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-/**
- * @brief Get full lines.
- *
- * @param disc all discs on the board.
- * @param full all 1 if full line, otherwise all 0.
- */
-
-#if !defined(__ARM_NEON) && !defined(hasSSE2) && !defined(hasMMX)
-  #ifdef HAS_CPU_64
-
-static unsigned long long get_full_lines_h(unsigned long long full)
-{
-	full &= full >> 1;
-	full &= full >> 2;
-	full &= full >> 4;
-	return (full & 0x0101010101010101) * 0xff;
-}
-
-static unsigned long long get_full_lines_v(unsigned long long full)
-{
-	full &= (full >> 8) | (full << 56);	// ror 8
-	full &= (full >> 16) | (full << 48);	// ror 16
-	full &= (full >> 32) | (full << 32);	// ror 32
-	return full;
-}
-
-  #else
-
-static unsigned int get_full_lines_h_32(unsigned int full)
-{
-	full &= full >> 1;
-	full &= full >> 2;
-	full &= full >> 4;
-	return (full & 0x01010101) * 0xff;
-}
-
-static unsigned long long get_full_lines_h(unsigned long long full)
-{
-	return ((unsigned long long) get_full_lines_h_32(full >> 32) << 32) | get_full_lines_h_32(full);
-}
-
-static unsigned long long get_full_lines_v(unsigned long long full)
-{
-	unsigned int	t = (unsigned int) full & (unsigned int)(full >> 32);
-	t &= (t >> 16) | (t << 16);	// ror 16
-	t &= (t >> 8) | (t << 24);	// ror 8
-	return t | ((unsigned long long) t << 32);
-}
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-unsigned long long get_all_full_lines(const unsigned long long disc, V4DI *full)
-=======
-=======
-  #endif
-
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
-void get_all_full_lines(const unsigned long long disc, unsigned long long full[5])
->>>>>>> 4303b09 (Returns all full lines in full[4])
-=======
-static void get_full_lines(const unsigned long long disc, unsigned long long full[4])
->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX)
-=======
-void get_full_lines(const unsigned long long disc, unsigned long long full[4])
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-{
-	unsigned long long l7, l9, r7, r9;	// full lines
-
-	full[0] = get_full_lines_h(disc);
-	full[1] = get_full_lines_v(disc);
-
-	l7 = r7 = disc;
-	l7 &= 0xff01010101010101 | (l7 >> 7);	r7 &= 0x80808080808080ff | (r7 << 7);
-	l7 &= 0xffff030303030303 | (l7 >> 14);	r7 &= 0xc0c0c0c0c0c0ffff | (r7 << 14);
-	l7 &= 0xffffffff0f0f0f0f | (l7 >> 28);	r7 &= 0xf0f0f0f0ffffffff | (r7 << 28);
-	full[3] = l7 & r7;
-
-	l9 = r9 = disc;
-	l9 &= 0xff80808080808080 | (l9 >> 9);	r9 &= 0x01010101010101ff | (r9 << 9);
-	l9 &= 0xffffc0c0c0c0c0c0 | (l9 >> 18);	r9 &= 0x030303030303ffff | (r9 << 18);
-	full[2] = l9 & r9 & (0x0f0f0f0ff0f0f0f0 | (l9 >> 36) | (r9 << 36));
-}
-#endif // __ARM_NEON/hasSSE2/hasMMX
-
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
 /**
-<<<<<<< HEAD
  * @brief Get stable edge.
  *
  * Compute the exact stable edges from precomputed tables.
@@ -1504,11 +786,7 @@ void get_full_lines(const unsigned long long disc, unsigned long long full[4])
  * @return a bitboard with (some of) player's stable discs.
  *
  */
-<<<<<<< HEAD
 unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O)
-=======
-static unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O)
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 {	// compute the exact stable edges (from precomputed tables)
 	return edge_stability[((unsigned int) P & 0xff) * 256 + ((unsigned int) O & 0xff)]
 	    |  (unsigned long long) edge_stability[(unsigned int) (P >> 56) * 256 + (unsigned int) (O >> 56)] << 56
@@ -1517,88 +795,6 @@ static unsigned long long get_stable_edge(const unsigned long long P, const unsi
 }
 
 /**
-<<<<<<< HEAD
-=======
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
- * @brief Estimate the stability.
- *
- * Count the number (in fact a lower estimate) of stable discs.
- *
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return the number of stable discs.
- */
-#ifndef __AVX2__	// AVX2 version in board_sse.c
-  #if !(defined(hasMMX) && !defined(hasSSE2))	// MMX version of get_stability in board_mmx.c
-    #if !(defined(hasSSE2) && !defined(HAS_CPU_64))	// 32bit SSE version in board_sse.c
-// compute the other stable discs (ie discs touching another stable disc in each flipping direction).
-int get_spreaded_stability(unsigned long long stable, unsigned long long P_central, unsigned long long full[4])
-{
-	unsigned long long stable_h, stable_v, stable_d7, stable_d9, old_stable;
-
-	if (stable == 0)	// (2%)
-		return 0;
-
-	do {
-		old_stable = stable;
-		stable_h = ((stable >> 1) | (stable << 1) | full[0]);
-		stable_v = ((stable >> 8) | (stable << 8) | full[1]);
-		stable_d9 = ((stable >> 9) | (stable << 9) | full[2]);
-		stable_d7 = ((stable >> 7) | (stable << 7) | full[3]);
-		stable |= (stable_h & stable_v & stable_d9 & stable_d7 & P_central);
-	} while (stable != old_stable);	// (44%)
-
-	return bit_count(stable);
-}
-    #endif
-
-// returns stability count only
-int get_stability(const unsigned long long P, const unsigned long long O)
-{
-	unsigned long long stable = get_stable_edge(P, O);	// compute the exact stable edges
-	unsigned long long P_central = P & 0x007e7e7e7e7e7e00;
-	unsigned long long full[4];
-
-	get_full_lines(P | O, full);	// add full lines
-	stable |= (P_central & full[0] & full[1] & full[2] & full[3]);
-
-	return get_spreaded_stability(stable, P_central, full);	// compute the other stable discs
-}
-
-// returns all full in full[4] in addition to stability count
-int get_stability_fulls(const unsigned long long P, const unsigned long long O, unsigned long long full[5])
-{
-	unsigned long long stable = get_stable_edge(P, O);	// compute the exact stable edges
-	unsigned long long P_central = P & 0x007e7e7e7e7e7e00;
-
-	get_full_lines(P | O, full);	// add full lines
-	full[4] = full[0] & full[1] & full[2] & full[3];
-	stable |= (P_central & full[4]);
-
-	return get_spreaded_stability(stable, P_central, full);	// compute the other stable discs
-}
-  #endif
-
-/**
- * @brief Get intersection of full lines.
- *
- * Get intersection of full lines.
- *
- * @param disc bitboard with occupied discs.
- * @return the intersection of full lines.
- */
-unsigned long long get_all_full_lines(const unsigned long long disc)
-{
-	unsigned long long full[4];
-	get_full_lines(disc, full);
-	return full[0] & full[1] & full[2] & full[3];
-}
-#endif // __AVX2__
-
-/**
-<<<<<<< HEAD
->>>>>>> 1a7b0ed (flip_bmi2 added; bmi2 version of stability and corner_stability)
  * @brief Estimate the stability of edges.
  *
  * Count the number (in fact a lower estimate) of stable discs on the edges.
@@ -1765,8 +961,6 @@ unsigned long long get_all_full_lines(const unsigned long long disc)
 #endif // __AVX2__
 
 /**
-=======
->>>>>>> dc7c79c (Omit unpack from get_edge_stability)
  * @brief Estimate corner stability.
  *
  * Count the number of stable discs around the corner. Limiting the count
@@ -1779,32 +973,12 @@ unsigned long long get_all_full_lines(const unsigned long long disc)
  */
 int get_corner_stability(const unsigned long long P)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifdef POPCOUNT
-	// stable = (((0x0100000000000001 & P) << 1) | ((0x8000000000000080 & P) >> 1) | ((0x0000000000000081 & P) << 8) | ((0x8100000000000000 & P) >> 8) | 0x8100000000000081) & P;
-  	unsigned int P2187 = (P >> 48) | (P << 16);	// ror 48
-	unsigned int stable = 0x00818100 & P2187;
-	stable |= ((((stable * 5) >> 1) & 0x00424200) | (stable << 8) | (stable >> 8)) & P2187;	// 1-8 alias does not matter since corner is stable anyway
-	return bit_count_32(stable);
-=======
-#if 0
-
-	const unsigned long long stable = ((((0x0100000000000001 & P) << 1) | ((0x8000000000000080 & P) >> 1) | ((0x0000000000000081 & P) << 8) | ((0x8100000000000000 & P) >> 8) | 0x8100000000000081) & P);
-	return bit_count(stable);
->>>>>>> 6506166 (More SSE optimizations)
-=======
 #ifdef POPCOUNT
 	// stable = (((0x0100000000000001 & P) << 1) | ((0x8000000000000080 & P) >> 1) | ((0x0000000000000081 & P) << 8) | ((0x8100000000000000 & P) >> 8) | 0x8100000000000081) & P;
   	unsigned int P2187 = (P >> 48) | (P << 16);	// ror 48
 	unsigned int stable = 0x00818100 & P2187;
 	stable |= ((((stable * 5) >> 1) & 0x00424200) | (stable << 8) | (stable >> 8)) & P2187;	// 1-8 alias does not matter since corner is stable anyway
 	return bit_count_32(stable);
-<<<<<<< HEAD
-  #endif
->>>>>>> 11a54a6 (Revise get_corner_stability and hash_cleanup)
-=======
->>>>>>> 9078deb (new get_corner_stability for both 64&32 bit)
 
 #else	// kindergarten
 	static const char n_stable_h2a2h1g1b1a1[64] = {
@@ -1814,20 +988,7 @@ int get_corner_stability(const unsigned long long P)
 		0, 2, 0, 3, 0, 2, 0, 3, 2, 4, 2, 5, 3, 5, 3, 6
 	};
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #if 0 // defined(__BMI2__) && !defined(__bdver4__) && !defined(__znver1__) && !defined(__znver2__)	// BMI2 CPU has POPCOUNT
-=======
-  #if 0 // defined(__BMI2__) && !defined(__bdver4__) && !defined(__znver1__) && !defined(__znver2__)	// kindergarten for generic modern build
->>>>>>> 867c81c (Omit restore board/parity in search_shallow; tweak NWS_STABILITY)
-=======
   #if 0 // defined(__BMI2__) && !defined(__bdver4__) && !defined(__znver1__) && !defined(__znver2__)	// BMI2 CPU has POPCOUNT
->>>>>>> 9078deb (new get_corner_stability for both 64&32 bit)
 	int cnt = n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) vertical_mirror(P), 0x000081c3)]
 		+ n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) P, 0x000081c3)];
 
@@ -1843,51 +1004,6 @@ int get_corner_stability(const unsigned long long P)
 		+ n_stable_h2a2h1g1b1a1[(((unsigned int) P & 0x000081c3) * 0x04410000) >> 26];
   #endif
 	// assert(cnt == bit_count((((0x0100000000000001 & P) << 1) | ((0x8000000000000080 & P) >> 1) | ((0x0000000000000081 & P) << 8) | ((0x8100000000000000 & P) >> 8) | 0x8100000000000081) & P));
-=======
-#if defined(__BMI2__) && defined(__x86_64__)
-=======
-#if 0 // defined(__BMI2__) && defined(__x86_64__) // pext is slow on AMD
-<<<<<<< HEAD
->>>>>>> f24cc06 (avoid BMI2 for AMD; more lzcnt/tzcnt in count_last_flip_bitscan)
-	int cnt = n_stable_h8g8b8a8h7a7[_pext_u64(P, 0xc381000000000000ULL)]
-		+ n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) P, 0x000081c3U)];
-=======
-=======
-#ifdef USEPEXT // defined(__BMI2__) && defined(__x86_64__) && !defined(AMD_BEFORE_ZEN3)	// kindergarten for generic modern build
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
-#ifdef USEPEXT // defined(__BMI2__) && defined(__x86_64__) && !defined(AMD_BEFORE_ZEN3)	// kindergarten for generic modern build
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
-	int cnt = n_stable_h8g8b8a8h7a7[_pext_u64(P, 0xc381000000000000)]
-		+ n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) P, 0x000081c3)];
->>>>>>> 6506166 (More SSE optimizations)
-#else
-	int cnt = n_stable_h8g8b8a8h7a7[(((unsigned int) (P >> 32) & 0xc3810000) * 0x00000411) >> 26]
-		+ n_stable_h2a2h1g1b1a1[(((unsigned int) P & 0x000081c3) * 0x04410000) >> 26];
-#endif
-<<<<<<< HEAD
-	// assert(cnt == bit_count((((0x0100000000000001ULL & P) << 1) | ((0x8000000000000080ULL & P) >> 1) | ((0x0000000000000081ULL & P) << 8) | ((0x8100000000000000ULL & P) >> 8) | 0x8100000000000081ULL) & P));
->>>>>>> 1a7b0ed (flip_bmi2 added; bmi2 version of stability and corner_stability)
-=======
-=======
-  #if 0 // defined(__BMI2__) && !defined(AMD_BEFORE_ZEN3)	// kindergarten for generic modern build
-	int cnt = n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) vertical_mirror(P), 0x000081c3)]
-		+ n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) P, 0x000081c3)];
-
-  #else
-	static const char n_stable_h8g8b8a8h7a7[64] = {
-		0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 0, 0, 2, 3, 2, 3,
-		0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 0, 0, 2, 3, 2, 3,
-		1, 1, 2, 2, 2, 3, 3, 4, 1, 1, 2, 2, 3, 4, 4, 5,
-		2, 2, 3, 3, 3, 4, 4, 5, 2, 2, 3, 3, 4, 5, 5, 6
-	};
-
-	int cnt = n_stable_h8g8b8a8h7a7[(((unsigned int) (P >> 32) & 0xc3810000) * 0x00000411) >> 26]
-		+ n_stable_h2a2h1g1b1a1[(((unsigned int) P & 0x000081c3) * 0x04410000) >> 26];
-  #endif
->>>>>>> 11a54a6 (Revise get_corner_stability and hash_cleanup)
-	// assert(cnt == bit_count((((0x0100000000000001 & P) << 1) | ((0x8000000000000080 & P) >> 1) | ((0x0000000000000081 & P) << 8) | ((0x8100000000000000 & P) >> 8) | 0x8100000000000081) & P));
->>>>>>> 6506166 (More SSE optimizations)
 	return cnt;
 
 #endif
@@ -1901,49 +1017,8 @@ int get_corner_stability(const unsigned long long P)
  */
 unsigned long long board_get_hash_code(const Board *board)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	unsigned long long crc = crc32c_u64(0, board->player);
-	return (crc << 32) | crc32c_u64(crc, board->opponent);
-=======
-	const unsigned char *p = (const unsigned char*)board;
-=======
-	const unsigned char *const p = (const unsigned char*)board;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	unsigned long long h1, h2;
-
-#if defined(USE_GAS_MMX) && defined(__3dNOW__)	// Faster on AMD but not suitable for CPU with slow emms
-	if (hasMMX)
-		return board_get_hash_code_mmx(p);
-#elif defined(USE_GAS_MMX) || defined(USE_MSVC_X86) // || defined(__x86_64__)
-	if (hasSSE2)
-		return board_get_hash_code_sse(p);
-#endif
-
-	h1  = hash_rank[0][p[0]];	h2  = hash_rank[1][p[1]];
-	h1 ^= hash_rank[2][p[2]];	h2 ^= hash_rank[3][p[3]];
-	h1 ^= hash_rank[4][p[4]];	h2 ^= hash_rank[5][p[5]];
-	h1 ^= hash_rank[6][p[6]];	h2 ^= hash_rank[7][p[7]];
-	h1 ^= hash_rank[8][p[8]];	h2 ^= hash_rank[9][p[9]];
-	h1 ^= hash_rank[10][p[10]];	h2 ^= hash_rank[11][p[11]];
-	h1 ^= hash_rank[12][p[12]];	h2 ^= hash_rank[13][p[13]];
-	h1 ^= hash_rank[14][p[14]];	h2 ^= hash_rank[15][p[15]];
-
-	// assert((h1 ^ h2) == board_get_hash_code_sse(p));
-
-	return h1 ^ h2;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	unsigned long long	crc;
-
-	crc = crc32c_u64(0, board->player);
-=======
 	unsigned long long crc = crc32c_u64(0, board->player);
->>>>>>> 0b8fa13 (More HBOARD hash functions)
 	return (crc << 32) | crc32c_u64(crc, board->opponent);
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
 }
 
 /**
@@ -2022,28 +1097,8 @@ void board_print(const Board *board, const int player, FILE *f)
 {
 	int i, j, square;
 	unsigned long long bk, wh;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	const char color[5] = "?*O-.";
 	unsigned long long moves = board_get_moves(board);
-<<<<<<< HEAD
-
-	if (player == BLACK) {
-		bk = board->player;
-		wh = board->opponent;
-	} else {
-		bk = board->opponent;
-		wh = board->player;
-	}
-=======
-	const char *color = "?*O-." + 1;
-=======
-	const char color[5] = "?*O-.";
->>>>>>> bc93772 (Avoid modern compliler warnings)
-	unsigned long long moves = get_moves(board->player, board->opponent);
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 
 	if (player == BLACK) {
 		bk = board->player;
@@ -2061,15 +1116,7 @@ void board_print(const Board *board, const int player, FILE *f)
 			square = 2 - (wh & 1) - 2 * (bk & 1);
 			if ((square == EMPTY) && (moves & 1))
 				square = EMPTY + 1;
-<<<<<<< HEAD
-<<<<<<< HEAD
-			fputc(color[square + 1], f);
-=======
-			fputc(color[square], f);
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
 			fputc(color[square + 1], f);
->>>>>>> bc93772 (Avoid modern compliler warnings)
 			fputc(' ', f);
 			bk >>= 1;
 			wh >>= 1;
diff --git a/src/board.h b/src/board.h
index 2b1cf85..2da07db 100644
--- a/src/board.h
+++ b/src/board.h
@@ -3,23 +3,7 @@
  *
  * Board management header file.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2024
-=======
- * @date 1998 - 2021
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
- * @date 1998 - 2022
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
- * @date 1998 - 2023
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-=======
- * @date 1998 - 2024
->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -44,45 +28,16 @@ void board_init(Board*);
 int board_set(Board*, const char*);
 int board_from_FEN(Board*, const char*);
 bool board_lesser(const Board*, const Board*);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 void board_horizontal_mirror(const Board *, Board *);
 void board_vertical_mirror(const Board *, Board *);
 void board_transpose(const Board *, Board *);
-=======
-bool board_equal(const Board*, const Board*);
->>>>>>> 8a7e354 (Exclude hash init time from count games; other minor size opts)
-=======
->>>>>>> de58f52 (AVX2 board_equal; delayed hash lock code)
-=======
-void board_horizontal_mirror(const Board *, Board *);
-void board_vertical_mirror(const Board *, Board *);
-void board_transpose(const Board *, Board *);
->>>>>>> 6bc747d (Split board_flip_* from board_symetry)
 void board_symetry(const Board*, const int, Board*);
 int board_unique(const Board*, Board*);
 void board_check(const Board*);
 void board_rand(Board*, int, struct Random*);
 
 // Compare two board for equality
-<<<<<<< HEAD
-<<<<<<< HEAD
-#define	board_equal(b1,b2)	((b1)->player == (b2)->player && (b1)->opponent == (b2)->opponent)
-=======
-#ifdef __AVX2__
-inline bool board_equal(const Board *b1, const Board *b2)
-{
-	__m128i b = _mm_xor_si128(_mm_loadu_si128((__m128i *) b1), _mm_loadu_si128((__m128i *) b2));
-	return _mm_testz_si128(b, b);
-}
-#else
 #define	board_equal(b1,b2)	((b1)->player == (b2)->player && (b1)->opponent == (b2)->opponent)
-#endif
->>>>>>> de58f52 (AVX2 board_equal; delayed hash lock code)
-=======
-#define	board_equal(b1,b2)	((b1)->player == (b2)->player && (b1)->opponent == (b2)->opponent)
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 
 int board_count_last_flips(const Board*, const int);
 unsigned long long board_get_move_flip(const Board*, const int, struct Move*);
@@ -91,9 +46,6 @@ void board_swap_players(Board*);
 void board_update(Board*, const struct Move*);
 void board_restore(Board*, const struct Move*);
 void board_pass(Board*);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 
 bool can_move(const unsigned long long, const unsigned long long);
 unsigned long long get_moves_6x6(const unsigned long long, const unsigned long long);
@@ -101,63 +53,6 @@ bool can_move_6x6(const unsigned long long, const unsigned long long);
 int get_mobility(const unsigned long long, const unsigned long long);
 #ifdef __AVX2__
 	__m128i vectorcall get_moves_and_potential(__m256i, __m256i);
-<<<<<<< HEAD
-#else
-	unsigned long long get_potential_moves(const unsigned long long, const unsigned long long);
-#endif
-
-void edge_stability_init(void);
-unsigned long long get_stable_edge(const unsigned long long, const unsigned long long);
-#ifndef __AVX2__	// public for android dispatch
-	void get_full_lines(const unsigned long long, unsigned long long [4]);
-  #if !(defined(hasMMX) && !defined(hasSSE2))
-	int get_spreaded_stability(unsigned long long, unsigned long long, unsigned long long [4]);
-  #endif
-#endif
-unsigned long long get_all_full_lines(const unsigned long long);
-int get_stability(const unsigned long long, const unsigned long long);
-int get_stability_fulls(const unsigned long long, const unsigned long long, unsigned long long [5]);
-int get_edge_stability(const unsigned long long, const unsigned long long);
-int get_corner_stability(const unsigned long long);
-=======
-unsigned long long board_next(const Board*, const int, Board*);
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
-=======
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-unsigned long long board_get_hash_code(const Board*);
-int board_get_square_color(const Board*, const int);
-bool board_is_occupied(const Board*, const int);
-void board_print(const Board*, const int, FILE*);
-char* board_to_string(const Board*, const int, char *);
-void board_print_FEN(const Board*, const int, FILE*);
-char* board_to_FEN(const Board*, const int, char*);
-bool board_is_pass(const Board*);
-bool board_is_game_over(const Board*);
-int board_count_empties(const Board *board);
-#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-	void init_mmx (void);
-	unsigned long long get_moves_mmx(const unsigned long long, const unsigned long long);
-	unsigned long long get_moves_sse(const unsigned long long, const unsigned long long);
-=======
->>>>>>> 0b8fa13 (More HBOARD hash functions)
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-#elif defined(ANDROID) && !defined(__ARM_NEON) && !defined(hasSSE2)
-	void init_neon (void);
-	unsigned long long get_moves_sse(unsigned long long, unsigned long long);
-=======
-unsigned long long get_moves(const unsigned long long, const unsigned long long);
-=======
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-bool can_move(const unsigned long long, const unsigned long long);
-unsigned long long get_moves_6x6(const unsigned long long, const unsigned long long);
-bool can_move_6x6(const unsigned long long, const unsigned long long);
-int get_mobility(const unsigned long long, const unsigned long long);
-#ifdef __AVX2__
-__m128i vectorcall get_moves_and_potential(__m256i, __m256i);
-=======
->>>>>>> 0835dae (Reformat #if's)
 #else
 	unsigned long long get_potential_moves(const unsigned long long, const unsigned long long);
 #endif
@@ -195,123 +90,28 @@ int board_count_empties(const Board *board);
 	unsigned long long get_moves_sse(unsigned long long, unsigned long long);
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(USE_GAS_MMX) && defined(__3dNOW__)
-unsigned long long board_get_hash_code_mmx(const unsigned char *p);
-#elif defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-unsigned long long board_get_hash_code_sse(const unsigned char *p);
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-#endif
-
-=======
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
-#ifdef __AVX2__
-__m128i vectorcall get_moves_and_potential(__m256i, __m256i);
-#endif
-
->>>>>>> 6a997c5 (new get_moves_and_potential for AVX2)
-=======
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
 extern unsigned char edge_stability[256 * 256];
-<<<<<<< HEAD
-<<<<<<< HEAD
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 // a1/a8/h1/h8 are already stable in horizontal line, so omit them in vertical line to ease kindergarten for CPU_64
 #if 0 // defined(__BMI2__) && defined(HAS_CPU_64) && !defined(__bdver4__) && !defined(__znver1__) && !defined(__znver2__) // pdep is slow on AMD before Zen3
 	#define	unpackA2A7(x)	_pdep_u64((x), 0x0101010101010101)
 	#define	unpackH2H7(x)	_pdep_u64((x), 0x8080808080808080)
-=======
-/* Define function attributes directive when available */
-#if defined(__GNUC__) && !defined(__clang__) && defined(__i386__)
-	#define	REGPARM	__attribute__((regparm(2)))
->>>>>>> e558fdb (Some cleanups for clang / android build)
 #else
 	#define	unpackA2A7(x)	((((x) & 0x7e) * 0x0000040810204080) & 0x0001010101010100)
 	#define	unpackH2H7(x)	((((x) & 0x7e) * 0x0002040810204000) & 0x0080808080808000)
 #endif
 
-<<<<<<< HEAD
 #if (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_CARRY) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_KINDERGARTEN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BITSCAN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_32)
 	extern int (*count_last_flip[BOARD_SIZE + 1])(const unsigned long long);
-=======
-#if ((LAST_FLIP_COUNTER == COUNT_LAST_FLIP_PLAIN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2))
-	extern int last_flip(int pos, unsigned long long P);
-#else
-	#if LAST_FLIP_COUNTER == COUNT_LAST_FLIP_32
-		extern int (REGPARM *count_last_flip[BOARD_SIZE + 1])(const unsigned long long);
-	#else
-		extern int (*count_last_flip[BOARD_SIZE + 1])(const unsigned long long);
-	#endif
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
-#if ((LAST_FLIP_COUNTER == COUNT_LAST_FLIP_PLAIN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2))
-=======
-extern unsigned long long A1_A8[256];
-=======
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-
-// a1/a8/h1/h8 are already stable in horizontal line, so omit them in vertical line to ease kindergarten for CPU_64
-#if 0 // defined(__BMI2__) && defined(HAS_CPU_64) && !defined(__bdver4__) && !defined(__znver1__) && !defined(__znver2__) // pdep is slow on AMD before Zen3
-	#define	unpackA2A7(x)	_pdep_u64((x), 0x0101010101010101)
-	#define	unpackH2H7(x)	_pdep_u64((x), 0x8080808080808080)
-#else
-	#define	unpackA2A7(x)	((((x) & 0x7e) * 0x0000040810204080) & 0x0001010101010100)
-	#define	unpackH2H7(x)	((((x) & 0x7e) * 0x0002040810204000) & 0x0080808080808000)
-#endif
-
-<<<<<<< HEAD
-#if (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_PLAIN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	extern int last_flip(int pos, unsigned long long P);
-#else
-=======
-#if (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_CARRY) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_KINDERGARTEN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BITSCAN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_32)
->>>>>>> 52949e1 (Add build options and files for new count_last_flips)
-	extern int (*count_last_flip[BOARD_SIZE + 1])(const unsigned long long);
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	#define	last_flip(x,P)	count_last_flip[x](P)
 #else
 	extern int last_flip(int pos, unsigned long long P);
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512)
-<<<<<<< HEAD
-<<<<<<< HEAD
-	extern const V4DI lmask_v4[66], rmask_v4[66];
-<<<<<<< HEAD
 	extern __m128i vectorcall mm_Flip(const __m128i OP, int pos);
-<<<<<<< HEAD
 	inline __m128i vectorcall reduce_vflip(__m128i flip) { return _mm_or_si128(flip, _mm_shuffle_epi32(flip, 0x4e)); }
 	#define	Flip(x,P,O)	((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_set_epi64x((O), (P)), (x)))))
-=======
-	extern __m256i vectorcall mm_Flip(const __m128i OP, int pos);
-	inline __m128i vectorcall reduce_vflip(__m256i flip4) {
-		__m128i flip2 = _mm_or_si128(_mm256_castsi256_si128(flip4), _mm256_extracti128_si256(flip4, 1));
-		return _mm_or_si128(flip2, _mm_shuffle_epi32(flip2, 0x4e));	// SWAP64
-	}
-=======
-	extern __m128i vectorcall mm_Flip(const __m128i OP, int pos);
-	inline __m128i vectorcall reduce_vflip(__m128i flip) { return _mm_or_si128(flip, _mm_shuffle_epi32(flip, 0x4e)); }
-<<<<<<< HEAD
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
-  #ifdef HAS_CPU_64
-	#define	Flip(x,P,O)	((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_insert_epi64(_mm_cvtsi64_si128(P), (O), 1), (x)))))
-  #else
-	#define	Flip(x,P,O)	((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_insert_epi32(_mm_insert_epi32(_mm_insert_epi32(\
-		_mm_cvtsi32_si128(P), ((P) >> 32), 1), (O), 2), (O >> 32), 3), (x)))))
-  #endif
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-=======
-	#define	Flip(x,P,O)	((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_set_epi64x((O), (P)), (x)))))
->>>>>>> c228033 (Replace mm_flip OP param unpack with _mm_set_epi64x)
 	#define	board_flip(board,x)	((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_loadu_si128((__m128i *) (board)), (x)))))
 	#define	vboard_flip(board,x)	((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip((board).v2, (x)))))
 
@@ -347,149 +147,6 @@ extern unsigned long long A1_A8[256];
 	extern void init_flip_sse(void);
   #endif
 
-=======
-#if (MOVE_GENERATOR == MOVE_GENERATOR_SSE_BSWAP) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX)
-	extern unsigned long long Flip(int, const unsigned long long, const unsigned long long);
-=======
-#if MOVE_GENERATOR == MOVE_GENERATOR_AVX
-=======
-#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512)
->>>>>>> 393b667 (Experimental AVX512VL/CD version of move generator)
-	extern __m128i vectorcall mm_Flip(const __m128i OP, int pos);
-	#define	Flip(x,P,O)	((unsigned long long) _mm_cvtsi128_si64(mm_Flip(_mm_unpacklo_epi64(_mm_cvtsi64_si128(P), _mm_cvtsi64_si128(O)), (x))))
-=======
-=======
-	extern __m256i vectorcall mm_Flip(const __m128i OP, int pos);
-	inline __m128i vectorcall reduce_vflip(__m256i flip4) {
-		__m128i flip2 = _mm_or_si128(_mm256_castsi256_si128(flip4), _mm256_extracti128_si256(flip4, 1));
-		return _mm_or_si128(flip2, _mm_shuffle_epi32(flip2, 0x4e));	// SWAP64
-	}
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
-  #ifdef HAS_CPU_64
-	#define	Flip(x,P,O)	((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_insert_epi64(_mm_cvtsi64_si128(P), (O), 1), (x)))))
-  #else
-	#define	Flip(x,P,O)	((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_insert_epi32(_mm_insert_epi32(_mm_insert_epi32(\
-		_mm_cvtsi32_si128(P), ((P) >> 32), 1), (O), 2), (O >> 32), 3), (x)))))
-  #endif
-<<<<<<< HEAD
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
-	#define	board_flip(board,x)	((unsigned long long) _mm_cvtsi128_si64(mm_Flip(_mm_loadu_si128((__m128i *) (board)), (x))))
-	#define	vboard_flip(board,x)	((unsigned long long) _mm_cvtsi128_si64(mm_Flip((board), (x))))
-=======
-	#define	board_flip(board,x)	((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_loadu_si128((__m128i *) (board)), (x)))))
-<<<<<<< HEAD
-	#define	vboard_flip(board,x)	((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip((board), (x)))))
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
-=======
-	#define	vboard_flip(board,x)	((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip((board).v2, (x)))))
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-
-#elif MOVE_GENERATOR == MOVE_GENERATOR_SSE
-	extern __m128i (vectorcall *mm_flip[BOARD_SIZE + 2])(const __m128i);
-	#define	Flip(x,P,O)	((unsigned long long) _mm_cvtsi128_si64(mm_flip[x](_mm_unpacklo_epi64(_mm_cvtsi64_si128(P), _mm_cvtsi64_si128(O)))))
-	#define mm_Flip(OP,x)	mm_flip[x](OP)
-	#define reduce_vflip(x)	(x)
-	#define	board_flip(board,x)	((unsigned long long) _mm_cvtsi128_si64(mm_flip[x](_mm_loadu_si128((__m128i *) (board)))))
-	#define	vboard_flip(board,x)	((unsigned long long) _mm_cvtsi128_si64(mm_flip[x]((board).v2)))
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-#elif MOVE_GENERATOR == MOVE_GENERATOR_SSE_BSWAP
-	extern unsigned long long flip(int, const unsigned long long, const unsigned long long);
-	#define	Flip(x,P,O)	flip((x), (P), (O))
-	#define	board_flip(board,x)	flip((x), (board)->player, (board)->opponent)
-
->>>>>>> 6506166 (More SSE optimizations)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON
-	extern uint64x2_t mm_Flip(uint64x2_t OP, int pos);
-	#define	Flip(x,P,O)	vgetq_lane_u64(mm_Flip(vcombine_u64(vcreate_u64(P), vcreate_u64(O)), (x)), 0)
-	#define	board_flip(board,x)	vgetq_lane_u64(mm_Flip(vld1q_u64((uint64_t *) (board)), (x)), 0)
-	#define	vboard_flip(board,x)	vgetq_lane_u64(mm_Flip((board).v2, (x)), 0)
-
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-#elif MOVE_GENERATOR == MOVE_GENERATOR_32
-	extern unsigned long long (*flip[BOARD_SIZE + 2])(unsigned int, unsigned int, unsigned int, unsigned int);
-	#define Flip(x,P,O)	flip[x]((unsigned int)(P), (unsigned int)((P) >> 32), (unsigned int)(O), (unsigned int)((O) >> 32))
-  #ifdef __BIG_ENDIAN__
-	#define	board_flip(board,x)	flip[x]((unsigned int)((board)->player), ((unsigned int *) &(board)->player)[0], (unsigned int)((board)->opponent), ((unsigned int *) &(board)->opponent)[0])
-  #else
-	#define	board_flip(board,x)	flip[x]((unsigned int)((board)->player), ((unsigned int *) &(board)->player)[1], (unsigned int)((board)->opponent), ((unsigned int *) &(board)->opponent)[1])
-  #endif
-<<<<<<< HEAD
-	#if defined(USE_GAS_MMX) && !defined(hasSSE2)
-		extern void init_flip_sse(void);
-	#endif
-<<<<<<< HEAD
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-=======
-  #if defined(USE_GAS_MMX) && !defined(hasSSE2)
-	extern void init_flip_sse(void);
-  #endif
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 6506166 (More SSE optimizations)
-=======
-#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON
-	extern unsigned long long Flip(int , unsigned long long, unsigned long long);
-
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-#else
-  #if MOVE_GENERATOR == MOVE_GENERATOR_SSE_BSWAP
-	extern unsigned long long Flip(int, unsigned long long, unsigned long long);
-  #else
-	extern unsigned long long (*flip[BOARD_SIZE + 2])(const unsigned long long, const unsigned long long);
-	#define	Flip(x,P,O)	flip[x]((P), (O))
-<<<<<<< HEAD
-  #endif
-
-	#define	board_flip(board,x)	Flip((x), (board)->player, (board)->opponent)
-#endif
-
-#ifndef vboard_flip
-	#define	vboard_flip(vboard,x)	board_flip(&(vboard).board, (x))
-#endif
-
-// Use backup copy of search->board in a vector register if available (assume *pboard == vboard on entry)
-#ifdef hasSSE2
-	#define	vboard_update(pboard,vboard,move)	_mm_storeu_si128((__m128i *) (pboard), _mm_shuffle_epi32(_mm_xor_si128((vboard).v2, _mm_or_si128(_mm_set1_epi64x((move)->flipped), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x]))), 0x4e))
-#else
-	#define	vboard_update(pboard,vboard,move)	board_update((pboard), (move))
-#endif
-
-// Pass Board in a vector register to Flip
-#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)
-	unsigned long long vectorcall board_next_sse(__m128i OP, const int x, Board *next);
-	#define	board_next(board,x,next)	board_next_sse(_mm_loadu_si128((__m128i *) (board)), (x), (next))
-	#define vboard_next(vboard,x,next)	board_next_sse((vboard).v2, (x), (next))
-#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON
-	unsigned long long board_next_neon(uint64x2_t OP, const int x, Board *next);
-	#define	board_next(board,x,next)	board_next_neon(vld1q_u64((uint64_t *) (board)), (x), (next))
-	#define vboard_next(vboard,x,next)	board_next_neon((vboard).v2, (x), (next))
-#else
-	unsigned long long board_next(const Board *board, const int x, Board *next);
-	#define	vboard_next(vboard,x,next)	board_next(&(vboard).board, (x), (next))
-#endif
-
-// Pass vboard to get_moves if vectorcall available, otherwise board
-#if defined(__AVX2__) && (defined(_MSC_VER) || defined(__linux__))
-	unsigned long long vectorcall get_moves_avx(__m256i PP, __m256i OO);
-	#define	get_moves(P,O)	get_moves_avx(_mm256_set1_epi64x(P), _mm256_set1_epi64x(O))
-	#define	board_get_moves(board)	get_moves_avx(_mm256_set1_epi64x((board)->player), _mm256_set1_epi64x((board)->opponent))
-	#define	vboard_get_moves(vboard)	get_moves_avx(_mm256_broadcastq_epi64((vboard).v2), _mm256_broadcastq_epi64(_mm_unpackhi_epi64((vboard).v2, (vboard).v2)))
-#else
-	unsigned long long get_moves(const unsigned long long, const unsigned long long);
-	#define	board_get_moves(board)	get_moves((board)->player, (board)->opponent)
-	#define	vboard_get_moves(vboard)	get_moves((vboard).board.player, (vboard).board.opponent)
-=======
-	#define	board_flip(board,x)	flip[x]((board)->player, (board)->opponent)
->>>>>>> 6506166 (More SSE optimizations)
-=======
 #else
   #if MOVE_GENERATOR == MOVE_GENERATOR_SSE_BSWAP
 	extern unsigned long long Flip(int, unsigned long long, unsigned long long);
@@ -499,7 +156,6 @@ extern unsigned long long A1_A8[256];
   #endif
 
 	#define	board_flip(board,x)	Flip((x), (board)->player, (board)->opponent)
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 #endif
 
 #ifndef vboard_flip
diff --git a/src/board_mmx.c b/src/board_mmx.c
index e4cccd0..a9dff19 100644
--- a/src/board_mmx.c
+++ b/src/board_mmx.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file board_mmx.c
  *
@@ -10,28 +6,9 @@
  * If both hasMMX and hasSSE2 are undefined, dynamic dispatching code
  * will be generated.  (This setting requires VC or GCC 4.4+)
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
  * @date 2014 - 2023
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 2014 - 2020
- * @author Toshihiko Okuhara
- * @version 4.4
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
- * @date 2014 - 2021
-=======
- * @date 2014 - 2022
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
- * @author Toshihiko Okuhara
- * @version 4.5
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
  */
 
 #include "bit.h"
@@ -39,27 +16,11 @@
 #include "board.h"
 #include "move.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifdef USE_GAS_MMX
   #ifndef hasMMX
 	#pragma GCC push_options
 	#pragma GCC target ("mmx")
   #endif
-=======
-#if !defined(hasSSE2) && defined(USE_GAS_MMX)
-#ifndef hasMMX
-	#pragma GCC push_options
-	#pragma GCC target ("mmx")
-#endif
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-#ifdef USE_GAS_MMX
-  #ifndef hasMMX
-	#pragma GCC push_options
-	#pragma GCC target ("mmx")
-  #endif
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 	#include <mmintrin.h>
 #endif
 
@@ -70,14 +31,6 @@ static const unsigned long long mask_33 = 0x3333333333333333ULL;
 static const unsigned long long mask_0F = 0x0f0f0f0f0f0f0f0fULL;
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#ifndef hasSSE2
-
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 #ifndef hasMMX
 bool	hasMMX = false;
 #endif
@@ -140,123 +93,6 @@ void init_mmx (void)
 		init_flip_sse();
 #endif
 }
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#endif	// hasSSE2
-
-#ifdef hasMMX
-/**
- * @brief Update a board.
- *
- * Update a board by flipping its discs and updating every other data,
- * according to the 'move' description.
- *
- * @param board the board to modify
- * @param move  A Move structure describing the modification.
- */
-#if defined(hasSSE2) && !defined(__3dNOW__)	// Faster on CPU with slow emms
-
-void board_update(Board *board, const Move *move)
-{
-	__m128i	F = _mm_loadl_epi64((__m128i *) &move->flipped);
-	__m128i	OP = _mm_loadu_si128((__m128i *) board);
-	OP = _mm_xor_si128(OP, _mm_or_si128(_mm_unpacklo_epi64(F, F), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x])));
-	_mm_storeu_si128((__m128i *) board, _mm_shuffle_epi32(OP, 0x4e));
-	board_check(board);
-}
-
-#elif defined(USE_MSVC_X86)
-
-void board_update(Board *board, const Move *move)
-{
-	__m64	F = *(__m64 *) &move->flipped;
-	__m64	P = _m_pxor(*(__m64 *) &board->player, _m_por(F, *(__m64 *) &X_TO_BIT[move->x]));
-	__m64	O = _m_pxor(*(__m64 *) &board->opponent, F);
-	*(__m64 *) &board->player = O;
-	*(__m64 *) &board->opponent = P;
-	_mm_empty();
-	board_check(board);
-}
-
-#else
-
-void board_update(Board *board, const Move *move)
-{
-	__asm__ (
-		"movq	%2, %%mm1\n\t"
-		"movq	%3, %%mm0\n\t"
-		"por	%%mm1, %%mm0\n\t"
-		"pxor	%0, %%mm0\n\t"
-		"pxor	%1, %%mm1\n\t"
-		"movq	%%mm0, %1\n\t"
-		"movq	%%mm1, %0\n\t"
-		"emms"
-	: "=m" (board->player), "=m" (board->opponent)
-	: "m" (move->flipped), "m" (X_TO_BIT[move->x])
-	: "mm0", "mm1");
-	board_check(board);
-}
-
-#endif
-
-/**
- * @brief Restore a board.
- *
- * Restore a board by un-flipping its discs and restoring every other data,
- * according to the 'move' description, in order to cancel a board_update_move.
- *
- * @param board board to restore.
- * @param move  a Move structure describing the modification.
- */
-#if defined(hasSSE2) && !defined(__3dNOW__)
-
-void board_restore(Board *board, const Move *move)
-{
-	__m128i	F = _mm_loadl_epi64((__m128i *) &move->flipped);
-	__m128i	OP = _mm_shuffle_epi32(_mm_loadu_si128((__m128i *) board), 0x4e);
-	OP = _mm_xor_si128(OP, _mm_or_si128(_mm_unpacklo_epi64(F, F), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x])));
-	_mm_storeu_si128((__m128i *) board, OP);
-	board_check(board);
-}
-
-#elif defined(USE_MSVC_X86)
-
-void board_restore(Board *board, const Move *move)
-{
-	__m64	F = *(__m64 *) &move->flipped;
-	__m64	P = *(__m64 *) &board->opponent;
-	__m64	O = *(__m64 *) &board->player;
-	*(__m64 *) &board->player = _m_pxor(P, _m_por(F, *(__m64 *) &X_TO_BIT[move->x]));
-	*(__m64 *) &board->opponent = _m_pxor(O, F);
-	_mm_empty();
-	board_check(board);
-}
-
-#else
-
-void board_restore(Board *board, const Move *move)
-{
-	__asm__ (
-		"movq	%2, %%mm1\n\t"
-		"movq	%3, %%mm0\n\t"
-		"por	%%mm1, %%mm0\n\t"
-		"pxor	%1, %%mm0\n\t"
-		"pxor	%0, %%mm1\n\t"
-		"movq	%%mm0, %0\n\t"
-		"movq	%%mm1, %1\n\t"
-		"emms"
-	: "=m" (board->player), "=m" (board->opponent)
-	: "m" (move->flipped), "m" (X_TO_BIT[move->x])
-	: "mm0", "mm1");
-	board_check(board);
-}
-
-#endif
-#endif // hasMMX
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 
 /**
  * @brief MMX translation of get_moves
@@ -266,31 +102,13 @@ void board_restore(Board *board, const Move *move)
  */
 #ifdef USE_MSVC_X86
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 unsigned long long get_moves_mmx(const unsigned long long P_, const unsigned long long O_)
-=======
-unsigned long long get_moves_mmx(unsigned long long P_, unsigned long long O_)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-unsigned long long get_moves_mmx(const unsigned long long P_, const unsigned long long O_)
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
 {
 	unsigned int movesL, movesH, mO1, flip1, pre1;
 	__m64	P, O, M, mO, flip, pre;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	P = _m_punpckldq(_m_from_int(P_), _m_from_int(P_ >> 32));
-	O = _m_punpckldq(_m_from_int(O_), _m_from_int(O_ >> 32));	mO1 = (unsigned int) O_ & 0x7e7e7e7e;
-=======
-	P = *(__m64 *) &P_;
-	O = *(__m64 *) &O_;						mO1 = (unsigned int) O_ & 0x7e7e7e7e;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	P = _m_punpckldq(_m_from_int(P_), _m_from_int(P_ >> 32));
 	O = _m_punpckldq(_m_from_int(O_), _m_from_int(O_ >> 32));	mO1 = (unsigned int) O_ & 0x7e7e7e7e;
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
 		/* shift = +8 */						/* shift = +1 */
 	flip = _m_pand(O, _m_psllqi(P, 8));				flip1  = mO1 & ((unsigned int) P_ << 1);
 	flip = _m_por(flip, _m_pand(O, _m_psllqi(flip, 8)));		flip1 |= mO1 & (flip1 << 1);
@@ -343,15 +161,7 @@ unsigned long long get_moves_mmx(const unsigned long long P_, const unsigned lon
 
 #else
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 unsigned long long get_moves_mmx(const unsigned long long P, const unsigned long long O)
-=======
-unsigned long long get_moves_mmx(unsigned long long P, unsigned long long O)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-unsigned long long get_moves_mmx(const unsigned long long P, const unsigned long long O)
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
 {
 	unsigned long long moves;
 	__asm__ (
@@ -496,11 +306,6 @@ unsigned long long get_moves_mmx(const unsigned long long P, const unsigned long
  * x 1.5 faster bench stability on 32-bit x86.
  *
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 #ifdef hasMMX
 static void get_full_lines(const unsigned long long disc_, unsigned long long full[4])
 {
@@ -578,174 +383,23 @@ int get_stability_fulls(unsigned long long P, unsigned long long O, unsigned lon
 			stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), ((__m64 *) full)[1]);
 			stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), ((__m64 *) full)[3]);
 			stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), ((__m64 *) full)[2]);
-=======
-#ifdef USE_MSVC_X86
-
-unsigned long long get_all_full_lines_mmx(const unsigned long long disc_, V4DI *full)
-=======
-#if defined(hasMMX) && !defined(hasSSE2)
-<<<<<<< HEAD
-<<<<<<< HEAD
-unsigned long long get_all_full_lines(const unsigned long long disc_, V4DI *full)
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-=======
-void get_all_full_lines(const unsigned long long disc_, unsigned long long full[5])
->>>>>>> 4303b09 (Returns all full lines in full[4])
-=======
-static void get_full_lines(const unsigned long long disc_, unsigned long long full[4])
->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX)
-{
-	__m64	disc = *(__m64 *) &disc_;
-	__m64	full_l, full_r;
-	unsigned int	full_v;
-	const __m64	kFF = _m_pcmpeqb(disc, disc);
-	static const unsigned long long e7[] = { 0xff01010101010101, 0x80808080808080ff, 0xffff030303030303, 0xc0c0c0c0c0c0ffff, 0xffffffff0f0f0f0f, 0xf0f0f0f0ffffffff };
-	static const unsigned long long e9[] = { 0xff80808080808080, 0x01010101010101ff, 0xffffc0c0c0c0c0c0, 0x030303030303ffff, 0x0f0f0f0ff0f0f0f0 };
-
-	// get_full_lines_mmx(full_d7, disc, 7, e7);
-	full_l = _m_pand(disc, _m_por(((__m64 *) e7)[0], _m_psrlqi(disc, 7)));
-	full_r = _m_pand(disc, _m_por(((__m64 *) e7)[1], _m_psllqi(disc, 7)));
-	full_l = _m_pand(full_l, _m_por(((__m64 *) e7)[2], _m_psrlqi(full_l, 14)));
-	full_r = _m_pand(full_r, _m_por(((__m64 *) e7)[3], _m_psllqi(full_r, 14)));
-	full_l = _m_pand(full_l, _m_por(((__m64 *) e7)[4], _m_psrlqi(full_l, 28)));
-	full_r = _m_pand(full_r, _m_por(((__m64 *) e7)[5], _m_psllqi(full_r, 28)));
-	((__m64 *) full)[3] = _m_pand(full_l, full_r);
-
-	// get_full_lines_mmx(full_d9, disc, 9, e9);
-	full_l = _m_pand(disc, _m_por(((__m64 *) e9)[0], _m_psrlqi(disc, 9)));
-	full_r = _m_pand(disc, _m_por(((__m64 *) e9)[1], _m_psllqi(disc, 9)));
-	full_l = _m_pand(full_l, _m_por(((__m64 *) e9)[2], _m_psrlqi(full_l, 18)));
-	full_r = _m_pand(full_r, _m_por(((__m64 *) e9)[3], _m_psllqi(full_r, 18)));
-	((__m64 *) full)[2] = _m_pand(_m_pand(full_l, full_r), _m_por(((__m64 *) e9)[4], _m_por(_m_psrlqi(full_l, 36), _m_psllqi(full_r, 36))));
-
-	// get_full_lines_mmx(full_h, disc, 1, e1);
-	((__m64 *) full)[0] = _m_pcmpeqb(kFF, disc);
-	_mm_empty();
-
-	// get_full_lines_mmx(full_v, disc, 8, e8);
-	full_v = (unsigned int) disc_ & (unsigned int)(disc_ >> 32);
-	full_v &= (full_v >> 16) | (full_v << 16);	// ror 16
-	full_v &= (full_v >> 8) | (full_v << 24);	// ror 8
-	full[1] = full_v | ((unsigned long long) full_v << 32);
-}
-
-// returns all full in full[4] in addition to stability count
-int get_stability_fulls(unsigned long long P, unsigned long long O, unsigned long long full[5])
-{
-	__m64	P_central, stable, stable_h, stable_v, stable_d7, stable_d9, old_stable, m;
-	unsigned int	OL, OH, PL, PH, t, a1a8, h1h8, SL, SH;
-
-	get_full_lines(P | O, full);
-
-	OL = (unsigned int) O;	OH = (unsigned int)(O >> 32);
-	PL = (unsigned int) P;	PH = (unsigned int)(P >> 32);
-	SL = PL & 0x7f7f7f00;	SH = PH & 0x007f7f7f;
-	P_central = _m_punpckldq(_m_from_int(SL), _m_from_int(SH));
-
-	// P_central & allfull
-	full[4] = full[0] & full[1] & full[2] & full[3];
-	SL &= (unsigned int) full[4];
-	SH &= (unsigned int)(full[4] >> 32);
-
-	// compute the exact stable edges (from precomputed tables)
-	a1a8 = edge_stability[((((PL & 0x01010101) + ((PH & 0x01010101) << 4)) * 0x01020408) >> 24) * 256
-		+ ((((OL & 0x01010101) + ((OH & 0x01010101) << 4)) * 0x01020408) >> 24)];
-	h1h8 = edge_stability[((((PH & 0x80808080) + ((PL & 0x80808080) >> 4)) * 0x00204081) >> 24) * 256
-		+ ((((OH & 0x80808080) + ((OL & 0x80808080) >> 4)) * 0x00204081) >> 24)];
-	SL |= edge_stability[(PL & 0xff) * 256 + (OL & 0xff)]
-		| (((a1a8 & 0x0f) * 0x00204081) & 0x01010101)
-		| (((h1h8 & 0x0f) * 0x10204080) & 0x80808080);
-	SH |= (edge_stability[((PH >> 16) & 0xff00) + (OH >> 24)] << 24)
-		| (((a1a8 >> 4) * 0x00204081) & 0x01010101)
-		| (((h1h8 >> 4) * 0x10204080) & 0x80808080);
-	stable = _m_punpckldq(_m_from_int(SL), _m_from_int(SH));
-
-	// now compute the other stable discs (ie discs touching another stable disc in each flipping direction).
-	t = SL | SH;
-	if (t) {
-		do {
-			old_stable = stable;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-			stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), full_h);
-			stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), full_v);
-			stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), full_d7);
-			stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), full_d9);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-			stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), full.v1[0]);
-			stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), full.v1[1]);
-			stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), full.v1[3]);
-			stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), full.v1[2]);
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
-			stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), full->v1[0]);
-			stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), full->v1[1]);
-			stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), full->v1[3]);
-			stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), full->v1[2]);
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-=======
-			stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), full[0]);
-			stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), full[1]);
-			stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), full[3]);
-			stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), full[2]);
->>>>>>> 4303b09 (Returns all full lines in full[4])
-=======
-			stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), ((__m64 *) full)[0]);
-			stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), ((__m64 *) full)[1]);
-			stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), ((__m64 *) full)[3]);
-			stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), ((__m64 *) full)[2]);
->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX)
 			stable = _m_por(stable, _m_pand(_m_pand(_m_pand(_m_pand(stable_h, stable_v), stable_d7), stable_d9), P_central));
 			m = _m_pxor(stable, old_stable);
 		} while (_m_to_int(_m_packsswb(m, m)) != 0);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
   #ifdef POPCOUNT
 		t = bit_count_32(_m_to_int(stable)) + bit_count_32(_m_to_int(_m_psrlqi(stable, 32)));
   #else
-=======
-#ifdef POPCOUNT
-	#ifdef _MSC_VER
-		t = __popcnt(_m_to_int(stable)) + __popcnt(_m_to_int(_m_psrlqi(stable, 32)));
-	#else
-		t = __builtin_popcount(_m_to_int(stable)) + __builtin_popcount(_m_to_int(_m_psrlqi(stable, 32)));
-	#endif
-#else
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-  #ifdef POPCOUNT
-		t = bit_count_32(_m_to_int(stable)) + bit_count_32(_m_to_int(_m_psrlqi(stable, 32)));
-  #else
->>>>>>> 30464b5 (add hash_prefetch to NWS_endgame)
 		m = _m_psubd(stable, _m_pand(_m_psrlqi(stable, 1), *(__m64 *) &mask_55));
 		m = _m_paddd(_m_pand(m, *(__m64 *) &mask_33), _m_pand(_m_psrlqi(m, 2), *(__m64 *) &mask_33));
 		m = _m_pand(_m_paddd(m, _m_psrlqi(m, 4)), *(__m64 *) &mask_0F);
 		t = ((unsigned int) _m_to_int(_m_paddb(m, _m_psrlqi(m, 32))) * 0x01010101u) >> 24;
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #endif
-=======
-#endif
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
   #endif
->>>>>>> 30464b5 (add hash_prefetch to NWS_endgame)
 	}
 	_mm_empty();
 	return t;
 }
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
 
->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX)
 // returns stability count only
 int get_stability(const unsigned long long P, const unsigned long long O)
 {
@@ -754,1235 +408,7 @@ int get_stability(const unsigned long long P, const unsigned long long O)
 	return get_stability_fulls(P, O, full);
 }
 #endif // hasMMX
-=======
-#elif defined(USE_GAS_MMX) && !(defined(__clang__) && (__clang__major__ < 3))
-// LLVM ERROR: Unsupported asm: input constraint with a matching output constraint of incompatible type!
-=======
-#elif defined(USE_GAS_MMX)
->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line)
-
-#define	get_full_lines_mmx(result,disc,dir,edge)	__asm__ (\
-		"movq	%1, %%mm0\n\t"		"movq	%1, %%mm1\n\t"\
-		"psrlq	%2, %%mm0\n\t"		"psllq	%2, %%mm1\n\t"\
-		"por	%5, %%mm0\n\t"		"por	%6, %%mm1\n\t"\
-		"pand	%1, %%mm0\n\t"		"pand	%1, %%mm1\n\t"\
-		"movq	%%mm0, %%mm2\n\t"	"movq	%%mm1, %%mm3\n\t"\
-		"psrlq	%3, %%mm0\n\t"		"psllq	%3, %%mm1\n\t"\
-		"por	%7, %%mm0\n\t"		"por	%8, %%mm1\n\t"\
-		"pand	%%mm2, %%mm0\n\t"	"pand	%%mm3, %%mm1\n\t"\
-		"movq	%%mm0, %%mm2\n\t"	"pand	%%mm1, %%mm0\n\t"\
-		"psrlq	%4, %%mm2\n\t"		"psllq	%4, %%mm1\n\t"\
-		"por	%9, %%mm2\n\t"		"por	%10, %%mm1\n\t"\
-		"pand	%%mm2, %%mm0\n\t"	"pand	%%mm1, %%mm0\n\t"\
-		"movq	%%mm0, %0"\
-	: "=m" (result)\
-	: "y" (disc), "i" (dir), "i" (dir * 2), "i" (dir * 4),\
-	  "m" (edge[0]), "m" (edge[1]), "m" (edge[2]), "m" (edge[3]), "m" (edge[4]), "m" (edge[5])\
-	: "mm0", "mm1", "mm2", "mm3");
-
-unsigned long long get_all_full_lines_mmx(const unsigned long long disc_, V4DI *full)
-{
-	__m64	disc;
-	unsigned int	full_v;
-	static const unsigned long long e7[] = { 0xff01010101010101, 0x80808080808080ff, 0xffff030303030303, 0xc0c0c0c0c0c0ffff, 0xffffffff0f0f0f0f, 0xf0f0f0f0ffffffff };
-	static const unsigned long long e9[] = { 0xff80808080808080, 0x01010101010101ff, 0xffffc0c0c0c0c0c0, 0x030303030303ffff, 0xfffffffff0f0f0f0, 0x0f0f0f0fffffffff };
-
-	__asm__ (
-		"movd	%1, %0\n\t"
-		"punpckldq %2, %0\n\t"
-	: "=&y" (disc) : "m" (disc_), "m" (((unsigned int *)&disc_)[1]));
-
-	get_full_lines_mmx(full->ull[3], disc, 7, e7);
-	get_full_lines_mmx(full->ull[2], disc, 9, e9);
-
-	// get_full_lines_mmx(full_h, disc, 1, e1);
-	__asm__ (
-		"pcmpeqb %%mm0, %%mm0\n\t"
-		"pcmpeqb %1, %%mm0\n\t"
-		"movq	%%mm0, %0\n\t"
-		"emms"
-	: "=m" (full->ull[0]) : "y" (disc) : "mm0");
-
-	// get_full_lines_mmx(full_v, disc, 8, e8);
-	full_v = (unsigned int) disc_ & (unsigned int)(disc_ >> 32);
-	full_v &= (full_v >> 16) | (full_v << 16);	// ror 16
-	full_v &= (full_v >> 8) | (full_v << 24);	// ror 8
-	full->ull[1] = full_v | ((unsigned long long) full_v << 32);
-
-	return full->ull[0] & full->ull[1] & full->ull[2] & full->ull[3];
-}
-
-int get_stability_mmx(unsigned long long P, unsigned long long O)
-{
-	V4DI	full;
-	unsigned long long allfull;
-	__m64	P_central, stable;
-	unsigned int	OL, OH, PL, PH, t, a1a8, h1h8, SL, SH;
-
-	allfull = get_all_full_lines_mmx(P | O, &full);
-
-	// compute the exact stable edges (from precomputed tables)
-	OL = (unsigned int) O;	OH = (unsigned int)(O >> 32);
-	PL = (unsigned int) P;	PH = (unsigned int)(P >> 32);
-	a1a8 = edge_stability[((((PL & 0x01010101u) + ((PH & 0x01010101u) << 4)) * 0x01020408u) >> 24) * 256
-		+ ((((OL & 0x01010101u) + ((OH & 0x01010101u) << 4)) * 0x01020408u) >> 24)];
-	h1h8 = edge_stability[((((PH & 0x80808080u) + ((PL & 0x80808080u) >> 4)) * 0x00204081u) >> 24) * 256
-		+ ((((OH & 0x80808080u) + ((OL & 0x80808080u) >> 4)) * 0x00204081u) >> 24)];
-	SL = edge_stability[(PL & 0xff) * 256 + (OL & 0xff)]
-		| (((a1a8 & 0x0f) * 0x00204081) & 0x01010101)
-		| (((h1h8 & 0x0f) * 0x10204080) & 0x80808080);
-	SH = (edge_stability[((PH >> 16) & 0xff00) + (OH >> 24)] << 24)
-		| (((a1a8 >> 4) * 0x00204081) & 0x01010101)
-		| (((h1h8 >> 4) * 0x10204080) & 0x80808080);
-
-	PL &= 0x7f7f7f00;
-	PH &= 0x007f7f7f;
-	SL |= (unsigned int) allfull & PL;
-	SH |= (unsigned int)(allfull >> 32) & PH;
-
-	__asm__(
-		"movd	%2, %0\n\t"		"movd	%4, %1\n\t"
-		"movd	%3, %%mm0\n\t"		"movd	%5, %%mm1\n\t"
-		"punpckldq %%mm0, %0\n\t"	"punpckldq %%mm1, %1\n\t"
-	: "=y" (P_central), "=y" (stable) : "g" (PL), "g" (PH), "g" (SL), "g" (SH) : "mm0", "mm1" );
-
-	// now compute the other stable discs (ie discs touching another stable disc in each flipping direction).
-	t = SL | SH;
-	if (t) {
-		do {
-			__asm__ (
-				"movq	%1, %%mm3\n\t"
-				"movq	%6, %1\n\t"
-				"movq	%%mm3, %%mm0\n\t"	"movq	%%mm3, %%mm1\n\t"
-				"psrlq	$1, %%mm0\n\t"		"psllq	$1, %%mm1\n\t"		"movq	%%mm3, %%mm2\n\t"
-				"por	%%mm1, %%mm0\n\t"	"movq	%%mm3, %%mm1\n\t"	"psrlq	$7, %%mm2\n\t"
-				"por	%2, %%mm0\n\t"		"psllq	$7, %%mm1\n\t"		"por	%%mm1, %%mm2\n\t"
-				"pand	%%mm0, %1\n\t"						"por	%4, %%mm2\n\t"
-				"movq	%%mm3, %%mm0\n\t"	"movq	%%mm3, %%mm1\n\t"	"pand	%%mm2, %1\n\t"
-				"psrlq	$8, %%mm0\n\t"		"psllq	$8, %%mm1\n\t"		"movq	%%mm3, %%mm2\n\t"
-				"por	%%mm1, %%mm0\n\t"	"movq	%%mm3, %%mm1\n\t"	"psrlq	$9, %%mm2\n\t"
-				"por	%3, %%mm0\n\t"		"psllq	$9, %%mm1\n\t"		"por	%%mm1, %%mm2\n\t"
-				"pand	%%mm0, %1\n\t"						"por	%5, %%mm2\n\t"
-												"pand	%%mm2, %1\n\t"
-				"por	%%mm3, %1\n\t"
-				"pxor	%1, %%mm3\n\t"
-				"packsswb %%mm3, %%mm3\n\t"
-				"movd	%%mm3, %0"
-			: "=g" (t), "+y" (stable)
-			: "m" (full.ull[0]), "m" (full.ull[1]), "m" (full.ull[3]), "m" (full.ull[2]), "y" (P_central)
-			: "mm0", "mm1", "mm2", "mm3");
-		} while (t);
-
-		// bit_count(stable)
-#ifdef POPCOUNT
-		__asm__ (
-			"movd	%1, %0\n\t"
-			"psrlq	$32, %1\n\t"
-			"movd	%1, %%edx\n\t"
-			"popcntl %0, %0\n\t"
-			"popcntl %%edx, %%edx\n\t"
-			"addl	%%edx, %0"
-		: "=&a" (t) : "y" (stable) : "edx");
-#else
-		__asm__ (
-	 		"movq	%1, %%mm0\n\t"
-			"psrlq	$1, %1\n\t"
-			"pand	%2, %1\n\t"
-			"psubd	%1, %%mm0\n\t"
-
-			"movq	%%mm0, %%mm1\n\t"
-			"psrlq	$2, %%mm0\n\t"
-			"pand	%3, %%mm1\n\t"
-			"pand	%3, %%mm0\n\t"
-			"paddd	%%mm1, %%mm0\n\t"
-
-			"movq	%%mm0, %%mm1\n\t"
-			"psrlq	$4, %%mm0\n\t"
-			"paddd	%%mm1, %%mm0\n\t"
-			"pand	%4, %%mm0\n\t"
-	#ifdef hasSSE2
-			"pxor	%%mm1, %%mm1\n\t"
-			"psadbw	%%mm1, %%mm0\n\t"
-			"movd	%%mm0, %0\n\t"
-	#else
-			"movq	%%mm0, %%mm1\n\t"
-			"psrlq	$32, %%mm0\n\t"
-			"paddb	%%mm1, %%mm0\n\t"
-
-			"movd	%%mm0, %0\n\t"
-			"imull	$0x01010101, %0, %0\n\t"
-			"shrl	$24, %0"
-	#endif
-		: "=a" (t) : "y" (stable), "m" (mask_55), "my" (mask_33), "m" (mask_0F) : "mm0", "mm1");
-#endif
-	}
-	__asm__ ( "emms" );
-	return t;
-}
-#endif // USE_MSVC_X86
-=======
-#endif // hasMMX
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-
-<<<<<<< HEAD
-/**
- * @brief MMX translation of get_potential_mobility
- *
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return a count of potential moves.
- */
-#ifdef USE_MSVC_X86
-
-int get_potential_mobility_mmx(unsigned long long P, unsigned long long O)
-{
-	__m64	m, mO;
-	int	count;
-	static const unsigned long long mask_v = 0x00ffffffffffff00ULL;
-	// static const unsigned long long mask_d = 0x007e7e7e7e7e7e00ULL;	// = mask_7e & mask_v
-  #ifdef POPCOUNT
-	int	mh, ml;
-  #else
-	static const unsigned long long mask_15 = 0x1555555555555515ULL;
-	static const unsigned long long mask_01 = 0x0100000000000001ULL;
-  #endif
-
-	mO = _m_pand(*(__m64 *) &O, *(__m64 *) &mask_7e);
-	m = _m_por(_m_psllqi(mO, 1), _m_psrlqi(mO, 1));
-	mO = _m_pand(*(__m64 *) &O, *(__m64 *) &mask_v);
-	m = _m_por(m, _m_por(_m_psllqi(mO, 8), _m_psrlqi(mO, 8)));
-	mO = _m_pand(mO, *(__m64 *) &mask_7e);
-	m = _m_por(m, _m_por(_m_psllqi(mO, 7), _m_psrlqi(mO, 7)));
-	m = _m_por(m, _m_por(_m_psllqi(mO, 9), _m_psrlqi(mO, 9)));
-	m = _m_pandn(_m_por(*(__m64 *) &O, *(__m64 *) &P), m);
-
-  #ifdef POPCOUNT
-	ml = _m_to_int(m);
-	mh = _m_to_int(_m_psrlqi(m, 32));
-	count = bit_count_32(ml) + bit_count_32(mh) + bit_count_32((ml & 0x00000081) + (mh & 0x81000000));
-  #else
-	m = _m_paddd(_m_psubd(m, _m_pand(_m_psrlqi(m, 1), *(__m64 *) &mask_15)), _m_pand(m, *(__m64 *) &mask_01));
-	m = _m_paddd(_m_pand(m, *(__m64 *) &mask_33), _m_pand(_m_psrlqi(m, 2), *(__m64 *) &mask_33));
-	m = _m_pand(_m_paddd(m, _m_psrlqi(m, 4)), *(__m64 *) &mask_0F);
-	count = ((unsigned int) _m_to_int(_m_paddb(m, _m_psrlqi(m, 32))) * 0x01010101u) >> 24;
-  #endif
-	_mm_empty();
-	return count;
-}
-
-#elif defined(USE_GAS_MMX)
-
-int get_potential_mobility_mmx(unsigned long long P, unsigned long long O)
-{
-	int	count;
-	static const unsigned long long mask_v = 0x00ffffffffffff00ULL;
-	// static const unsigned long long mask_d = 0x007e7e7e7e7e7e00ULL;	// = mask_7e & mask_v
-  #ifndef POPCOUNT
-	static const unsigned long long mask_15 = 0x1555555555555515ULL;
-	static const unsigned long long mask_01 = 0x0100000000000001ULL;
-  #endif
-
-	__asm__ (
-		"movq	%3, %%mm2\n\t"		"movq	%4, %%mm5\n\t"
-		"pand	%2, %%mm2\n\t"		"pand	%2, %%mm5\n\t"		"movq	%%mm2, %%mm3\n\t"
-		"movq	%%mm2, %%mm4\n\t"	"movq	%%mm5, %%mm6\n\t"	"pand	%%mm5, %%mm3\n\t"
-		"psllq	$1, %%mm2\n\t"		"psllq	$8, %%mm5\n\t"
-		"psrlq	$1, %%mm4\n\t"		"psrlq	$8, %%mm6\n\t"
-		"por	%%mm4, %%mm2\n\t"	"por	%%mm6, %%mm5\n\t"
-		"por	%%mm5, %%mm2\n\t"
-		"movq	%%mm3, %%mm5\n\t"
-		"movq	%%mm3, %%mm4\n\t"	"movq	%%mm5, %%mm6\n\t"
-		"psllq	$7, %%mm3\n\t"		"psllq	$9, %%mm5\n\t"
-		"psrlq	$7, %%mm4\n\t"		"psrlq	$9, %%mm6\n\t"
-		"por	%%mm4, %%mm3\n\t"	"por	%%mm6, %%mm5\n\t"
-		"por	%%mm3, %%mm2\n\t"	"por	%%mm5, %%mm2\n\t"
-		"por	%1, %2\n\t"
-		"pandn	%%mm2, %2\n\t"
-
-  #ifdef POPCOUNT
-		"movd	%2, %%ecx\n\t"
-		"popcntl %%ecx, %0\n\t"		"andl	$0x00000081, %%ecx\n\t"
-		"psrlq	$32, %2\n\t"		"popcntl %%ecx, %%ecx\n\t"
-		"movd	%2, %%edx\n\t"		"addl	%%ecx, %0\n\t"
-		"popcntl %%edx, %%ecx\n\t"	"andl	$0x81000000, %%edx\n\t"
-		"addl	%%ecx, %0\n\t"		"popcntl %%edx, %%edx\n\t"
-						"addl	%%edx, %0\n\t"
-		"emms"
-	: "=g" (count) : "y" (P), "y" (O), "m" (mask_7e), "m" (mask_v)
-	: "ecx", "edx", "mm2", "mm3", "mm4", "mm5", "mm6");
-
-  #else
-		"movq	%2, %1\n\t"		"movq	%2, %%mm2\n\t"
-		"psrlq	$1, %2\n\t"
-		"pand	%5, %2\n\t"		"pand	%6, %%mm2\n\t"
-		"psubd	%2, %1\n\t"		"paddd	%%mm2, %1\n\t"
-
-		"movq	%1, %2\n\t"
-		"psrlq	$2, %1\n\t"
-		"pand	%7, %2\n\t"
-		"pand	%7, %1\n\t"
-		"paddd	%2, %1\n\t"
-
-		"movq	%1, %2\n\t"
-		"psrlq	$4, %1\n\t"
-		"paddd	%2, %1\n\t"
-		"pand	%8, %1\n\t"
-	#ifdef hasSSE2
-		"pxor	%2, %2\n\t"
-		"psadbw	%2, %1\n\t"
-		"movd	%1, %0\n\t"
-	#else
-		"movq	%1, %2\n\t"
-		"psrlq	$32, %1\n\t"
-		"paddb	%2, %1\n\t"
-
-		"movd	%1, %0\n\t"
-		"imull	$0x01010101, %0, %0\n\t"
-		"shrl	$24, %0\n\t"
-	#endif
-		"emms"
-	: "=g" (count)
-	: "y" (P), "y" (O), "m" (mask_7e), "m" (mask_v),
-	  "m" (mask_15), "m" (mask_01), "m" (mask_33), "m" (mask_0F)
-	: "mm2", "mm3", "mm4", "mm5", "mm6");
-  #endif
-
-	return count;
-}
-#endif
-
-<<<<<<< HEAD
-/**
- * @brief MMX translation of board_get_hash_code.
- *
- * @param p pointer to 16 bytes to hash.
- * @return the hash code of the bitboard
- */
-
-#if defined(USE_GAS_MMX) && defined(__3dNOW__)
-
-unsigned long long board_get_hash_code_mmx(const unsigned char *p)
-{
-	unsigned long long h;
-
-	__asm__ volatile (
-		"movq	%0, %%mm0\n\t"		"movq	%1, %%mm1"
-	: : "m" (hash_rank[0][p[0]]), "m" (hash_rank[1][p[1]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[2][p[2]]), "m" (hash_rank[3][p[3]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[4][p[4]]), "m" (hash_rank[5][p[5]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[6][p[6]]), "m" (hash_rank[7][p[7]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[8][p[8]]), "m" (hash_rank[9][p[9]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[10][p[10]]), "m" (hash_rank[11][p[11]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[12][p[12]]), "m" (hash_rank[13][p[13]]));
-	__asm__ volatile (
-		"pxor	%1, %%mm0\n\t"		"pxor	%2, %%mm1\n\t"
-		"pxor	%%mm1, %%mm0\n\t"
-		"movd	%%mm0, %%eax\n\t"
-		"punpckhdq %%mm0, %%mm0\n\t"
-		"movd	%%mm0, %%edx\n\t"
-		"emms"
-	: "=A" (h)
-	: "m" (hash_rank[14][p[14]]), "m" (hash_rank[15][p[15]])
-	: "mm0", "mm1");
-
-	return h;
-}
-
-#endif // __3dNOW
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 
-=======
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 #if !defined(hasMMX) && defined(USE_GAS_MMX)
 	#pragma GCC pop_options
 #endif
-<<<<<<< HEAD
-=======
-/**
- * @file board_mmx.c
- *
- * MMX translation of some board.c functions for X86-32
- *
- * If both hasMMX and hasSSE2 are undefined, dynamic dispatching code
- * will be generated.  (This setting requires VC or GCC 4.4+)
- *
- * @date 2014 - 2020
- * @author Toshihiko Okuhara
- * @version 4.4
- */
-
-#include "bit.h"
-#include "hash.h"
-#include "board.h"
-#include "move.h"
-
-#if !defined(hasSSE2) && defined(USE_GAS_MMX)
-#ifndef hasMMX
-	#pragma GCC push_options
-	#pragma GCC target ("mmx")
-#endif
-	#include <mmintrin.h>
-#endif
-
-static const unsigned long long mask_7e = 0x7e7e7e7e7e7e7e7eULL;
-#ifndef POPCOUNT
-static const unsigned long long mask_55 = 0x5555555555555555ULL;
-static const unsigned long long mask_33 = 0x3333333333333333ULL;
-static const unsigned long long mask_0F = 0x0f0f0f0f0f0f0f0fULL;
-#endif
-
-#ifndef hasSSE2
-
-#ifndef hasMMX
-bool	hasMMX = false;
-#endif
-bool	hasSSE2 = false;
-
-void init_mmx (void)
-{
-	int	flg1, flg2, cpuid_edx, cpuid_ecx;
-#ifdef USE_MSVC_X86
-	int	cpuinfo[4];
-
-	__asm {
-		pushfd
-		pop	eax
-		mov	flg2, eax
-		btc	eax, 21
-		push	eax
-		popfd
-		pushfd
-		pop	flg1
-	}
-
-	if (flg1 == flg2)	/* CPUID not supported */
-		return;
-
-	__cpuid(cpuinfo, 1);
-	cpuid_edx = cpuinfo[3];
-	cpuid_ecx = cpuinfo[2];
-
-#else
-	__asm__ (
-		"pushfl\n\t"
-		"popl	%0\n\t"
-		"movl	%0, %1\n\t"
-		"btc	$21, %0\n\t"	/* flip ID bit in EFLAGS */
-		"pushl	%0\n\t"
-		"popfl\n\t"
-		"pushfl\n\t"
-		"popl	%0"
-	: "=r" (flg1), "=r" (flg2) );
-
-	if (flg1 == flg2)	/* CPUID not supported */
-		return;
-
-	__asm__ (
-		"movl	$1, %%eax\n\t"
-		"cpuid"
-	: "=d" (cpuid_edx), "=c" (cpuid_ecx) :: "%eax", "%ebx" );
-
-#endif
-
-#ifndef hasMMX
-	hasMMX  = ((cpuid_edx & 0x00800000u) != 0);
-#endif
-	hasSSE2 = ((cpuid_edx & 0x04000000u) != 0);
-	// hasPOPCNT = ((cpuid_ecx & 0x00800000u) != 0);
-
-#if (MOVE_GENERATOR == MOVE_GENERATOR_32)
-	if (hasSSE2)
-		init_flip_sse();
-#endif
-}
-#endif	// hasSSE2
-
-#ifdef hasMMX
-/**
- * @brief Update a board.
- *
- * Update a board by flipping its discs and updating every other data,
- * according to the 'move' description.
- *
- * @param board the board to modify
- * @param move  A Move structure describing the modification.
- */
-#if defined(hasSSE2) && !defined(__3dNOW__)	// Faster on CPU with slow emms
-
-void board_update(Board *board, const Move *move)
-{
-	__m128i	F = _mm_loadl_epi64((__m128i *) &move->flipped);
-	__m128i	OP = _mm_loadu_si128((__m128i *) board);
-	OP = _mm_xor_si128(OP, _mm_or_si128(_mm_unpacklo_epi64(F, F), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x])));
-	_mm_storel_pi((__m64 *) &board->opponent, _mm_castsi128_ps(OP));
-	_mm_storeh_pi((__m64 *) &board->player, _mm_castsi128_ps(OP));
-	board_check(board);
-}
-
-#elif defined(USE_MSVC_X86)
-
-void board_update(Board *board, const Move *move)
-{
-	__m64	F = *(__m64 *) &move->flipped;
-	__m64	P = _m_pxor(*(__m64 *) &board->player, _m_por(F, *(__m64 *) &X_TO_BIT[move->x]));
-	__m64	O = _m_pxor(*(__m64 *) &board->opponent, F);
-	*(__m64 *) &board->player = O;
-	*(__m64 *) &board->opponent = P;
-	_mm_empty();
-	board_check(board);
-}
-
-#else
-
-void board_update(Board *board, const Move *move)
-{
-	__asm__ (
-		"movq	%2, %%mm1\n\t"
-		"movq	%3, %%mm0\n\t"
-		"por	%%mm1, %%mm0\n\t"
-		"pxor	%0, %%mm0\n\t"
-		"pxor	%1, %%mm1\n\t"
-		"movq	%%mm0, %1\n\t"
-		"movq	%%mm1, %0\n\t"
-		"emms"
-	: "=m" (board->player), "=m" (board->opponent)
-	: "m" (move->flipped), "m" (x_to_bit(move->x))
-	: "mm0", "mm1");
-	board_check(board);
-}
-
-#endif
-
-/**
- * @brief Restore a board.
- *
- * Restore a board by un-flipping its discs and restoring every other data,
- * according to the 'move' description, in order to cancel a board_update_move.
- *
- * @param board board to restore.
- * @param move  a Move structure describing the modification.
- */
-#if defined(hasSSE2) && !defined(__3dNOW__)
-
-void board_restore(Board *board, const Move *move)
-{
-	__m128i	F = _mm_loadl_epi64((__m128i *) &move->flipped);
-	__m128i	OP = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *) &board->opponent), _mm_loadl_epi64((__m128i *) &board->player));
-	OP = _mm_xor_si128(OP, _mm_or_si128(_mm_unpacklo_epi64(F, F), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x])));
-	_mm_storeu_si128((__m128i *) board, OP);
-	board_check(board);
-}
-
-#elif defined(USE_MSVC_X86)
-
-void board_restore(Board *board, const Move *move)
-{
-	__m64	F = *(__m64 *) &move->flipped;
-	__m64	P = *(__m64 *) &board->opponent;
-	__m64	O = *(__m64 *) &board->player;
-	*(__m64 *) &board->player = _m_pxor(P, _m_por(F, *(__m64 *) &X_TO_BIT[move->x]));
-	*(__m64 *) &board->opponent = _m_pxor(O, F);
-	_mm_empty();
-	board_check(board);
-}
-
-#else
-
-void board_restore(Board *board, const Move *move)
-{
-	__asm__ (
-		"movq	%2, %%mm1\n\t"
-		"movq	%3, %%mm0\n\t"
-		"por	%%mm1, %%mm0\n\t"
-		"pxor	%1, %%mm0\n\t"
-		"pxor	%0, %%mm1\n\t"
-		"movq	%%mm0, %0\n\t"
-		"movq	%%mm1, %1\n\t"
-		"emms"
-	: "=m" (board->player), "=m" (board->opponent)
-	: "m" (move->flipped), "m" (x_to_bit(move->x))
-	: "mm0", "mm1");
-	board_check(board);
-}
-
-#endif
-#endif // hasMMX
-
-/**
- * @brief MMX translation of get_moves
- *
- * x 2 faster bench mobility on 32-bit x86.
- *
- */
-#ifdef USE_MSVC_X86
-
-unsigned long long get_moves_mmx(unsigned long long P_, unsigned long long O_)
-{
-	unsigned int movesL, movesH, mO1, flip1, pre1;
-	__m64	P, O, M, mO, flip, pre;
-
-	P = *(__m64 *) &P_;
-	O = *(__m64 *) &O_;						mO1 = (unsigned int) O_ & 0x7e7e7e7e;
-		/* shift = +8 */						/* shift = +1 */
-	flip = _m_pand(O, _m_psllqi(P, 8));				flip1  = mO1 & ((unsigned int) P_ << 1);
-	flip = _m_por(flip, _m_pand(O, _m_psllqi(flip, 8)));		flip1 |= mO1 & (flip1 << 1);
-	pre  = _m_pand(O, _m_psllqi(O, 8));				pre1   = mO1 & (mO1 << 1);
-	flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 16)));		flip1 |= pre1 & (flip1 << 2);
-	flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 16)));		flip1 |= pre1 & (flip1 << 2);
-	M = _m_psllqi(flip, 8);						movesL = flip1 << 1;
-		/* shift = -8 */						/* shift = -1 */
-	flip = _m_pand(O, _m_psrlqi(P, 8));				flip1  = mO1 & ((unsigned int) P_ >> 1);
-	flip = _m_por(flip, _m_pand(O, _m_psrlqi(flip, 8)));		flip1 |= mO1 & (flip1 >> 1);
-	pre  = _m_psrlqi(pre, 8);					pre1 >>= 1;
-	flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 16)));		flip1 |= pre1 & (flip1 >> 2);
-	flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 16)));		flip1 |= pre1 & (flip1 >> 2);
-	M = _m_por(M, _m_psrlqi(flip, 8));				movesL |= flip1 >> 1;
-		/* shift = +7 */
-	mO = _m_pand(O, *(__m64 *) &mask_7e);				mO1 = (unsigned int)(O_ >> 32) & 0x7e7e7e7e;
-	flip = _m_pand(mO, _m_psllqi(P, 7));
-	flip = _m_por(flip, _m_pand(mO, _m_psllqi(flip, 7)));
-	pre  = _m_pand(mO, _m_psllqi(mO, 7));
-	flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 14)));
-	flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 14)));
-	M = _m_por(M, _m_psllqi(flip, 7));
-		/* shift = -7 */						/* shift = +1 */
-	flip = _m_pand(mO, _m_psrlqi(P, 7));				flip1  = mO1 & ((unsigned int)(P_ >> 32) << 1);
-	flip = _m_por(flip, _m_pand(mO, _m_psrlqi(flip, 7)));		flip1 |= mO1 & (flip1 << 1);
-	pre  = _m_psrlqi(pre, 7);					pre1   = mO1 & (mO1 << 1);
-	flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 14)));		flip1 |= pre1 & (flip1 << 2);
-	flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 14)));		flip1 |= pre1 & (flip1 << 2);
-	M = _m_por(M, _m_psrlqi(flip, 7));				movesH = flip1 << 1;
-		/* shift = +9 */						/* shift = -1 */
-	flip = _m_pand(mO, _m_psllqi(P, 9));				flip1  = mO1 & ((unsigned int)(P_ >> 32) >> 1);
-	flip = _m_por(flip, _m_pand(mO, _m_psllqi(flip, 9)));		flip1 |= mO1 & (flip1 >> 1);
-	pre  = _m_pand(mO, _m_psllqi(mO, 9));				pre1 >>= 1;
-	flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 18)));		flip1 |= pre1 & (flip1 >> 2);
-	flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 18)));		flip1 |= pre1 & (flip1 >> 2);
-	M = _m_por(M, _m_psllqi(flip, 9));				movesH |= flip1 >> 1;
-		/* shift = -9 */
-	flip = _m_pand(mO, _m_psrlqi(P, 9));
-	flip = _m_por(flip, _m_pand(mO, _m_psrlqi(flip, 9)));
-	pre  = _m_psrlqi(pre, 9);
-	flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 18)));
-	flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 18)));
-	M = _m_por(M, _m_psrlqi(flip, 9));
-
-	movesL |= _m_to_int(M);
-	movesH |= _m_to_int(_m_punpckhdq(M, M));
-	_mm_empty();
-	return (((unsigned long long) movesH << 32) | movesL) & ~(P_|O_);	// mask with empties
-}
-
-#else
-
-unsigned long long get_moves_mmx(unsigned long long P, unsigned long long O)
-{
-	unsigned long long moves;
-	__asm__ (
-		"movl	%1, %%ebx\n\t"		"movd	%1, %%mm4\n\t"		// (movd for store-forwarding)
-		"movl	%3, %%edi\n\t"		"movd	%3, %%mm5\n\t"
-		"andl	$0x7e7e7e7e, %%edi\n\t"	"punpckldq %2, %%mm4\n\t"
-						"punpckldq %4, %%mm5\n\t"
-				/* shift=-1 */			/* shift=-8 */
-		"movl	%%ebx, %%eax\n\t"	"movq	%%mm4, %%mm0\n\t"
-		"shrl	$1, %%eax\n\t"		"psrlq	$8, %%mm0\n\t"
-		"andl	%%edi, %%eax\n\t"	"pand	%%mm5, %%mm0\n\t"	// 0 m7&o6 m6&o5 .. m1&o0
-		"movl	%%eax, %%edx\n\t"	"movq	%%mm0, %%mm1\n\t"
-		"shrl	$1, %%eax\n\t"		"psrlq	$8, %%mm0\n\t"
-		"movl	%%edi, %%ecx\n\t"	"movq	%%mm5, %%mm3\n\t"
-		"andl	%%edi, %%eax\n\t"	"pand	%%mm5, %%mm0\n\t"	// 0 0 m7&o6&o5 .. m2&o1&o0
-		"shrl	$1, %%ecx\n\t"		"psrlq	$8, %%mm3\n\t"
-		"orl	%%edx, %%eax\n\t"	"por	%%mm1, %%mm0\n\t"	// 0 m7&o6 (m6&o5)|(m7&o6&o5) .. (m1&o0)
-		"andl	%%edi, %%ecx\n\t"	"pand	%%mm5, %%mm3\n\t"	// 0 o7&o6 o6&o5 o5&o4 o4&o3 ..
-		"movl	%%eax, %%edx\n\t"	"movq	%%mm0, %%mm2\n\t"
-		"shrl	$2, %%eax\n\t"		"psrlq	$16, %%mm0\n\t"
-		"andl	%%ecx, %%eax\n\t"	"pand	%%mm3, %%mm0\n\t"	// 0 0 0 m7&o6&o5&o4 (m6&o5&o4&o3)|(m7&o6&o5&o4&o3) ..
-		"orl	%%eax, %%edx\n\t"	"por	%%mm0, %%mm2\n\t"
-		"shrl	$2, %%eax\n\t"		"psrlq	$16, %%mm0\n\t"
-		"andl	%%ecx, %%eax\n\t"	"pand	%%mm3, %%mm0\n\t"	// 0 0 0 0 0 m7&o6&..&o2 (m6&o5&..&o1)|(m7&o6&..&o1) ..
-		"orl	%%edx, %%eax\n\t"	"por	%%mm0, %%mm2\n\t"
-		"shrl	$1, %%eax\n\t"		"psrlq	$8, %%mm2\n\t"
-				/* shift=+1 */			/* shift=+8 */
-						"movq	%%mm4, %%mm0\n\t"
-		"addl	%%ebx, %%ebx\n\t"	"psllq	$8, %%mm0\n\t"
-		"andl	%%edi, %%ebx\n\t"	"pand	%%mm5, %%mm0\n\t"
-		"movl	%%ebx, %%edx\n\t"	"movq	%%mm0, %%mm1\n\t"
-		"addl	%%ebx, %%ebx\n\t"	"psllq	$8, %%mm0\n\t"
-		"andl	%%edi, %%ebx\n\t"	"pand	%%mm5, %%mm0\n\t"
-		"orl	%%ebx, %%edx\n\t"	"por	%%mm1, %%mm0\n\t"
-		"addl	%%ecx, %%ecx\n\t"	"psllq	$8, %%mm3\n\t"
-						"movq	%%mm0, %%mm1\n\t"
-		"leal	(,%%edx,4), %%ebx\n\t"	"psllq	$16, %%mm0\n\t"
-		"andl	%%ecx, %%ebx\n\t"	"pand	%%mm3, %%mm0\n\t"
-		"orl	%%ebx, %%edx\n\t"	"por	%%mm0, %%mm1\n\t"
-		"shll	$2, %%ebx\n\t"		"psllq	$16, %%mm0\n\t"
-		"andl	%%ecx, %%ebx\n\t"	"pand	%%mm3, %%mm0\n\t"
-		"orl	%%edx, %%ebx\n\t"	"por	%%mm1, %%mm0\n\t"
-		"addl	%%ebx, %%ebx\n\t"	"psllq	$8, %%mm0\n\t"
-		"orl	%%eax, %%ebx\n\t"	"por	%%mm0, %%mm2\n\t"
-								/* shift=-7 */
-						"pand	%5, %%mm5\n\t"
-						"movq	%%mm4, %%mm0\n\t"
-						"psrlq	$7, %%mm0\n\t"
-						"pand	%%mm5, %%mm0\n\t"
-						"movq	%%mm0, %%mm1\n\t"
-						"psrlq	$7, %%mm0\n\t"
-						"pand	%%mm5, %%mm0\n\t"
-						"movq	%%mm5, %%mm3\n\t"
-						"por	%%mm1, %%mm0\n\t"
-						"psrlq	$7, %%mm3\n\t"
-						"movq	%%mm0, %%mm1\n\t"
-						"pand	%%mm5, %%mm3\n\t"
-						"psrlq	$14, %%mm0\n\t"
-						"pand	%%mm3, %%mm0\n\t"
-		"movl	%2, %%esi\n\t"		"por	%%mm0, %%mm1\n\t"
-		"movl	%4, %%edi\n\t"		"psrlq	$14, %%mm0\n\t"
-		"andl	$0x7e7e7e7e,%%edi\n\t"	"pand	%%mm3, %%mm0\n\t"
-		"movl	%%edi, %%ecx\n\t"	"por	%%mm1, %%mm0\n\t"
-		"shrl	$1, %%ecx\n\t"		"psrlq	$7, %%mm0\n\t"
-		"andl	%%edi, %%ecx\n\t"	"por	%%mm0, %%mm2\n\t"
-				/* shift=-1 */			/* shift=+7 */
-		"movl	%%esi, %%eax\n\t"	"movq	%%mm4, %%mm0\n\t"
-		"shrl	$1, %%eax\n\t"		"psllq	$7, %%mm0\n\t"
-		"andl	%%edi, %%eax\n\t"	"pand	%%mm5, %%mm0\n\t"
-		"movl	%%eax, %%edx\n\t"	"movq	%%mm0, %%mm1\n\t"
-		"shrl	$1, %%eax\n\t"		"psllq	$7, %%mm0\n\t"
-		"andl	%%edi, %%eax\n\t"	"pand	%%mm5, %%mm0\n\t"
-		"orl	%%edx, %%eax\n\t"	"por	%%mm1, %%mm0\n\t"
-						"psllq	$7, %%mm3\n\t"
-		"movl	%%eax, %%edx\n\t"	"movq	%%mm0, %%mm1\n\t"
-		"shrl	$2, %%eax\n\t"		"psllq	$14, %%mm0\n\t"
-		"andl	%%ecx, %%eax\n\t"	"pand	%%mm3, %%mm0\n\t"
-		"orl	%%eax, %%edx\n\t"	"por	%%mm0, %%mm1\n\t"
-		"shrl	$2, %%eax\n\t"		"psllq	$14, %%mm0\n\t"
-		"andl	%%ecx, %%eax\n\t"	"pand	%%mm3, %%mm0\n\t"
-		"orl	%%edx, %%eax\n\t"	"por	%%mm1, %%mm0\n\t"
-		"shrl	$1, %%eax\n\t"		"psllq	$7, %%mm0\n\t"
-						"por	%%mm0, %%mm2\n\t"
-				/* shift=+1 */			/* shift=-9 */
-						"movq	%%mm4, %%mm0\n\t"
-		"addl	%%esi, %%esi\n\t"	"psrlq	$9, %%mm0\n\t"
-		"andl	%%edi, %%esi\n\t"	"pand	%%mm5, %%mm0\n\t"
-		"movl	%%esi, %%edx\n\t"	"movq	%%mm0, %%mm1\n\t"
-		"addl	%%esi, %%esi\n\t"	"psrlq	$9, %%mm0\n\t"
-		"andl	%%edi, %%esi\n\t"	"pand	%%mm5, %%mm0\n\t"
-						"movq	%%mm5, %%mm3\n\t"
-		"orl	%%esi, %%edx\n\t"	"por	%%mm1, %%mm0\n\t"
-						"psrlq	$9, %%mm3\n\t"
-						"movq	%%mm0, %%mm1\n\t"
-		"addl	%%ecx, %%ecx\n\t"	"pand	%%mm5, %%mm3\n\t"
-		"leal	(,%%edx,4), %%esi\n\t"	"psrlq	$18, %%mm0\n\t"
-		"andl	%%ecx, %%esi\n\t"	"pand	%%mm3, %%mm0\n\t"
-		"orl	%%esi, %%edx\n\t"	"por	%%mm0, %%mm1\n\t"
-		"shll	$2, %%esi\n\t"		"psrlq	$18, %%mm0\n\t"
-		"andl	%%ecx, %%esi\n\t"	"pand	%%mm3, %%mm0\n\t"
-		"orl	%%edx, %%esi\n\t"	"por	%%mm1, %%mm0\n\t"
-		"addl	%%esi, %%esi\n\t"	"psrlq	$9, %%mm0\n\t"
-		"orl	%%eax, %%esi\n\t"	"por	%%mm0, %%mm2\n\t"
-								/* shift=+9 */
-						"movq	%%mm4, %%mm0\n\t"
-						"psllq	$9, %%mm0\n\t"
-						"pand	%%mm5, %%mm0\n\t"
-						"movq	%%mm0, %%mm1\n\t"
-						"psllq	$9, %%mm0\n\t"
-						"pand	%%mm5, %%mm0\n\t"
-						"por	%%mm1, %%mm0\n\t"
-						"psllq	$9, %%mm3\n\t"
-						"movq	%%mm0, %%mm1\n\t"
-						"psllq	$18, %%mm0\n\t"
-						"pand	%%mm3, %%mm0\n\t"
-		"movl	%1, %%eax\n\t"		"por	%%mm0, %%mm1\n\t"
-		"movl	%2, %%edx\n\t"		"psllq	$18, %%mm0\n\t"
-		"orl	%3, %%eax\n\t"		"pand	%%mm3, %%mm0\n\t"
-		"orl	%4, %%edx\n\t"		"por	%%mm1, %%mm0\n\t"
-		"notl	%%eax\n\t"		"psllq	$9, %%mm0\n\t"
-		"notl	%%edx\n\t"		"por	%%mm0, %%mm2\n\t"
-		/* mm2|(esi:ebx) is the pseudo-feasible moves at this point. */
-		/* Let edx:eax be the feasible moves, i.e., mm2 restricted to empty squares. */
-		"movd	%%mm2, %%ecx\n\t"	"punpckhdq %%mm2, %%mm2\n\t"
-		"orl	%%ecx, %%ebx\n\t"
-		"movd	%%mm2, %%ecx\n\t"
-		"orl	%%ecx, %%esi\n\t"
-		"andl	%%ebx, %%eax\n\t"
-		"andl	%%esi, %%edx\n\t"
-		"emms"		/* Reset the FP/MMX unit. */
-	: "=&A" (moves)
-	: "m" (P), "m" (((unsigned int *)&P)[1]), "m" (O), "m" (((unsigned int *)&O)[1]), "m" (mask_7e)
-	: "ebx", "ecx", "esi", "edi", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5" );
-
-	return moves;
-}
-#endif
-
-/**
- * @brief MMX translation of get_stability()
- *
- * x 1.5 faster bench stability on 32-bit x86.
- *
- */
-#ifdef USE_MSVC_X86
-
-int get_stability_mmx(unsigned long long P_, unsigned long long O_)
-{
-	__m64	P, O, P_central, disc, full_h, full_v, full_d7, full_d9, full_l, full_r, stable;
-	__m64	stable_h, stable_v, stable_d7, stable_d9, old_stable, m;
-	unsigned int	OL, OH, PL, PH, t, a1a8po, h1h8po;
-	static const unsigned long long MFF = 0xffffffffffffffff;
-	static const unsigned long long edge = 0xff818181818181ffULL;
-	static const unsigned long long e7[] = { 0xffff030303030303, 0xc0c0c0c0c0c0ffff, 0xffffffff0f0f0f0f, 0xf0f0f0f0ffffffff };
-	static const unsigned long long e9[] = { 0xffffc0c0c0c0c0c0, 0x030303030303ffff, 0x0f0f0f0ff0f0f0f0 };
-
-	P = *(__m64 *) &P_;
-	O = *(__m64 *) &O_;
-	disc = _m_por(P, O);
-	P_central = _m_pandn(*(__m64 *) &edge, P);
-
-	// get full lines and set intersection of them to stable
-	// get_full_lines_mmx(full_h, disc, 1, e1);
-	full_h = _m_pcmpeqb(*(__m64 *) &MFF, disc);
-	stable = _m_pand(P_central, full_h);
-
-	// get_full_lines_mmx(full_v, disc, 8, e8);
-	full_v = _m_pand(_m_punpcklbw(disc, disc), _m_punpckhbw(disc, disc));	//  (d,d,c,c,b,b,a,a) & (h,h,g,g,f,f,e,e)
-	full_v = _m_pand(_m_punpcklwd(full_v, full_v), _m_punpckhwd(full_v, full_v));	// (dh,dh,dh,dh,cg,cg,cg,cg) & (bf,bf,bf,bf,ae,ae,ae,ae)
-	full_v = _m_pand(_m_punpckldq(full_v, full_v), _m_punpckhdq(full_v, full_v));	// (bdfh*4, bdfh*4) & (aceg*4, aceg*4)
-	stable = _m_pand(stable, full_v);
-
-	// get_full_lines_mmx(full_d7, disc, 7, e7);
-	full_l = _m_pand(disc, _m_por(*(__m64 *) &edge, _m_psrlqi(disc, 7)));
-	full_r = _m_pand(disc, _m_por(*(__m64 *) &edge, _m_psllqi(disc, 7)));
-	full_l = _m_pand(full_l, _m_por(*(__m64 *) &e7[0], _m_psrlqi(full_l, 14)));
-	full_r = _m_pand(full_r, _m_por(*(__m64 *) &e7[1], _m_psllqi(full_r, 14)));
-	full_l = _m_pand(full_l, _m_por(*(__m64 *) &e7[2], _m_psrlqi(full_l, 28)));
-	full_r = _m_pand(full_r, _m_por(*(__m64 *) &e7[3], _m_psllqi(full_r, 28)));
-	full_d7 = _m_pand(full_l, full_r);
-	stable = _m_pand(stable, full_d7);
-
-	// get_full_lines_mmx(full_d9, disc, 9, e9);
-
-	full_l = _m_pand(disc, _m_por(*(__m64 *) &edge, _m_psrlqi(disc, 9)));
-	full_r = _m_pand(disc, _m_por(*(__m64 *) &edge, _m_psllqi(disc, 9)));
-	full_l = _m_pand(full_l, _m_por(*(__m64 *) &e9[0], _m_psrlqi(full_l, 18)));
-	full_r = _m_pand(full_r, _m_por(*(__m64 *) &e9[1], _m_psllqi(full_r, 18)));
-	full_d9 = _m_pand(_m_pand(full_l, full_r), _m_por(*(__m64 *) &e9[2], _m_por(_m_psrlqi(full_l, 36), _m_psllqi(full_r, 36))));
-	stable = _m_pand(stable, full_d9);
-
-	// compute the exact stable edges (from precomputed tables)
-	OL = (unsigned int) O_;	OH = (unsigned int)(O_ >> 32);
-	PL = (unsigned int) P_;	PH = (unsigned int)(P_ >> 32);
-	a1a8po = ((((PL & 0x01010101u) + ((PH & 0x01010101u) << 4)) * 0x01020408u) >> 24) * 256
-		+ ((((OL & 0x01010101u) + ((OH & 0x01010101u) << 4)) * 0x01020408u) >> 24);
-	h1h8po = ((((PH & 0x80808080u) + ((PL & 0x80808080u) >> 4)) * 0x00204081u) >> 24) * 256
-		+ ((((OH & 0x80808080u) + ((OL & 0x80808080u) >> 4)) * 0x00204081u) >> 24);
-	stable = _m_por(stable, _m_por(_m_por(*(__m64 *) &A1_A8[edge_stability[a1a8po]],
-		_m_psllqi(*(__m64 *) &A1_A8[edge_stability[h1h8po]], 7)),
-		_m_punpckldq(_m_from_int(edge_stability[(PL & 0xff) * 256 + (OL & 0xff)]),
-		_m_from_int(edge_stability[((PH >> 16) & 0xff00) + (OH >> 24)] << 24))));
-
-	// now compute the other stable discs (ie discs touching another stable disc in each flipping direction).
-	t = _m_to_int(_m_packsswb(stable, stable));
-	if (t) {
-		do {
-			old_stable = stable;
-			stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), full_h);
-			stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), full_v);
-			stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), full_d7);
-			stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), full_d9);
-			stable = _m_por(stable, _m_pand(_m_pand(_m_pand(_m_pand(stable_h, stable_v), stable_d7), stable_d9), P_central));
-			m = _m_pxor(stable, old_stable);
-		} while (_m_to_int(_m_packsswb(m, m)) != 0);
-
-#ifdef POPCOUNT
-		t = __popcnt(_m_to_int(stable)) + __popcnt(_m_to_int(_m_psrlqi(stable, 32)));
-#else
-		m = _m_psubd(stable, _m_pand(_m_psrlqi(stable, 1), *(__m64 *) &mask_55));
-		m = _m_paddd(_m_pand(m, *(__m64 *) &mask_33), _m_pand(_m_psrlqi(m, 2), *(__m64 *) &mask_33));
-		m = _m_pand(_m_paddd(m, _m_psrlqi(m, 4)), *(__m64 *) &mask_0F);
-		t = ((unsigned int) _m_to_int(_m_paddb(m, _m_psrlqi(m, 32))) * 0x01010101u) >> 24;
-#endif
-	}
-	_mm_empty();
-	return t;
-}
-
-#elif defined(USE_GAS_MMX) && !(defined(__clang__) && (__clang__major__ < 3))
-// LLVM ERROR: Unsupported asm: input constraint with a matching output constraint of incompatible type!
-
-#define	get_full_lines_mmx(result,disc,dir,edge)	__asm__ (\
-		"movq	%2, %%mm0\n\t"		"movq	%2, %%mm1\n\t"\
-		"psrlq	%3, %%mm0\n\t"		"psllq	%3, %%mm1\n\t"\
-		"por	%6, %%mm0\n\t"		"por	%6, %%mm1\n\t"\
-		"pand	%2, %%mm0\n\t"		"pand	%2, %%mm1\n\t"\
-		"movq	%%mm0, %%mm2\n\t"	"movq	%%mm1, %%mm3\n\t"\
-		"psrlq	%4, %%mm0\n\t"		"psllq	%4, %%mm1\n\t"\
-		"por	%7, %%mm0\n\t"		"por	%8, %%mm1\n\t"\
-		"pand	%%mm2, %%mm0\n\t"	"pand	%%mm3, %%mm1\n\t"\
-		"movq	%%mm0, %%mm2\n\t"	"pand	%%mm1, %%mm0\n\t"\
-		"psrlq	%5, %%mm2\n\t"		"psllq	%5, %%mm1\n\t"\
-		"por	%9, %%mm2\n\t"		"por	%10, %%mm1\n\t"\
-		"pand	%%mm2, %%mm0\n\t"	"pand	%%mm1, %%mm0\n\t"\
-		"movq	%%mm0, %0\n\t"\
-		"pand	%%mm0, %1"\
-	: "=m" (result), "+y" (stable)\
-	: "y" (disc), "i" (dir), "i" (dir * 2), "i" (dir * 4),\
-	  "my" (e0), "m" (edge[0]), "m" (edge[1]), "m" (edge[2]), "m" (edge[3])\
-	: "mm0", "mm1", "mm2", "mm3");
-
-int get_stability_mmx(unsigned long long P_, unsigned long long O_)
-{
-	__m64	P, O, P_central, disc, full_h, full_v, full_d7, full_d9, stable;
-#ifdef hasSSE2
-	__v2di	PO;
-#endif
-	unsigned int	OL, OH, PL, PH, t, a1a8po, h1h8po;
-	static const unsigned long long e0 = 0xff818181818181ffULL;
-	static const unsigned long long e7[] = { 0xffff030303030303, 0xc0c0c0c0c0c0ffff, 0xffffffff0f0f0f0f, 0xf0f0f0f0ffffffff };
-	static const unsigned long long e9[] = { 0xffffc0c0c0c0c0c0, 0x030303030303ffff, 0xfffffffff0f0f0f0, 0x0f0f0f0fffffffff };
-
-	__asm__ (
-		"movd	%2, %0\n\t"		"movd	%4, %1\n\t"		// (movd for store-forwarding)
-		"punpckldq %3, %0\n\t"		"punpckldq %5, %1"
-	: "=&y" (P), "=&y" (O) : "m" (P_), "m" (((unsigned int *)&P_)[1]), "m" (O_), "m" (((unsigned int *)&O_)[1]));
-#ifdef hasSSE2
-	PO = _mm_unpacklo_epi64(_mm_movpi64_epi64(O), _mm_movpi64_epi64(P));
-#endif
-	__asm__ (
-		"por	%3, %0\n\t"
-		"pandn	%3, %1\n\t"
-		"movq	%1, %2"
-	: "=y" (disc), "=y" (stable), "=m" (P_central)
-	: "y" (P), "0" (O), "1" (e0));
-
-	// get full lines and set intersection of them to stable
-	// get_full_lines_mmx(full_h, disc, 1, e1);
-	__asm__ (
-		"pcmpeqb %%mm0, %%mm0\n\t"
-		"pcmpeqb %2, %%mm0\n\t"
-		"movq	%%mm0, %0\n\t"
-		"pand	%%mm0, %1"
-	: "=m" (full_h), "+y" (stable) : "y" (disc) : "mm0");
-	// get_full_lines_mmx(full_v, disc, 8, e8);
-	__asm__ (
-		"movq	%2, %%mm0\n\t"		"movq	%2, %%mm1\n\t"
-		"punpcklbw %%mm0, %%mm0\n\t"	"punpckhbw %%mm1, %%mm1\n\t"
-		"pand	%%mm1, %%mm0\n\t"	// (d,d,c,c,b,b,a,a) & (h,h,g,g,f,f,e,e)
-#ifdef hasSSE2
-		"pshufw	$177, %%mm0, %%mm1\n\t"
-		"pand	%%mm1, %%mm0\n\t"	// (cg,cg,dh,dh,ae,ae,bf,bf) & (dh,dh,cg,cg,bf,bf,ae,ae)
-		"pshufw	$78, %%mm0, %%mm1\n\t"
-		"pand	%%mm1, %%mm0\n\t"	// (abef*4, cdgh*4) & (cdgh*4, abef*4)
-#else
-		"movq	%%mm0, %%mm1\n\t"
-		"punpcklwd %%mm0, %%mm0\n\t"	"punpckhwd %%mm1, %%mm1\n\t"
-		"pand	%%mm1, %%mm0\n\t"	// (dh,dh,dh,dh,cg,cg,cg,cg) & (bf,bf,bf,bf,ae,ae,ae,ae)
-		"movq	%%mm0, %%mm1\n\t"
-		"punpckldq %%mm0, %%mm0\n\t"	"punpckhdq %%mm1, %%mm1\n\t"
-		"pand	%%mm1, %%mm0\n\t"	// (bdfh*4, bdfh*4) & (aceg*4, aceg*4)
-#endif
-		"movq	%%mm0, %0\n\t"
-		"pand	%%mm0, %1"
-	: "=m" (full_v), "+y" (stable) : "y" (disc) : "mm0", "mm1");
-	get_full_lines_mmx(full_d7, disc, 7, e7);
-	get_full_lines_mmx(full_d9, disc, 9, e9);
-
-	// compute the exact stable edges (from precomputed tables)
-	OL = (unsigned int) O_;	OH = (unsigned int)(O_ >> 32);
-	PL = (unsigned int) P_;	PH = (unsigned int)(P_ >> 32);
-#ifdef hasSSE2
-	a1a8po = _mm_movemask_epi8(_mm_slli_epi64(PO, 7));
-	h1h8po = _mm_movemask_epi8(PO);
-#else
-	a1a8po = ((((PL & 0x01010101u) + ((PH & 0x01010101u) << 4)) * 0x01020408u) >> 24) * 256
-		+ ((((OL & 0x01010101u) + ((OH & 0x01010101u) << 4)) * 0x01020408u) >> 24);
-	h1h8po = ((((PH & 0x80808080u) + ((PL & 0x80808080u) >> 4)) * 0x00204081u) >> 24) * 256
-		+ ((((OH & 0x80808080u) + ((OL & 0x80808080u) >> 4)) * 0x00204081u) >> 24);
-#endif
-	__asm__(
-		"movd	%1, %%mm0\n\t"		"por	%3, %0\n\t"
-		"movd	%2, %%mm1\n\t"
-		"punpckldq %%mm1, %%mm0\n\t"	"movq	%4, %%mm1\n\t"
-		"por	%%mm0, %0\n\t"		"psllq	$7, %%mm1\n\t"
-						"por	%%mm1, %0"
-	: "+y" (stable)
-	: "g" ((int) edge_stability[(PL & 0xff) * 256 + (OL & 0xff)]),
-	  "g" (edge_stability[((PH >> 16) & 0xff00) + (OH >> 24)] << 24),
-	  "m" (A1_A8[edge_stability[a1a8po]]),
-	  "m" (A1_A8[edge_stability[h1h8po]])
-	: "mm0", "mm1");
-
-	// now compute the other stable discs (ie discs touching another stable disc in each flipping direction).
-	__asm__ (
-		"movq	%1, %%mm0\n\t"
-		"packsswb %%mm0, %%mm0\n\t"
-		"movd	%%mm0, %0\n\t"
-	: "=g" (t) : "y" (stable) : "mm0" );
-
-	if (t) {
-		do {
-			__asm__ (
-				"movq	%1, %%mm3\n\t"
-				"movq	%6, %1\n\t"
-				"movq	%%mm3, %%mm0\n\t"	"movq	%%mm3, %%mm1\n\t"
-				"psrlq	$1, %%mm0\n\t"		"psllq	$1, %%mm1\n\t"		"movq	%%mm3, %%mm2\n\t"
-				"por	%%mm1, %%mm0\n\t"	"movq	%%mm3, %%mm1\n\t"	"psrlq	$7, %%mm2\n\t"
-				"por	%2, %%mm0\n\t"		"psllq	$7, %%mm1\n\t"		"por	%%mm1, %%mm2\n\t"
-				"pand	%%mm0, %1\n\t"						"por	%4, %%mm2\n\t"
-				"movq	%%mm3, %%mm0\n\t"	"movq	%%mm3, %%mm1\n\t"	"pand	%%mm2, %1\n\t"
-				"psrlq	$8, %%mm0\n\t"		"psllq	$8, %%mm1\n\t"		"movq	%%mm3, %%mm2\n\t"
-				"por	%%mm1, %%mm0\n\t"	"movq	%%mm3, %%mm1\n\t"	"psrlq	$9, %%mm2\n\t"
-				"por	%3, %%mm0\n\t"		"psllq	$9, %%mm1\n\t"		"por	%%mm1, %%mm2\n\t"
-				"pand	%%mm0, %1\n\t"						"por	%5, %%mm2\n\t"
-												"pand	%%mm2, %1\n\t"
-				"por	%%mm3, %1\n\t"
-				"pxor	%1, %%mm3\n\t"
-				"packsswb %%mm3, %%mm3\n\t"
-				"movd	%%mm3, %0"
-			: "=g" (t), "+y" (stable)
-			: "m" (full_h), "m" (full_v), "m" (full_d7), "m" (full_d9), "m" (P_central)
-			: "mm0", "mm1", "mm2", "mm3");
-		} while (t);
-
-		// bit_count(stable)
-#ifdef POPCOUNT
-		__asm__ (
-			"movd	%1, %0\n\t"
-			"psrlq	$32, %1\n\t"
-			"movd	%1, %%edx\n\t"
-			"popcntl %0, %0\n\t"
-			"popcntl %%edx, %%edx\n\t"
-			"addl	%%edx, %0"
-		: "=&a" (t) : "y" (stable) : "edx");
-#else
-		__asm__ (
-	 		"movq	%1, %%mm0\n\t"
-			"psrlq	$1, %1\n\t"
-			"pand	%2, %1\n\t"
-			"psubd	%1, %%mm0\n\t"
-
-			"movq	%%mm0, %%mm1\n\t"
-			"psrlq	$2, %%mm0\n\t"
-			"pand	%3, %%mm1\n\t"
-			"pand	%3, %%mm0\n\t"
-			"paddd	%%mm1, %%mm0\n\t"
-
-			"movq	%%mm0, %%mm1\n\t"
-			"psrlq	$4, %%mm0\n\t"
-			"paddd	%%mm1, %%mm0\n\t"
-			"pand	%4, %%mm0\n\t"
-	#ifdef hasSSE2
-			"pxor	%%mm1, %%mm1\n\t"
-			"psadbw	%%mm1, %%mm0\n\t"
-			"movd	%%mm0, %0\n\t"
-	#else
-			"movq	%%mm0, %%mm1\n\t"
-			"psrlq	$32, %%mm0\n\t"
-			"paddb	%%mm1, %%mm0\n\t"
-
-			"movd	%%mm0, %0\n\t"
-			"imull	$0x01010101, %0, %0\n\t"
-			"shrl	$24, %0"
-	#endif
-		: "=a" (t) : "y" (stable), "m" (mask_55), "my" (mask_33), "m" (mask_0F) : "mm0", "mm1");
-#endif
-	}
-	__asm__ ( "emms" );
-	return t;
-}
-#endif // USE_MSVC_X86
-
-/**
- * @brief MMX translation of get_potential_mobility
- *
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return a count of potential moves.
- */
-#ifdef USE_MSVC_X86
-
-int get_potential_mobility_mmx(unsigned long long P, unsigned long long O)
-{
-	__m64	m, mO;
-	int	count;
-	static const unsigned long long mask_v = 0x00ffffffffffff00ULL;
-	// static const unsigned long long mask_d = 0x007e7e7e7e7e7e00ULL;	// = mask_7e & mask_v
-#ifdef POPCOUNT
-	int	mh, ml;
-#else
-	static const unsigned long long mask_15 = 0x1555555555555515ULL;
-	static const unsigned long long mask_01 = 0x0100000000000001ULL;
-#endif
-
-	mO = _m_pand(*(__m64 *) &O, *(__m64 *) &mask_7e);
-	m = _m_por(_m_psllqi(mO, 1), _m_psrlqi(mO, 1));
-	mO = _m_pand(*(__m64 *) &O, *(__m64 *) &mask_v);
-	m = _m_por(m, _m_por(_m_psllqi(mO, 8), _m_psrlqi(mO, 8)));
-	mO = _m_pand(mO, *(__m64 *) &mask_7e);
-	m = _m_por(m, _m_por(_m_psllqi(mO, 7), _m_psrlqi(mO, 7)));
-	m = _m_por(m, _m_por(_m_psllqi(mO, 9), _m_psrlqi(mO, 9)));
-	m = _m_pandn(_m_por(*(__m64 *) &O, *(__m64 *) &P), m);
-
-#ifdef POPCOUNT
-	ml = _m_to_int(m);
-	mh = _m_to_int(_m_psrlqi(m, 32));
-	count = __popcnt(ml) + __popcnt(mh) + __popcnt((ml & 0x00000081) + (mh & 0x81000000));
-#else
-	m = _m_paddd(_m_psubd(m, _m_pand(_m_psrlqi(m, 1), *(__m64 *) &mask_15)), _m_pand(m, *(__m64 *) &mask_01));
-	m = _m_paddd(_m_pand(m, *(__m64 *) &mask_33), _m_pand(_m_psrlqi(m, 2), *(__m64 *) &mask_33));
-	m = _m_pand(_m_paddd(m, _m_psrlqi(m, 4)), *(__m64 *) &mask_0F);
-	count = ((unsigned int) _m_to_int(_m_paddb(m, _m_psrlqi(m, 32))) * 0x01010101u) >> 24;
-#endif
-	_mm_empty();
-	return count;
-}
-
-#elif defined(USE_GAS_MMX)
-
-int get_potential_mobility_mmx(unsigned long long P, unsigned long long O)
-{
-	int	count;
-	static const unsigned long long mask_v = 0x00ffffffffffff00ULL;
-	// static const unsigned long long mask_d = 0x007e7e7e7e7e7e00ULL;	// = mask_7e & mask_v
-#ifndef POPCOUNT
-	static const unsigned long long mask_15 = 0x1555555555555515ULL;
-	static const unsigned long long mask_01 = 0x0100000000000001ULL;
-#endif
-
-	__asm__ (
-		"movq	%3, %%mm2\n\t"		"movq	%4, %%mm5\n\t"
-		"pand	%2, %%mm2\n\t"		"pand	%2, %%mm5\n\t"		"movq	%%mm2, %%mm3\n\t"
-		"movq	%%mm2, %%mm4\n\t"	"movq	%%mm5, %%mm6\n\t"	"pand	%%mm5, %%mm3\n\t"
-		"psllq	$1, %%mm2\n\t"		"psllq	$8, %%mm5\n\t"
-		"psrlq	$1, %%mm4\n\t"		"psrlq	$8, %%mm6\n\t"
-		"por	%%mm4, %%mm2\n\t"	"por	%%mm6, %%mm5\n\t"
-		"por	%%mm5, %%mm2\n\t"
-		"movq	%%mm3, %%mm5\n\t"
-		"movq	%%mm3, %%mm4\n\t"	"movq	%%mm5, %%mm6\n\t"
-		"psllq	$7, %%mm3\n\t"		"psllq	$9, %%mm5\n\t"
-		"psrlq	$7, %%mm4\n\t"		"psrlq	$9, %%mm6\n\t"
-		"por	%%mm4, %%mm3\n\t"	"por	%%mm6, %%mm5\n\t"
-		"por	%%mm3, %%mm2\n\t"	"por	%%mm5, %%mm2\n\t"
-		"por	%1, %2\n\t"
-		"pandn	%%mm2, %2\n\t"
-
-#ifdef POPCOUNT
-		"movd	%2, %%ecx\n\t"
-		"popcntl %%ecx, %0\n\t"		"andl	$0x00000081, %%ecx\n\t"
-		"psrlq	$32, %2\n\t"		"popcntl %%ecx, %%ecx\n\t"
-		"movd	%2, %%edx\n\t"		"addl	%%ecx, %0\n\t"
-		"popcntl %%edx, %%ecx\n\t"	"andl	$0x81000000, %%edx\n\t"
-		"addl	%%ecx, %0\n\t"		"popcntl %%edx, %%edx\n\t"
-						"addl	%%edx, %0\n\t"
-		"emms"
-	: "=g" (count) : "y" (P), "y" (O), "m" (mask_7e), "m" (mask_v)
-	: "ecx", "edx", "mm2", "mm3", "mm4", "mm5", "mm6");
-
-#else
-		"movq	%2, %1\n\t"		"movq	%2, %%mm2\n\t"
-		"psrlq	$1, %2\n\t"
-		"pand	%5, %2\n\t"		"pand	%6, %%mm2\n\t"
-		"psubd	%2, %1\n\t"		"paddd	%%mm2, %1\n\t"
-
-		"movq	%1, %2\n\t"
-		"psrlq	$2, %1\n\t"
-		"pand	%7, %2\n\t"
-		"pand	%7, %1\n\t"
-		"paddd	%2, %1\n\t"
-
-		"movq	%1, %2\n\t"
-		"psrlq	$4, %1\n\t"
-		"paddd	%2, %1\n\t"
-		"pand	%8, %1\n\t"
-	#ifdef hasSSE2
-		"pxor	%2, %2\n\t"
-		"psadbw	%2, %1\n\t"
-		"movd	%1, %0\n\t"
-	#else
-		"movq	%1, %2\n\t"
-		"psrlq	$32, %1\n\t"
-		"paddb	%2, %1\n\t"
-
-		"movd	%1, %0\n\t"
-		"imull	$0x01010101, %0, %0\n\t"
-		"shrl	$24, %0\n\t"
-	#endif
-		"emms"
-	: "=g" (count)
-	: "y" (P), "y" (O), "m" (mask_7e), "m" (mask_v),
-	  "m" (mask_15), "m" (mask_01), "m" (mask_33), "m" (mask_0F)
-	: "mm2", "mm3", "mm4", "mm5", "mm6");
-#endif
-
-	return count;
-}
-#endif
-
-/**
- * @brief MMX translation of board_get_hash_code.
- *
- * @param p pointer to 16 bytes to hash.
- * @return the hash code of the bitboard
- */
-
-#if defined(USE_GAS_MMX) && defined(__3dNOW__)
-
-unsigned long long board_get_hash_code_mmx(const unsigned char *p)
-{
-	unsigned long long h;
-
-	__asm__ volatile (
-		"movq	%0, %%mm0\n\t"		"movq	%1, %%mm1"
-	: : "m" (hash_rank[0][p[0]]), "m" (hash_rank[1][p[1]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[2][p[2]]), "m" (hash_rank[3][p[3]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[4][p[4]]), "m" (hash_rank[5][p[5]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[6][p[6]]), "m" (hash_rank[7][p[7]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[8][p[8]]), "m" (hash_rank[9][p[9]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[10][p[10]]), "m" (hash_rank[11][p[11]]));
-	__asm__ volatile (
-		"pxor	%0, %%mm0\n\t"		"pxor	%1, %%mm1"
-	: : "m" (hash_rank[12][p[12]]), "m" (hash_rank[13][p[13]]));
-	__asm__ volatile (
-		"pxor	%1, %%mm0\n\t"		"pxor	%2, %%mm1\n\t"
-		"pxor	%%mm1, %%mm0\n\t"
-		"movd	%%mm0, %%eax\n\t"
-		"punpckhdq %%mm0, %%mm0\n\t"
-		"movd	%%mm0, %%edx\n\t"
-		"emms"
-	: "=A" (h)
-	: "m" (hash_rank[14][p[14]]), "m" (hash_rank[15][p[15]])
-	: "mm0", "mm1");
-
-	return h;
-}
-
-#endif // __3dNOW
-
-#if !defined(hasMMX) && defined(USE_GAS_MMX)
-	#pragma GCC pop_options
-#endif
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/board_sse.c b/src/board_sse.c
index 23acd3a..9a5ef40 100644
--- a/src/board_sse.c
+++ b/src/board_sse.c
@@ -1,90 +1,33 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file board_sse.c
  *
  * SSE/AVX translation of some board.c functions
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> b4fb773 (AVX optimized board_unique)
  * @date 2014 - 2024
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 2014 - 2020
- * @author Toshihiko Okuhara
- * @version 4.4
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
- * @date 2014 - 2022
-=======
- * @date 2014 - 2023
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
- * @author Toshihiko Okuhara
- * @version 4.5
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
  */
 
 #include "bit.h"
 #include "hash.h"
 #include "board.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(ANDROID) && !defined(HAS_CPU_64) && !defined(hasSSE2)
-=======
-#if defined(ANDROID) && !defined(hasNeon) && !defined(hasSSE2)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
 #if defined(ANDROID) && !defined(HAS_CPU_64) && !defined(hasSSE2)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 #include "android/cpu-features.h"
 
 bool	hasSSE2 = false;
 
 void init_neon (void)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
   #ifdef __arm__
-=======
-#ifdef __arm__
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-  #ifdef __arm__
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
 	if (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) {
 	#if (MOVE_GENERATOR == MOVE_GENERATOR_BITSCAN)
 		extern unsigned long long (*flip_neon[66])(const unsigned long long, const unsigned long long);
 		memcpy(flip, flip_neon, sizeof(flip_neon));
 	#endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 		hasSSE2 = true;	// for eval_update_sse
 	}
   #elif defined(__i386__)	// android x86 w/o SSE2 - uncommon and not tested
-<<<<<<< HEAD
-=======
-		hasSSE2 = true;
-=======
-		hasSSE2 = true;	// for eval_update_sse
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
-	}
-<<<<<<< HEAD
-#else	// android x86 w/o SSE2 - uncommon and not tested
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-  #else	// android x86 w/o SSE2 - uncommon and not tested
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 	int	cpuid_edx, cpuid_ecx;
 	__asm__ (
 		"movl	$1, %%eax\n\t"
@@ -92,41 +35,18 @@ void init_neon (void)
 	: "=d" (cpuid_edx), "=c" (cpuid_ecx) :: "%eax", "%ebx" );
 	if ((cpuid_edx & 0x04000000u) != 0)
 		hasSSE2 = true;
-<<<<<<< HEAD
-<<<<<<< HEAD
   #endif
 }
 #endif
 
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-#endif
-=======
-  #endif
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
-}
-#endif
-
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 /**
  * @brief SSE2 translation of board_symetry
  *
  * @param board input board
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
- * @param s symetry
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 6bc747d (Split board_flip_* from board_symetry)
  * @param sym symetric output board
  */
 #ifdef hasSSE2
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 static __m128i vectorcall board_horizontal_mirror_sse(__m128i bb)
 {
 	const __m128i mask0F0F = _mm_set1_epi16(0x0F0F);
@@ -315,202 +235,19 @@ int board_unique(const Board *board, Board *unique)
 }
 #endif
 
-<<<<<<< HEAD
-/**
- * @brief Compute a board resulting of a move played on a previous board.
- *
- * @param OP board to play the move on.
-=======
-void board_symetry(const Board *board, const int s, Board *sym)
-=======
-void board_horizontal_mirror(const Board *board, Board *sym)
->>>>>>> 6bc747d (Split board_flip_* from board_symetry)
-{
-	__m128i	bb = _mm_loadu_si128((__m128i *) board);
-=======
-static __m128i vectorcall board_horizontal_mirror_sse(__m128i bb)
-{
->>>>>>> a23c3d4 (SSE optimized board_symetry again)
-	const __m128i mask0F0F = _mm_set1_epi16(0x0F0F);
-  #if defined(__SSSE3__) || defined(__AVX__)	// pshufb (cf. http://wm.ite.pl/articles/sse-popcount.html)
-	const __m128i mbitrev  = _mm_set_epi8(15, 7, 11, 3, 13, 5, 9, 1, 14, 6, 10, 2, 12, 4, 8, 0);
-	bb = _mm_or_si128(_mm_shuffle_epi8(mbitrev, _mm_and_si128(_mm_srli_epi64(bb, 4), mask0F0F)),
-		_mm_slli_epi64(_mm_shuffle_epi8(mbitrev, _mm_and_si128(bb, mask0F0F)), 4));
-  #else
-	const __m128i mask5555 = _mm_set1_epi16(0x5555);
-	const __m128i mask3333 = _mm_set1_epi16(0x3333);
-	bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 1), mask5555), _mm_slli_epi64(_mm_and_si128(bb, mask5555), 1));
-	bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 2), mask3333), _mm_slli_epi64(_mm_and_si128(bb, mask3333), 2));
-	bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 4), mask0F0F), _mm_slli_epi64(_mm_and_si128(bb, mask0F0F), 4));
-  #endif
-	return bb;
-}
-
-void board_horizontal_mirror(const Board *board, Board *sym)
-{
-	_mm_storeu_si128((__m128i *) sym, board_horizontal_mirror_sse(_mm_loadu_si128((__m128i *) board)));
-}
-
-static __m128i vectorcall board_vertical_mirror_sse(__m128i bb)
-{
-  #if defined(__SSSE3__) || defined(__AVX__)	// pshufb
-	return _mm_shuffle_epi8(bb, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7));
-  #else
-	bb = _mm_or_si128(_mm_srli_epi16(bb, 8), _mm_slli_epi16(bb, 8));
-	return _mm_shufflehi_epi16(_mm_shufflelo_epi16(bb, 0x1b), 0x1b);
-  #endif
-}
-
-void board_vertical_mirror(const Board *board, Board *sym)
-{
-  #if defined(__SSSE3__) || defined(__AVX__) || !defined(HAS_CPU_64)
-	_mm_storeu_si128((__m128i *) sym, board_vertical_mirror_sse(_mm_loadu_si128((__m128i *) board)));
-  #else	// use BSWAP64
-	sym->player = vertical_mirror(board->player);
-	sym->opponent = vertical_mirror(board->opponent);
-  #endif
-}
-
-static __m128i vectorcall board_transpose_sse(__m128i bb)
-{
-	const __m128i mask00AA = _mm_set1_epi16(0x00AA);
-	const __m128i maskCCCC = _mm_set1_epi32(0x0000CCCC);
-	const __m128i mask00F0 = _mm_set1_epi64x(0x00000000F0F0F0F0);
-	__m128i tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 7)), mask00AA);
-	bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 7));
-	tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 14)), maskCCCC);
-	bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 14));
-	tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 28)), mask00F0);
-	bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 28));
-	return bb;
-}
-
-void board_transpose(const Board *board, Board *sym)
-{
-	_mm_storeu_si128((__m128i *) sym, board_transpose_sse(_mm_loadu_si128((__m128i *) board)));
-}
-
-void board_symetry(const Board *board, const int s, Board *sym)
-{
-	__m128i	bb = _mm_loadu_si128((__m128i *) board);
-	if (s & 1)
-		bb = board_horizontal_mirror_sse(bb);
-	if (s & 2)
-		bb = board_vertical_mirror_sse(bb);
-	if (s & 4)
-		bb = board_transpose_sse(bb);
-
-	_mm_storeu_si128((__m128i *) sym, bb);
-	board_check(sym);
-}
-
-#elif defined(__ARM_NEON) && !defined(DISPATCH_NEON)
-
-static uint64x2_t board_horizontal_mirror_neon(uint64x2_t bb)
-{
-  #ifdef HAS_CPU_64
-	bb = vreinterpretq_u64_u8(vrbitq_u8(vreinterpretq_u8_u64(bb)));
-  #else
-	bb = vbslq_u64(vdupq_n_u64(0x5555555555555555), vshrq_n_u64(bb, 1), vshlq_n_u64(bb, 1));
-	bb = vbslq_u64(vdupq_n_u64(0x3333333333333333), vshrq_n_u64(bb, 2), vshlq_n_u64(bb, 2));
-	bb = vreinterpretq_u64_u8(vsliq_n_u8(vshrq_n_u8(vreinterpretq_u8_u64(bb), 4), vreinterpretq_u8_u64(bb), 4));
-  #endif
-	return bb;
-}
-
-void board_horizontal_mirror(const Board *board, Board *sym)
-{
-	vst1q_u64((uint64_t *) sym, board_horizontal_mirror_neon(vld1q_u64((uint64_t *) board)));
-}
-
-static uint64x2_t board_vertical_mirror_neon(uint64x2_t bb)
-{
-	return vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(bb)));
-}
-
-void board_vertical_mirror(const Board *board, Board *sym)
-{
-	vst1q_u64((uint64_t *) sym, board_vertical_mirror_neon(vld1q_u64((uint64_t *) board)));
-}
-
-static uint64x2_t board_transpose_neon(uint64x2_t bb)
-{
-	uint64x2_t tt = vandq_u64(veorq_u64(bb, vshrq_n_u64(bb, 7)), vdupq_n_u64(0x00AA00AA00AA00AA));
-	bb = veorq_u64(veorq_u64(bb, tt), vshlq_n_u64(tt, 7));
-	tt = vandq_u64(veorq_u64(bb, vshrq_n_u64(bb, 14)), vdupq_n_u64(0x0000CCCC0000CCCC));
-	bb = veorq_u64(veorq_u64(bb, tt), vshlq_n_u64(tt, 14));
-	tt = vandq_u64(veorq_u64(bb, vshrq_n_u64(bb, 28)), vdupq_n_u64(0x00000000F0F0F0F0));
-	bb = veorq_u64(veorq_u64(bb, tt), vshlq_n_u64(tt, 28));
-	return bb;
-}
-
-void board_transpose(const Board *board, Board *sym)
-{
-	vst1q_u64((uint64_t *) sym, board_transpose_neon(vld1q_u64((uint64_t *) board)));
-}
-
-void board_symetry(const Board *board, const int s, Board *sym)
-{
-	uint64x2_t bb = vld1q_u64((uint64_t *) board);
-	if (s & 1)
-		bb = board_horizontal_mirror_neon(bb);
-	if (s & 2)
-		bb = board_vertical_mirror_neon(bb);
-	if (s & 4)
-		bb = board_transpose_neon(bb);
-
-	vst1q_u64((uint64_t *) sym, bb);
-	board_check(sym);
-}
-
-#endif // hasSSE2/Neon
-
-=======
->>>>>>> b4fb773 (AVX optimized board_unique)
 /**
  * @brief Compute a board resulting of a move played on a previous board.
  *
-<<<<<<< HEAD
- * @param board board to play the move on.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
  * @param OP board to play the move on.
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
  * @param x move to play.
  * @param next resulting board.
  * @return flipped discs.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 unsigned long long vectorcall board_next_sse(__m128i OP, const int x, Board *next)
 {
 	__m128i flipped = reduce_vflip(mm_Flip(OP, x));
-=======
-=======
-#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)
-=======
-#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-unsigned long long board_next(const Board *board, const int x, Board *next)
-=======
-unsigned long long vectorcall vboard_next(__m128i OP, const int x, Board *next)
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-{
-<<<<<<< HEAD
-	__m128i flipped = mm_Flip(OP, x);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	__m128i flipped = reduce_vflip(mm_Flip(OP, x));
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
 
 	OP = _mm_xor_si128(OP, _mm_or_si128(flipped, _mm_loadl_epi64((__m128i *) &X_TO_BIT[x])));
 	_mm_storeu_si128((__m128i *) next, _mm_shuffle_epi32(OP, 0x4e));
@@ -518,27 +255,6 @@ unsigned long long vectorcall vboard_next(__m128i OP, const int x, Board *next)
 	return _mm_cvtsi128_si64(flipped);
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON
-
-unsigned long long board_next_neon(uint64x2_t OP, const int x, Board *next)
-{
-	uint64x2_t flipped = mm_Flip(OP, x);
-  #if !defined(_MSC_VER) && !defined(__clang__)	// MSVC-arm32 does not have vld1q_lane_u64
-	// arm64-gcc-13: 21, armv8a-clang-16: 23, msvc-arm64-19: 22, gcc-arm-13: 18, clang-armv7-11: 29 // https://godbolt.org/z/cvhns39rK
-	OP = veorq_u64(OP, vorrq_u64(flipped, vld1q_lane_u64((uint64_t *) &X_TO_BIT[x], flipped, 0)));
-	vst1q_u64((uint64_t *) next, vextq_u64(OP, OP, 1));
-  #else	// arm64-gcc-13: 21, armv8a-clang-16: 22, msvc-arm64-19: 21, gcc-arm-13: 23, clang-armv7-11: 27
-	OP = veorq_u64(OP, flipped);
-	vst1q_u64((uint64_t *) next, vcombine_u64(vget_high_u64(OP), vorr_u64(vget_low_u64(OP), vld1_u64((uint64_t *) &X_TO_BIT[x]))));
-  #endif
-	return vgetq_lane_u64(flipped, 0);
-}
-#endif
-
-=======
-=======
 #elif MOVE_GENERATOR == MOVE_GENERATOR_NEON
 
 unsigned long long board_next_neon(uint64x2_t OP, const int x, Board *next)
@@ -556,56 +272,7 @@ unsigned long long board_next_neon(uint64x2_t OP, const int x, Board *next)
 }
 #endif
 
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-/**
-<<<<<<< HEAD
- * @brief Compute a board resulting of an opponent move played on a previous board.
- *
- * Compute the board after passing and playing a move.
- *
- * @param board board to play the move on.
- * @param x opponent move to play.
- * @param next resulting board.
- * @return flipped discs.
- */
-#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)
-
-unsigned long long board_pass_next(const Board *board, const int x, Board *next)
-{
-	__m128i	PO = _mm_shuffle_epi32(_mm_loadu_si128((__m128i *) board), 0x4e);
-	__m128i flipped = mm_Flip(PO, x);
-
-	PO = _mm_xor_si128(PO, _mm_or_si128(flipped, _mm_loadl_epi64((__m128i *) &X_TO_BIT[x])));
-	_mm_storeu_si128((__m128i *) next, _mm_shuffle_epi32(PO, 0x4e));
-
-	return _mm_cvtsi128_si64(flipped);
-}
-
-#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON
-
-unsigned long long board_pass_next(const Board *board, const int x, Board *next)
-{
-	uint64x2_t OP = vld1q_u64((uint64_t *) board);
-	uint64x2_t PO = vextq_u64(OP, OP, 1);
-	uint64x2_t flipped = mm_Flip(PO, x);
-
-#ifdef HAS_CPU_64	// vld1q_lane_u64
-	PO = veorq_u64(PO, vorrq_u64(flipped, vld1q_lane_u64((uint64_t *) &X_TO_BIT[x], flipped, 0)));
-	vst1q_u64((uint64_t *) next, vextq_u64(PO, PO, 1));
-#else
-	PO = veorq_u64(OP, flipped);
-	vst1_u64(&next->player, vget_high_u64(PO));
-	vst1_u64(&next->opponent, vorr_u64(vget_low_u64(PO), vld1_u64(&X_TO_BIT[x])));
-#endif
-	return vgetq_lane_u64(flipped, 0);
-}
-
-#endif
-
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
-=======
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
  * @brief X64 optimized get_moves
  *
  * Diag-7 is converted to diag-9 (v.v.) using vertical mirroring
@@ -617,53 +284,10 @@ unsigned long long board_pass_next(const Board *board, const int x, Board *next)
  */
 #ifdef __AVX2__	// 4 AVX
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
   #if defined(_MSC_VER) || defined(__linux__)	// vectorcall and SYSV-ABI passes __m256i in registers
-=======
-#if (vBoard == __m128i) && (defined(_MSC_VER) || defined(__linux__))	// vectorcall and SYSV-ABI passes __m256i in registers
->>>>>>> 78ce5d7 (more precise rboard/vboard opt; reexamine neon vboard_next)
-=======
-  #if (defined(_MSC_VER) || defined(__linux__))	// vectorcall and SYSV-ABI passes __m256i in registers
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-  #if defined(_MSC_VER) || defined(__linux__)	// vectorcall and SYSV-ABI passes __m256i in registers
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-unsigned long long vectorcall get_moves_avx(__m256i PP, __m256i OO)
-{
-  #else
-unsigned long long get_moves(unsigned long long P, unsigned long long O)	// minGW
-{
-	__m256i	PP = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(P));
-	__m256i OO = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(O));
-  #endif
-	__m256i	MM, flip_l, flip_r, pre_l, pre_r, shift2;
-	__m128i	M;
-	const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1);
-	__m256i	mOO = _mm256_and_si256(OO, _mm256_set_epi64x(0x007E7E7E7E7E7E00, 0x007E7E7E7E7E7E00, 0x00FFFFFFFFFFFF00, 0x7E7E7E7E7E7E7E7E));
-	__m128i occupied = _mm_or_si128(_mm256_castsi256_si128(PP), _mm256_castsi256_si128(OO));
-=======
-unsigned long long get_moves(const unsigned long long P, const unsigned long long O)
-=======
-#if defined(_MSC_VER) || defined(__clang__)
-=======
-#if defined(_MSC_VER) || defined(__linux__)	// vectorcall and SYSV-ABI passes __m256i in registers
->>>>>>> 29ed6b6 (Include gcc linux to get_moves_avx with mm256 params)
-unsigned long long vectorcall get_moves_avx(__m256i PP, __m256i OO)
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-{
-#else
-=======
-  #if (vBoard == __m128i) && (defined(_MSC_VER) || defined(__linux__))	// vectorcall and SYSV-ABI passes __m256i in registers
 unsigned long long vectorcall get_moves_avx(__m256i PP, __m256i OO)
 {
   #else
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
 unsigned long long get_moves(unsigned long long P, unsigned long long O)	// minGW
 {
 	__m256i	PP = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(P));
@@ -672,20 +296,8 @@ unsigned long long get_moves(unsigned long long P, unsigned long long O)	// minG
 	__m256i	MM, flip_l, flip_r, pre_l, pre_r, shift2;
 	__m128i	M;
 	const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	const __m256i mflipH = _mm256_set_epi64x(0x7e7e7e7e7e7e7e7e, 0x7e7e7e7e7e7e7e7e, -1, 0x7e7e7e7e7e7e7e7e);
-
-	PP = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(P));
-	mOO = _mm256_and_si256(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(O)), mflipH);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	__m256i	mOO = _mm256_and_si256(OO, _mm256_set_epi64x(0x7e7e7e7e7e7e7e7e, 0x7e7e7e7e7e7e7e7e, -1, 0x7e7e7e7e7e7e7e7e));
-=======
 	__m256i	mOO = _mm256_and_si256(OO, _mm256_set_epi64x(0x007E7E7E7E7E7E00, 0x007E7E7E7E7E7E00, 0x00FFFFFFFFFFFF00, 0x7E7E7E7E7E7E7E7E));
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
 	__m128i occupied = _mm_or_si128(_mm256_castsi256_si128(PP), _mm256_castsi256_si128(OO));
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
 
 	flip_l = _mm256_and_si256(mOO, _mm256_sllv_epi64(PP, shift1897));
 	flip_r = _mm256_and_si256(mOO, _mm256_srlv_epi64(PP, shift1897));
@@ -698,27 +310,10 @@ unsigned long long get_moves(unsigned long long P, unsigned long long O)	// minG
 	flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2)));
 	flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(pre_l, _mm256_sllv_epi64(flip_l, shift2)));
 	flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2)));
-<<<<<<< HEAD
-<<<<<<< HEAD
-	MM = _mm256_or_si256(_mm256_sllv_epi64(flip_l, shift1897), _mm256_srlv_epi64(flip_r, shift1897));
-
-	M = _mm_or_si128(_mm256_castsi256_si128(MM), _mm256_extracti128_si256(MM, 1));
-	return _mm_cvtsi128_si64(_mm_andnot_si128(occupied, _mm_or_si128(M, _mm_unpackhi_epi64(M, M))));	// mask with empties
-=======
-	MM = _mm256_sllv_epi64(flip_l, shift1897);
-	MM = _mm256_or_si256(MM, _mm256_srlv_epi64(flip_r, shift1897));
-=======
 	MM = _mm256_or_si256(_mm256_sllv_epi64(flip_l, shift1897), _mm256_srlv_epi64(flip_r, shift1897));
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
 
 	M = _mm_or_si128(_mm256_castsi256_si128(MM), _mm256_extracti128_si256(MM, 1));
-<<<<<<< HEAD
-	M = _mm_or_si128(M, _mm_unpackhi_epi64(M, M));
-	return _mm_cvtsi128_si64(M) & ~(P|O);	// mask with empties
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	return _mm_cvtsi128_si64(_mm_andnot_si128(occupied, _mm_or_si128(M, _mm_unpackhi_epi64(M, M))));	// mask with empties
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
 }
 
 #elif defined(__x86_64__) || defined(_M_X64)	// 2 SSE, 2 CPU
@@ -750,15 +345,7 @@ unsigned long long get_moves(const unsigned long long P, const unsigned long lon
 	return moves & ~(P|O);	// mask with empties
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#elif defined(__aarch64__) || defined(_M_ARM64)	// 4 CPU
-=======
-#elif 0	// 4 CPU
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 #elif defined(__aarch64__) || defined(_M_ARM64)	// 4 CPU
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
 unsigned long long get_moves(const unsigned long long P, const unsigned long long O)
 {
@@ -782,26 +369,9 @@ unsigned long long get_moves(const unsigned long long P, const unsigned long lon
 	return moves & ~(P|O);	// mask with empties
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #elif defined(__ARM_NEON)	// 3 Neon, 1 CPU(32)
 
   #ifndef DISPATCH_NEON
-<<<<<<< HEAD
-	#define	get_moves_sse	get_moves	// no dispatch
-  #endif
-=======
-#else // __x86_64__
-=======
-#elif defined(__ARM_NEON__)	// 3 Neon, 1 CPU(32)
-=======
-#elif defined(__ARM_NEON)	// 3 Neon, 1 CPU(32)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-
-  #ifdef hasNeon
-=======
->>>>>>> b1cae3c (Rewrite AVX512 LASTFLIP_HIGHCUT not to use kortest)
 	#define	get_moves_sse	get_moves	// no dispatch
   #endif
 
@@ -847,109 +417,29 @@ unsigned long long get_moves_sse(unsigned long long P, unsigned long long O)
 }
 
 #else // AVX/x86_64/arm
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 /**
  * @brief SSE optimized get_moves for x86 - 3 SSE, 1 CPU(32)
  *
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-#if defined(hasSSE2) || defined(USE_MSVC_X86) || defined(ANDROID)
+  #if defined(hasSSE2) || defined(USE_MSVC_X86) || defined(ANDROID)
 
-#ifdef hasSSE2
-#define	get_moves_sse	get_moves	// no dispatch
-#endif
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
+    #ifdef hasSSE2
+	#define	get_moves_sse	get_moves	// no dispatch
+    #endif
 
 unsigned long long get_moves_sse(const unsigned long long P, const unsigned long long O)
 {
 	unsigned int	mO, movesL, movesH, flip1, pre1;
-<<<<<<< HEAD
-	uint64x1_t	rP, rO;
-	uint64x2_t	PP, OO, MM, flip, pre;
+	__m128i	OP, rOP, PP, OO, MM, flip, pre;
 
-		/* vertical_mirror in PP[1], OO[1] */				mO = (unsigned int) O & 0x7e7e7e7e;
-	rP = vreinterpret_u64_u8(vrev64_u8(vcreate_u8(P)));			flip1  = mO & ((unsigned int) P << 1);
-	PP = vcombine_u64(vcreate_u64(P), rP);					flip1 |= mO & (flip1 << 1);
-										pre1   = mO & (mO << 1);
-	rO = vreinterpret_u64_u8(vrev64_u8(vcreate_u8(O)));			flip1 |= pre1 & (flip1 << 2);
-	OO = vcombine_u64(vcreate_u64(O), rO);					flip1 |= pre1 & (flip1 << 2);
-										movesL = flip1 << 1;
-
-	flip = vandq_u64(OO, vshlq_n_u64(PP, 8));				flip1  = mO & ((unsigned int) P >> 1);
-	flip = vorrq_u64(flip, vandq_u64(OO, vshlq_n_u64(flip, 8)));		flip1 |= mO & (flip1 >> 1);
-	pre  = vandq_u64(OO, vshlq_n_u64(OO, 8));				pre1 >>= 1;
-	flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 16)));		flip1 |= pre1 & (flip1 >> 2);
-	flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 16)));		flip1 |= pre1 & (flip1 >> 2);
-	MM = vshlq_n_u64(flip, 8);						movesL |= flip1 >> 1;
-
-	OO = vandq_u64(OO, vdupq_n_u64(0x7e7e7e7e7e7e7e7e));			mO = (unsigned int) (O >> 32) & 0x7e7e7e7e;
-	flip = vandq_u64(OO, vshlq_n_u64(PP, 7));				flip1  = mO & ((unsigned int) (P >> 32) << 1);
-	flip = vorrq_u64(flip, vandq_u64(OO, vshlq_n_u64(flip, 7)));		flip1 |= mO & (flip1 << 1);
-	pre  = vandq_u64(OO, vshlq_n_u64(OO, 7));				pre1   = mO & (mO << 1);
-	flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 14)));		flip1 |= pre1 & (flip1 << 2);
-	flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 14)));		flip1 |= pre1 & (flip1 << 2);
-	MM = vorrq_u64(MM, vshlq_n_u64(flip, 7));				movesH = flip1 << 1;
-
-	flip = vandq_u64(OO, vshlq_n_u64(PP, 9));				flip1  = mO & ((unsigned int) (P >> 32) >> 1);
-	flip = vorrq_u64(flip, vandq_u64(OO, vshlq_n_u64(flip, 9)));		flip1 |= mO & (flip1 >> 1);
-	pre  = vandq_u64(OO, vshlq_n_u64(OO, 9));				pre1 >>= 1;
-	flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 18)));		flip1 |= pre1 & (flip1 >> 2);
-	flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 18)));		flip1 |= pre1 & (flip1 >> 2);
-	MM = vorrq_u64(MM, vshlq_n_u64(flip, 9));				movesH |= flip1 >> 1;
-
-	movesL |= vgetq_lane_u32(vreinterpretq_u32_u64(MM), 0) | bswap_int(vgetq_lane_u32(vreinterpretq_u32_u64(MM), 3));
-	movesH |= vgetq_lane_u32(vreinterpretq_u32_u64(MM), 1) | bswap_int(vgetq_lane_u32(vreinterpretq_u32_u64(MM), 2));
-	return (movesL | ((unsigned long long) movesH << 32)) & ~(P|O);	// mask with empties
-}
-
-#else // AVX/x86_64/arm
-/**
- * @brief SSE optimized get_moves for x86 - 3 SSE, 1 CPU(32)
- *
- */
-  #if defined(hasSSE2) || defined(USE_MSVC_X86) || defined(ANDROID)
-
-=======
-  #if defined(hasSSE2) || defined(USE_MSVC_X86) || defined(ANDROID)
-
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
-    #ifdef hasSSE2
-	#define	get_moves_sse	get_moves	// no dispatch
-    #endif
-
-unsigned long long get_moves_sse(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int	mO, movesL, movesH, flip1, pre1;
-	__m128i	OP, rOP, PP, OO, MM, flip, pre;
-=======
-	__m128i	OP, rOP, PP, OO, MM, flip, pre;
-<<<<<<< HEAD
-	const __m128i mask7e = _mm_set1_epi8(0x7e);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-
-		// vertical_mirror in PP[1], OO[1]
-	OP  = _mm_unpacklo_epi64(_mm_cvtsi64_si128(P), _mm_cvtsi64_si128(O));		mO = (unsigned int) O & 0x7e7e7e7eU;
-	rOP = _mm_shufflelo_epi16(OP, 0x1B);						flip1  = mO & ((unsigned int) P << 1);
-	rOP = _mm_shufflehi_epi16(rOP, 0x1B);						flip1 |= mO & (flip1 << 1);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	rOP = _mm_or_si128(_mm_srli_epi16(rOP, 8), _mm_slli_epi16(rOP, 8));		pre1   = mO & (mO << 1);
-=======
-											pre1   = mO & (mO << 1);
-	rOP = _mm_or_si128(_mm_srli_epi16(rOP, 8), _mm_slli_epi16(rOP, 8));
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	rOP = _mm_or_si128(_mm_srli_epi16(rOP, 8), _mm_slli_epi16(rOP, 8));		pre1   = mO & (mO << 1);
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-	    										flip1 |= pre1 & (flip1 << 2);
-	PP  = _mm_unpacklo_epi64(OP, rOP);						flip1 |= pre1 & (flip1 << 2);
-	OO  = _mm_unpackhi_epi64(OP, rOP);						movesL = flip1 << 1;
+		// vertical_mirror in PP[1], OO[1]
+	OP  = _mm_unpacklo_epi64(_mm_cvtsi64_si128(P), _mm_cvtsi64_si128(O));		mO = (unsigned int) O & 0x7e7e7e7eU;
+	rOP = _mm_shufflelo_epi16(OP, 0x1B);						flip1  = mO & ((unsigned int) P << 1);
+	rOP = _mm_shufflehi_epi16(rOP, 0x1B);						flip1 |= mO & (flip1 << 1);
+	rOP = _mm_or_si128(_mm_srli_epi16(rOP, 8), _mm_slli_epi16(rOP, 8));		pre1   = mO & (mO << 1);
+	    										flip1 |= pre1 & (flip1 << 2);
+	PP  = _mm_unpacklo_epi64(OP, rOP);						flip1 |= pre1 & (flip1 << 2);
+	OO  = _mm_unpackhi_epi64(OP, rOP);						movesL = flip1 << 1;
 
 	flip = _mm_and_si128(OO, _mm_slli_epi64(PP, 8));				flip1  = mO & ((unsigned int) P >> 1);
 	flip = _mm_or_si128(flip, _mm_and_si128(OO, _mm_slli_epi64(flip, 8)));		flip1 |= mO & (flip1 >> 1);
@@ -958,15 +448,7 @@ unsigned long long get_moves_sse(const unsigned long long P, const unsigned long
 	flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 16)));	flip1 |= pre1 & (flip1 >> 2);
 	MM = _mm_slli_epi64(flip, 8);							movesL |= flip1 >> 1;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	OO = _mm_and_si128(OO, _mm_set1_epi8(0x7e));					mO = (unsigned int) (O >> 32) & 0x7e7e7e7eU;
-=======
-	OO = _mm_and_si128(OO, mask7e);							mO = (unsigned int) (O >> 32) & 0x7e7e7e7eU;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	OO = _mm_and_si128(OO, _mm_set1_epi8(0x7e));					mO = (unsigned int) (O >> 32) & 0x7e7e7e7eU;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flip = _mm_and_si128(OO, _mm_slli_epi64(PP, 7));				flip1  = mO & ((unsigned int) (P >> 32) << 1);
 	flip = _mm_or_si128(flip, _mm_and_si128(OO, _mm_slli_epi64(flip, 7)));		flip1 |= mO & (flip1 << 1);
 	pre = _mm_and_si128(OO, _mm_slli_epi64(OO, 7));					pre1   = mO & (mO << 1);
@@ -988,22 +470,9 @@ unsigned long long get_moves_sse(const unsigned long long P, const unsigned long
 	return (movesL | ((unsigned long long) movesH << 32)) & ~(P|O);	// mask with empties
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
   #else // non-VEX asm
 
 unsigned long long get_moves_sse(const unsigned long long P, const unsigned long long O)
-=======
-#else // non-VEX asm
-
-<<<<<<< HEAD
-unsigned long long get_moves_sse(unsigned long long P, unsigned long long O)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-unsigned long long get_moves_sse(const unsigned long long P, const unsigned long long O)
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
 {
 	unsigned long long moves;
 	static const V2DI mask7e = {{ 0x7e7e7e7e7e7e7e7eULL, 0x7e7e7e7e7e7e7e7eULL }};
@@ -1113,50 +582,19 @@ unsigned long long get_moves_sse(const unsigned long long P, const unsigned long
 	return moves;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
   #endif // hasSSE2
 #endif // x86
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || (defined(__ARM_NEON) && !defined(DISPATCH_NEON))
 
 /**
  * @brief SSE/neon optimized get_stable_edge
-=======
-#endif // hasSSE2
-=======
-  #endif // hasSSE2
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
-#endif // x86
-
-#if defined(hasSSE2) || defined(hasNeon)	// no dispatch
-
-/**
- * @brief SSE optimized get_stable_edge
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-#if defined(hasSSE2) || defined(hasNeon)
-=======
-#if defined(hasSSE2) || (defined(__ARM_NEON) && !defined(DISPATCH_NEON))
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-
-/**
- * @brief SSE/neon optimized get_stable_edge
->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX)
  *
  * @param P bitboard with player's discs.
  * @param O bitboard with opponent's discs.
  * @return a bitboard with (some of) player's stable discs.
  *
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
   #if defined(__aarch64__) || defined(_M_ARM64)	// for vaddvq
 unsigned long long get_stable_edge(unsigned long long P, unsigned long long O)
 {	// compute the exact stable edges (from precomputed tables)
@@ -1168,7 +606,6 @@ unsigned long long get_stable_edge(unsigned long long P, unsigned long long O)
 	return edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 0)]
 	    |  (unsigned long long) edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 7)] << 56
 	    |  unpackA2A7(a1a8) | unpackH2H7(h1h8);
-<<<<<<< HEAD
 }
 
   #elif defined(__ARM_NEON)	// Neon kindergarten
@@ -1195,75 +632,6 @@ unsigned long long get_stable_edge(const unsigned long long P, const unsigned lo
 {
 	// compute the exact stable edges (from precomputed tables)
 	unsigned int a1a8, h1h8;
-=======
-static unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O)
-=======
-#if defined(__aarch64__) || defined(_M_ARM64)
-unsigned long long get_stable_edge(unsigned long long P, unsigned long long O)
-=======
-#if defined(__aarch64__) || defined(_M_ARM64)	// for vaddvq
-<<<<<<< HEAD
-unsigned long long get_stable_edge_sse(unsigned long long P, unsigned long long O)
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
-unsigned long long get_stable_edge(unsigned long long P, unsigned long long O)
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-{	// compute the exact stable edges (from precomputed tables)
-	// const int16x8_t shiftv = { 0, 1, 2, 3, 4, 5, 6, 7 };	// error on MSVC
-	const uint64x2_t shiftv = { 0x0003000200010000, 0x0007000600050004 };
-	uint8x16_t PO = vzip1q_u8(vreinterpretq_u8_u64(vdupq_n_u64(O)), vreinterpretq_u8_u64(vdupq_n_u64(P)));
-	unsigned int a1a8 = edge_stability[vaddvq_u16(vshlq_u16(vreinterpretq_u16_u8(vandq_u8(PO, vdupq_n_u8(1))), vreinterpretq_s16_u64(shiftv)))];
-	unsigned int h1h8 = edge_stability[vaddvq_u16(vshlq_u16(vreinterpretq_u16_u8(vshrq_n_u8(PO, 7)), vreinterpretq_s16_u64(shiftv)))];
-	return edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 0)]
-	    |  (unsigned long long) edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 7)] << 56
-	    |  unpackA1A8(a1a8) | unpackH1H8(h1h8);
-=======
->>>>>>> 21206f2 (Exclude corners from unpackA2A7/H2H7 to ease CPU_64 kindergarten)
-}
-
-  #elif defined(hasNeon) // Neon kindergarten
-unsigned long long get_stable_edge(unsigned long long P, unsigned long long O)
-{	// compute the exact stable edges (from precomputed tables)
-	const uint64x2_t kMul  = { 0x1020408001020408, 0x1020408001020408 };
-	uint64x2_t PP = vcombine_u64(vshl_n_u64(vcreate_u64(P), 7), vcreate_u64(P));
-	uint64x2_t OO = vcombine_u64(vshl_n_u64(vcreate_u64(O), 7), vcreate_u64(O));
-	uint32x4_t QP = vmulq_u32(vreinterpretq_u32_u64(kMul), vreinterpretq_u32_u8(vshrq_n_u8(vreinterpretq_u8_u64(PP), 7)));
-	uint32x4_t QO = vmulq_u32(vreinterpretq_u32_u64(kMul), vreinterpretq_u32_u8(vshrq_n_u8(vreinterpretq_u8_u64(OO), 7)));
-	uint32x2_t DP = vpadd_u32(vget_low_u32(QP), vget_high_u32(QP));	// P_h1h8 * * * P_a1a8 * * *
-	uint32x2_t DO = vpadd_u32(vget_low_u32(QO), vget_high_u32(QO));	// O_h1h8 * * * O_a1a8 * * *
-	uint8x8_t DB = vtrn_u8(vreinterpret_u8_u32(DO), vreinterpret_u8_u32(DP)).val[1];	// P_h1h8 O_h1h8 * * P_a1a8 O_a1a8 * *
-	unsigned int a1a8 = edge_stability[vget_lane_u16(vreinterpret_u16_u8(DB), 1)];
-	unsigned int h1h8 = edge_stability[vget_lane_u16(vreinterpret_u16_u8(DB), 3)];
-	uint8x16_t PO = vzipq_u8(vreinterpretq_u8_u64(OO), vreinterpretq_u8_u64(PP)).val[1];
-	return edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 0)]
-	    |  (unsigned long long) edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 7)] << 56
-	    |  unpackA2A7(a1a8) | unpackH2H7(h1h8);
-}
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-#elif defined(__x86_64__) || defined(_M_X64)
-=======
-  #elif defined(hasSSE2)
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
-unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-#elif defined(hasSSE2) || defined(USE_MSVC_X86)
-<<<<<<< HEAD
-unsigned long long get_stable_edge_sse(const unsigned long long P, const unsigned long long O)
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
-unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O)
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-{
-	// compute the exact stable edges (from precomputed tables)
-<<<<<<< HEAD
-	unsigned int a1a8po, h1h8po;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	unsigned int a1a8, h1h8;
->>>>>>> 93110ce (Use computation or optional pdep to unpack A1_A8)
 	unsigned long long stable_edge;
 
 	__m128i	P0 = _mm_cvtsi64_si128(P);
@@ -1273,8 +641,6 @@ unsigned long long get_stable_edge(const unsigned long long P, const unsigned lo
 		| ((unsigned long long) edge_stability[_mm_extract_epi16(PO, 7)] << 56);
 
 	PO = _mm_unpacklo_epi64(O0, P0);
-<<<<<<< HEAD
-<<<<<<< HEAD
 	a1a8 = edge_stability[_mm_movemask_epi8(_mm_slli_epi64(PO, 7))];
 	h1h8 = edge_stability[_mm_movemask_epi8(PO)];
 	stable_edge |= unpackA2A7(a1a8) | unpackH2H7(h1h8);
@@ -1282,13 +648,11 @@ unsigned long long get_stable_edge(const unsigned long long P, const unsigned lo
 	return stable_edge;
 }
   #endif
-<<<<<<< HEAD
 
 /**
  * @brief SSE/neon optimized get_edge_stability
  *
  * Compute the exact stable edges from precomputed tables.
-<<<<<<< HEAD
  *
  * @param P bitboard with player's discs.
  * @param O bitboard with opponent's discs.
@@ -1341,102 +705,11 @@ int get_edge_stability(const unsigned long long P, const unsigned long long O)
 
 /**
  * @brief AVX2/SSE/neon optimized get_full_lines.
-=======
-	a1a8po = _mm_movemask_epi8(_mm_slli_epi64(PO, 7));
-	h1h8po = _mm_movemask_epi8(PO);
-#if 0 // def __BMI2__ // pdep is slow on AMD
-	stable_edge |= _pdep_u64(edge_stability[a1a8po], 0x0101010101010101)
-		| _pdep_u64(edge_stability[h1h8po], 0x8080808080808080);
-#else
-	stable_edge |= A1_A8[edge_stability[a1a8po]] | (A1_A8[edge_stability[h1h8po]] << 7);
-#endif
-=======
-	a1a8 = edge_stability[_mm_movemask_epi8(_mm_slli_epi64(PO, 7))];
-	h1h8 = edge_stability[_mm_movemask_epi8(PO)];
-	stable_edge |= unpackA2A7(a1a8) | unpackH2H7(h1h8);
-
->>>>>>> 93110ce (Use computation or optional pdep to unpack A1_A8)
-	return stable_edge;
-}
-#endif
-=======
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
-
-/**
-<<<<<<< HEAD
-<<<<<<< HEAD
- * @brief X64 optimized get_stability
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-=======
- * @brief SSE optimized get_edge_stability
-=======
->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX)
- *
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return the number of stable discs on the edges.
- *
- */
-  #if defined(__aarch64__) || defined(_M_ARM64)	// for vaddvq
-int get_edge_stability(const unsigned long long P, const unsigned long long O)
-{
-	const uint64x2_t shiftv = { 0x0003000200010000, 0x0007000600050004 };
-	uint8x16_t PO = vzip1q_u8(vreinterpretq_u8_u64(vdupq_n_u64(O)), vreinterpretq_u8_u64(vdupq_n_u64(P)));
-	uint8x8_t packedstable = vcreate_u8((edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 0)]
-	  | edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 7)] << 8) & 0x7e7e);
-	packedstable = vset_lane_u8(edge_stability[vaddvq_u16(vshlq_u16(vreinterpretq_u16_u8(vandq_u8(PO, vdupq_n_u8(1))), vreinterpretq_s16_u64(shiftv)))], packedstable, 2);
-	packedstable = vset_lane_u8(edge_stability[vaddvq_u16(vshlq_u16(vreinterpretq_u16_u8(vshrq_n_u8(PO, 7)), vreinterpretq_s16_u64(shiftv)))], packedstable, 3);
-	return vaddv_u8(vcnt_u8(packedstable));
-}
-
-  #elif defined(__ARM_NEON)	// Neon kindergarten
-int get_edge_stability(const unsigned long long P, const unsigned long long O)
-{
-	const uint64x2_t kMul  = { 0x1020408001020408, 0x1020408001020408 };
-	uint64x2_t PP = vcombine_u64(vshl_n_u64(vcreate_u64(P), 7), vcreate_u64(P));
-	uint64x2_t OO = vcombine_u64(vshl_n_u64(vcreate_u64(O), 7), vcreate_u64(O));
-	uint32x4_t QP = vmulq_u32(vreinterpretq_u32_u64(kMul), vreinterpretq_u32_u8(vshrq_n_u8(vreinterpretq_u8_u64(PP), 7)));
-	uint32x4_t QO = vmulq_u32(vreinterpretq_u32_u64(kMul), vreinterpretq_u32_u8(vshrq_n_u8(vreinterpretq_u8_u64(OO), 7)));
-	uint32x2_t DP = vpadd_u32(vget_low_u32(QP), vget_high_u32(QP));	// P_h1h8 * * * P_a1a8 * * *
-	uint32x2_t DO = vpadd_u32(vget_low_u32(QO), vget_high_u32(QO));	// O_h1h8 * * * O_a1a8 * * *
-	uint8x8_t DB = vtrn_u8(vreinterpret_u8_u32(DO), vreinterpret_u8_u32(DP)).val[1];	// P_h1h8 O_h1h8 * * P_a1a8 O_a1a8 * *
-	uint8x16_t PO = vzipq_u8(vreinterpretq_u8_u64(OO), vreinterpretq_u8_u64(PP)).val[1];
-	uint8x8_t packedstable = vcreate_u8((edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 0)]
-	  | edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 7)] << 8) & 0x7e7e);
-	packedstable = vset_lane_u8(edge_stability[vget_lane_u16(vreinterpret_u16_u8(DB), 1)], packedstable, 2);
-	packedstable = vset_lane_u8(edge_stability[vget_lane_u16(vreinterpret_u16_u8(DB), 3)], packedstable, 3);
-	return vget_lane_u32(vpaddl_u16(vpaddl_u8(vcnt_u8(packedstable))), 0);
-}
-
-  #elif defined(hasSSE2)
-int get_edge_stability(const unsigned long long P, const unsigned long long O)
-{
-	__m128i	P0 = _mm_cvtsi64_si128(P);
-	__m128i	O0 = _mm_cvtsi64_si128(O);
-	__m128i	PO = _mm_unpacklo_epi8(O0, P0);
-	unsigned int packedstable = edge_stability[_mm_extract_epi16(PO, 0)] | edge_stability[_mm_extract_epi16(PO, 7)] << 8;
-	PO = _mm_unpacklo_epi64(O0, P0);
-	packedstable |= edge_stability[_mm_movemask_epi8(_mm_slli_epi64(PO, 7))] << 16 | edge_stability[_mm_movemask_epi8(PO)] << 24;
-	return bit_count_32(packedstable & 0xffff7e7e);
-}
-  #endif
-
-/**
-<<<<<<< HEAD
->>>>>>> dc7c79c (Omit unpack from get_edge_stability)
- * @brief AVX2/SSE optimized get_stability
->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines)
-=======
- * @brief AVX2/SSE/neon optimized get_full_lines.
->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX)
  *
  * SSE pcmpeqb for horizontal get_full_lines.
  * CPU rotate for vertical get_full_lines.
  * Diag-7 is converted to diag-9 using vertical mirroring.
  * 
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @param disc all discs on the board.
  * @param full all 1 if full line, otherwise all 0.
  */
@@ -1446,48 +719,14 @@ static __m256i vectorcall get_full_lines(const unsigned long long disc)
 {
 	__m128i l81, l79, l8;
 	__m256i	v4_disc, lr79;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	const __m128i kff  = _mm_set1_epi8(-1);
-=======
-	const __m128i kff  = _mm_set1_epi8(0xff);
->>>>>>> 593fff4 (use appropriate _mm_set1)
-=======
 	const __m128i kff  = _mm_set1_epi8(-1);
->>>>>>> 47c2589 (Fix w32-modern build and gcc build)
-    #if 0 // PCMPEQQ
-=======
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return the number of stable discs.
-=======
- * @param disc all discs on the board.
- * @param full all 1 if full line, otherwise all 0.
->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX)
- */
-  #ifdef __AVX2__
-
-static __m256i vectorcall get_full_lines(const unsigned long long disc)
-{
-	__m128i l81, l79, l8;
-	__m256i	v4_disc, lr79;
-	const __m128i kff  = _mm_set1_epi64x(0xffffffffffffffff);
-<<<<<<< HEAD
-#if 0 // PCMPEQQ
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
     #if 0 // PCMPEQQ
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
 	static const V4DI m791 = {{ 0x0402010000804020, 0x2040800000010204, 0x0804020180402010, 0x1020408001020408 }};	// V8SI
 	static const V4DI m792 = {{ 0x0000008040201008, 0x0000000102040810, 0x1008040201000000, 0x0810204080000000 }};
 	static const V4DI m793 = {{ 0x0000804020100804, 0x0000010204081020, 0x2010080402010000, 0x0408102040800000 }};
 	static const V4DI m794 = {{ 0x0080402010080402, 0x0001020408102040, 0x4020100804020100, 0x0204081020408000 }};
 	static const V2DI m795 = {{ 0x8040201008040201, 0x0102040810204080 }};
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines)
 	l81 = _mm_cvtsi64_si128(disc);				v4_disc = _mm256_broadcastq_epi64(l81);
 	l81 = _mm_cmpeq_epi8(kff, l81);				lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4);
 								lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m792.v4), m792.v4), m792.v4));
@@ -1495,36 +734,14 @@ static __m256i vectorcall get_full_lines(const unsigned long long disc)
 	l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 1));	lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m794.v4), m794.v4), m794.v4));
 	l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 2));	l79 = _mm_and_si128(_mm_cmpeq_epi64(_mm_and_si128(_mm256_castsi256_si128(v4_disc), m795.v2), m795.v2), m795.v2);
 	l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 4));	l79 = _mm_or_si128(l79, _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79)));
-<<<<<<< HEAD
 
     #elif 0 // PCMPEQD
-=======
-	l81 = _mm_cvtsi64_si128(disc);		v4_disc = _mm256_broadcastq_epi64(l81);
-	l81 = _mm_cmpeq_epi8(kff, l81);		lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4);
-						lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m792.v4), m792.v4), m792.v4));
-	l8 = disc;				lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m793.v4), m793.v4), m793.v4));
-	l8 &= (l8 >> 8) | (l8 << 56);		lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m794.v4), m794.v4), m794.v4));
-	l8 &= (l8 >> 16) | (l8 << 48);		l79 = _mm_and_si128(_mm_cmpeq_epi64(_mm_and_si128(_mm256_castsi256_si128(v4_disc), m795.v2), m795.v2), m795.v2);
-	l8 &= (l8 >> 32) | (l8 << 32);		l79 = _mm_or_si128(l79, _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79)));
-=======
->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines)
-
-<<<<<<< HEAD
-#elif 0 // PCMPEQD
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-    #elif 0 // PCMPEQD
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
 	__m256i lm79;
 	static const V4DI m790 = {{ 0x80c0e0f0783c1e0f, 0x0103070f1e3c78f0, 0x70381c0e07030100, 0x0e1c3870e0c08000 }};
 	static const V4DI m791 = {{ 0x0402010000804020, 0x2040800000010204, 0x0804020180402010, 0x1020408001020408 }};	// V8SI
 	static const V4DI m792 = {{ 0x2010884440201088, 0x0408112202040811, 0x2211080411080402, 0x4488102088102040 }};	// V8SI
 	static const V4DI m793 = {{ 0x8844221110884422, 0x1122448808112244, 0x0000000044221108, 0x0000000022448810 }};	// V8SI
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines)
 	l81 = _mm_cvtsi64_si128(disc);				v4_disc = _mm256_broadcastq_epi64(l81);
 	l81 = _mm_cmpeq_epi8(kff, l81);				lm79 = _mm256_and_si256(v4_disc, m790.v4);
 								lm79 = _mm256_or_si256(lm79, _mm256_shuffle_epi32(lm79, 0xb1));
@@ -1533,27 +750,8 @@ static __m256i vectorcall get_full_lines(const unsigned long long disc)
 	l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 2));	lr79 = _mm256_and_si256(_mm256_or_si256(lr79, _mm256_shuffle_epi32(lr79, 0xb1)), m790.v4);
 	l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 4));	lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4));
 								l79 = _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79));
-<<<<<<< HEAD
 
     #else // Kogge-Stone
-=======
-	l81 = _mm_cvtsi64_si128(disc);		v4_disc = _mm256_broadcastq_epi64(l81);
-	l81 = _mm_cmpeq_epi8(kff, l81);		lm79 = _mm256_and_si256(v4_disc, m790.v4);
-						lm79 = _mm256_or_si256(lm79, _mm256_shuffle_epi32(lm79, 0xb1));
-	l8 = disc;				lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(lm79, m792.v4), m792.v4), m792.v4);
-	l8 &= (l8 >> 8) | (l8 << 56);		lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(lm79, m793.v4), m793.v4), m793.v4));
-	l8 &= (l8 >> 16) | (l8 << 48);		lr79 = _mm256_and_si256(_mm256_or_si256(lr79, _mm256_shuffle_epi32(lr79, 0xb1)), m790.v4);
-	l8 &= (l8 >> 32) | (l8 << 32);		lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4));
-						l79 = _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79));
-=======
->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines)
-
-<<<<<<< HEAD
-#else // Kogge-Stone
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-    #else // Kogge-Stone
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
 	const __m128i mcpyswap = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
 	const __m128i mbswapll = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
 	static const V4DI shiftlr[] = {{{ 9, 7, 7, 9 }}, {{ 18, 14, 14, 18 }}, {{ 36, 28, 28, 36 }}};
@@ -1561,10 +759,6 @@ static __m256i vectorcall get_full_lines(const unsigned long long disc)
 	static const V4DI e791 = {{ 0xffffc0c0c0c0c0c0, 0xffff030303030303, 0xffff030303030303, 0xffffc0c0c0c0c0c0 }};
 	static const V4DI e792 = {{ 0xfffffffff0f0f0f0, 0xffffffff0f0f0f0f, 0xffffffff0f0f0f0f, 0xfffffffff0f0f0f0 }};
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines)
 	l81 = _mm_cvtsi64_si128(disc);				v4_disc = _mm256_castsi128_si256(_mm_shuffle_epi8(l81, mcpyswap));
 	l81 = _mm_cmpeq_epi8(kff, l81);				v4_disc = _mm256_permute4x64_epi64(v4_disc, 0x50);	// disc, disc, rdisc, rdisc
 								lr79 = _mm256_and_si256(v4_disc, _mm256_or_si256(e790.v4, _mm256_srlv_epi64(v4_disc, shiftlr[0].v4)));
@@ -1572,16 +766,12 @@ static __m256i vectorcall get_full_lines(const unsigned long long disc)
 	l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 1));	lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e792.v4, _mm256_srlv_epi64(lr79, shiftlr[2].v4)));
 	l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 2));	l79 = _mm_shuffle_epi8(_mm256_extracti128_si256(lr79, 1), mbswapll);
 	l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 4));	l79 = _mm_and_si128(l79, _mm256_castsi256_si128(lr79));
-<<<<<<< HEAD
-<<<<<<< HEAD
     #endif
 	l81 = _mm_unpacklo_epi64(l81, l8);
 	return _mm256_insertf128_si256(_mm256_castsi128_si256(l81), l79, 1);
-<<<<<<< HEAD
 }
 
   #elif defined(__ARM_NEON)
-<<<<<<< HEAD
 
 void get_full_lines(const unsigned long long disc, unsigned long long full[4])
 {
@@ -1631,1062 +821,6 @@ void get_full_lines(const unsigned long long disc, unsigned long long full[4])
   #endif
 #endif // hasSSE2/__ARM_NEON
 
-#ifdef __AVX2__
-/**
- * @brief AVX2 optimized get_stability
- *
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return the number of stable discs.
- */
-
-// compute the other stable discs (ie discs touching another stable disc in each flipping direction).
-static int vectorcall get_spreaded_stability(unsigned long long stable, unsigned long long P_central, __m256i v4_full)
-{
-	__m128i	v2_stable, v2_old_stable, v2_P_central;
-	__m256i	v4_stable;
-	const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1);
-=======
-	l81 = _mm_cvtsi64_si128(disc);		v4_disc = _mm256_castsi128_si256(_mm_shuffle_epi8(l81, mcpyswap));
-	l81 = _mm_cmpeq_epi8(kff, l81);		lr79 = _mm256_permute4x64_epi64(v4_disc, 0x50);	// disc, disc, rdisc, rdisc
-						lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e790.v4, _mm256_srlv_epi64(lr79, shiftlr[0].v4)));
-	l8 = disc;				lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e791.v4, _mm256_srlv_epi64(lr79, shiftlr[1].v4)));
-	l8 &= (l8 >> 8) | (l8 << 56);		lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e792.v4, _mm256_srlv_epi64(lr79, shiftlr[2].v4)));
-	l8 &= (l8 >> 16) | (l8 << 48);		l79 = _mm_shuffle_epi8(_mm256_extracti128_si256(lr79, 1), mbswapll);
-	l8 &= (l8 >> 32) | (l8 << 32);		l79 = _mm_and_si128(l79, _mm256_castsi256_si128(lr79));
-
-=======
->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines)
-#endif
-=======
-    #endif
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
-	l81 = _mm_unpacklo_epi64(l81, l8);
-	_mm256_storeu_si256((__m256i *) full, _mm256_insertf128_si256(_mm256_castsi128_si256(l81), l79, 1));
-	l81 = _mm_and_si128(l81, l79);
-	_mm_storel_epi64((__m128i *) &full[4], _mm_and_si128(l81, _mm_shuffle_epi32(l81, 0x4e)));
-}
-
-int get_stability_fulls(const unsigned long long P, const unsigned long long O, unsigned long long full[5])
-{
-	unsigned long long stable, P_central;
-	__m128i	v2_stable, v2_old_stable, v2_P_central;
-	__m256i	v4_stable, v4_full;
-	const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1);
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	// add full lines
-	v2_stable = _mm_and_si128(l81, l79);
-	stable |= _mm_cvtsi128_si64(_mm_and_si128(v2_stable, _mm_unpackhi_epi64(v2_stable, v2_stable))) & P_central;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	// compute the exact stable edges (from precomputed tables) and add full lines
-<<<<<<< HEAD
-	stable = get_stable_edge_sse(P, O) | (get_all_full_lines(disc, &full) & P_central);
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-=======
-	stable = get_stable_edge_sse(P, O) | (get_all_full_lines(P | O, &full) & P_central);
->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line)
-=======
-	// compute the exact stable edges (from precomputed tables)
-=======
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-	get_all_full_lines(P | O, full);
-
-	// compute the exact stable edges (from precomputed tables)
-	stable = get_stable_edge(P, O);
-
-	// add full lines
-	P_central = (P & 0x007e7e7e7e7e7e00);
-<<<<<<< HEAD
-	stable |= (allfull & P_central);
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-=======
-	stable |= (full[4] & P_central);
->>>>>>> 4303b09 (Returns all full lines in full[4])
-
-	if (stable == 0)
-		return 0;
-
-<<<<<<< HEAD
-=======
-	// now compute the other stable discs (ie discs touching another stable disc in each flipping direction).
-<<<<<<< HEAD
-	v4_full = _mm256_insertf128_si256(_mm256_castsi128_si256(l81), l79, 1);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability)
-	v2_stable = _mm_cvtsi64_si128(stable);
-	v2_P_central = _mm_cvtsi64_si128(P_central);
-	v4_full = _mm256_loadu_si256((__m256i *) full);
-	do {
-		v2_old_stable = v2_stable;
-		v4_stable = _mm256_broadcastq_epi64(v2_stable);
-		v4_stable = _mm256_or_si256(_mm256_or_si256(_mm256_srlv_epi64(v4_stable, shift1897), _mm256_sllv_epi64(v4_stable, shift1897)), v4_full);
-		v2_stable = _mm_and_si128(_mm256_castsi256_si128(v4_stable), _mm256_extracti128_si256(v4_stable, 1));
-		v2_stable = _mm_and_si128(v2_stable, _mm_unpackhi_epi64(v2_stable, v2_stable));
-		v2_stable = _mm_or_si128(v2_old_stable, _mm_and_si128(v2_stable, v2_P_central));
-	} while (!_mm_testc_si128(v2_old_stable, v2_stable));
-
-	return bit_count(_mm_cvtsi128_si64(v2_stable));
-=======
->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX)
-}
-<<<<<<< HEAD
-#elif defined(hasSSE2) && !defined(HAS_CPU_64)
-// 32bit SSE optimized get_spreaded_stability
-int get_spreaded_stability(unsigned long long stable, unsigned long long P_central, unsigned long long full[4])
-{
-	__m128i v_stable, stable_vh, stable_d79, old_stable;
-
-	if (stable == 0)	// (2%)
-		return 0;
-
-	v_stable = _mm_cvtsi64_si128(stable);
-	do {
-		old_stable = v_stable;
-		stable_vh = _mm_loadu_si128((__m128i *) &full[0]);
-		stable_vh = _mm_or_si128(stable_vh, _mm_unpacklo_epi64(_mm_srli_epi64(v_stable, 1), _mm_srli_epi64(v_stable, 8)));
-		stable_vh = _mm_or_si128(stable_vh, _mm_unpacklo_epi64(_mm_slli_epi64(v_stable, 1), _mm_slli_epi64(v_stable, 8)));
-		stable_d79 = _mm_loadu_si128((__m128i *) &full[2]);
-		stable_d79 = _mm_or_si128(stable_d79, _mm_unpacklo_epi64(_mm_srli_epi64(v_stable, 9), _mm_srli_epi64(v_stable, 7)));
-		stable_d79 = _mm_or_si128(stable_d79, _mm_unpacklo_epi64(_mm_slli_epi64(v_stable, 9), _mm_slli_epi64(v_stable, 7)));
-		v_stable = _mm_and_si128(stable_vh, stable_d79);
-		v_stable = _mm_and_si128(v_stable, _mm_unpackhi_epi64(v_stable, v_stable));
-		v_stable = _mm_or_si128(old_stable, _mm_and_si128(v_stable, _mm_loadl_epi64((__m128i *) &P_central)));
-	} while (_mm_movemask_epi8(_mm_cmpeq_epi8(v_stable, old_stable)) != 0xffff);	// (44%)
-
-	return bit_count_si64(v_stable);
-}
-#endif
-
-#ifdef __AVX2__
-// returns stability count only
-int get_stability(const unsigned long long P, const unsigned long long O)
-{
-	unsigned long long stable = get_stable_edge(P, O);	// compute the exact stable edges
-	unsigned long long P_central = P & 0x007e7e7e7e7e7e00;
-
-	__m256i	v4_full = get_full_lines(P | O);	// add full lines
-	__m128i v2_full = _mm_and_si128(_mm256_castsi256_si128(v4_full), _mm256_extracti128_si256(v4_full, 1));
-	stable |= (P_central & _mm_cvtsi128_si64(_mm_and_si128(v2_full, _mm_unpackhi_epi64(v2_full, v2_full))));
-
-	return get_spreaded_stability(stable, P_central, v4_full);	// compute the other stable discs
-}
-
-// returns all full in full[4] in addition to stability count
-int get_stability_fulls(const unsigned long long P, const unsigned long long O, unsigned long long full[5])
-{
-	unsigned long long stable = get_stable_edge(P, O);	// compute the exact stable edges
-	unsigned long long P_central = P & 0x007e7e7e7e7e7e00;
-
-	__m256i	v4_full = get_full_lines(P | O);	// add full lines
-	__m128i v2_full = _mm_and_si128(_mm256_castsi256_si128(v4_full), _mm256_extracti128_si256(v4_full, 1));
-	// _mm256_storeu_si256((__m256i *) full, v4_full);
-	full[4] = _mm_cvtsi128_si64(_mm_and_si128(v2_full, _mm_unpackhi_epi64(v2_full, v2_full)));
-	stable |= (P_central & full[4]);
-
-	return get_spreaded_stability(stable, P_central, v4_full);	// compute the other stable discs
-}
-
-// returns all full lines only
-unsigned long long get_all_full_lines(const unsigned long long disc)
-{
-	__m256i v4_full = get_full_lines(disc);
-	__m128i v2_full = _mm_and_si128(_mm256_castsi256_si128(v4_full), _mm256_extracti128_si256(v4_full, 1));
-	return _mm_cvtsi128_si64(_mm_and_si128(v2_full, _mm_unpackhi_epi64(v2_full, v2_full)));
-}
-
-/**
- * @brief AVX2 optimized get_moves + get_potential_moves.
- *
- * Get the bitboard of empty squares in contact of a player square, as well as real mobility.
- *
- * @param PP broadcasted bitboard with player's discs.
- * @param OO broadcasted bitboard with opponent's discs.
- * @return potential moves in a higner 64-bit, real moves in a lower 64-bit.
- */
-__m128i vectorcall get_moves_and_potential(__m256i PP, __m256i OO)
-{
-	__m256i	MM, potmob, flip_l, flip_r, pre_l, pre_r, shift2;
-	const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1);
-	__m256i	mOO = _mm256_and_si256(OO, _mm256_set_epi64x(0x007E7E7E7E7E7E00, 0x007E7E7E7E7E7E00, 0x00FFFFFFFFFFFF00, 0x7E7E7E7E7E7E7E7E));
-	__m128i occupied = _mm_or_si128(_mm256_castsi256_si128(PP), _mm256_castsi256_si128(OO));
-
-	flip_l = _mm256_and_si256(mOO, _mm256_sllv_epi64(PP, shift1897));
-	flip_r = _mm256_and_si256(mOO, _mm256_srlv_epi64(PP, shift1897));
-	flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(mOO, _mm256_sllv_epi64(flip_l, shift1897)));
-	flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(mOO, _mm256_srlv_epi64(flip_r, shift1897)));
-	pre_l = _mm256_sllv_epi64(mOO, shift1897);	pre_r = _mm256_srlv_epi64(mOO, shift1897);
-	potmob = _mm256_or_si256(pre_l, pre_r);
-	pre_l = _mm256_and_si256(mOO, pre_l);		pre_r = _mm256_and_si256(mOO, pre_r);
-	shift2 = _mm256_add_epi64(shift1897, shift1897);
-	flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(pre_l, _mm256_sllv_epi64(flip_l, shift2)));
-	flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2)));
-	flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(pre_l, _mm256_sllv_epi64(flip_l, shift2)));
-	flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2)));
-	MM = _mm256_or_si256(_mm256_sllv_epi64(flip_l, shift1897), _mm256_srlv_epi64(flip_r, shift1897));
-
-	MM = _mm256_or_si256(_mm256_unpacklo_epi64(MM, potmob), _mm256_unpackhi_epi64(MM, potmob));
-	return _mm_andnot_si128(occupied, _mm_or_si128(_mm256_castsi256_si128(MM), _mm256_extracti128_si256(MM, 1)));	// mask with empties
-}
-
-#endif
-=======
-/**
- * @file board_sse.c
- *
- * SSE/AVX translation of some board.c functions
- *
- * @date 2014 - 2020
- * @author Toshihiko Okuhara
- * @version 4.4
- */
-
-#include "bit.h"
-#include "hash.h"
-#include "board.h"
-
-/**
- * @brief SSE2 translation of board_symetry
- *
- * @param board input board
- * @param s symetry
- * @param sym symetric output board
- */
-#ifdef hasSSE2
-
-void board_symetry(const Board *board, const int s, Board *sym)
-{
-	__m128i	bb = _mm_loadu_si128((__m128i *) board);
-	__m128i	tt;
-	const __m128i mask0F0F = _mm_set1_epi16(0x0F0F);
-	const __m128i mask00AA = _mm_set1_epi16(0x00AA);
-	const __m128i maskCCCC = _mm_set1_epi32(0x0000CCCC);
-	const __m128i mask00F0 = _mm_set1_epi64x(0x00000000F0F0F0F0);
-#if defined(__SSSE3__) || defined(__AVX__)	// pshufb
-	const __m128i mbswapll = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
-	const __m128i mbitrev  = _mm_set_epi8(15, 7, 11, 3, 13, 5, 9, 1, 14, 6, 10, 2, 12, 4, 8, 0);
-
-	if (s & 1) {	// horizontal_mirror (cf. http://wm.ite.pl/articles/sse-popcount.html)
-		bb = _mm_or_si128(_mm_shuffle_epi8(mbitrev, _mm_and_si128(_mm_srli_epi64(bb, 4), mask0F0F)),
-			_mm_slli_epi64(_mm_shuffle_epi8(mbitrev, _mm_and_si128(bb, mask0F0F)), 4));
-	}
-
-	if (s & 2) {	// vertical_mirror
-		bb = _mm_shuffle_epi8(bb, mbswapll);
-	}
-
-#else
-	const __m128i mask5555 = _mm_set1_epi16(0x5555);
-	const __m128i mask3333 = _mm_set1_epi16(0x3333);
-
-	if (s & 1) {	// horizontal_mirror
-		bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 1), mask5555), _mm_slli_epi64(_mm_and_si128(bb, mask5555), 1));
-		bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 2), mask3333), _mm_slli_epi64(_mm_and_si128(bb, mask3333), 2));
-		bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 4), mask0F0F), _mm_slli_epi64(_mm_and_si128(bb, mask0F0F), 4));
-	}
-
-	if (s & 2) {	// vertical_mirror
-		bb = _mm_or_si128(_mm_srli_epi16(bb, 8), _mm_slli_epi16(bb, 8));
-		bb = _mm_shufflehi_epi16(_mm_shufflelo_epi16(bb, 0x1b), 0x1b);
-	}
-#endif
-
-	if (s & 4) {	// transpose
-		tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 7)), mask00AA);
-		bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 7));
-		tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 14)), maskCCCC);
-		bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 14));
-		tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 28)), mask00F0);
-		bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 28));
-	}
-
-#ifdef __clang__
-	sym->player = bb[0];
-	sym->opponent = bb[1];
-#else	// error on clang 3.8
-	_mm_storeu_si128((__m128i *) sym, bb);
-#endif
-
-	board_check(sym);
-}
-
-#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)
-/**
- * @brief Compute a board resulting of a move played on a previous board.
- *
- * @param board board to play the move on.
- * @param x move to play.
- * @param next resulting board.
- * @return flipped discs.
- */
-unsigned long long board_next(const Board *board, const int x, Board *next)
-{
-	__m128i OP = _mm_loadu_si128((__m128i *) board);
-	__m128i flipped = mm_Flip(OP, x);
-
-	OP = _mm_xor_si128(OP, _mm_or_si128(flipped, _mm_loadl_epi64((__m128i *) &X_TO_BIT[x])));
-	_mm_storeu_si128((__m128i *) next, _mm_shuffle_epi32(OP, 0x4e));
-
-	return _mm_cvtsi128_si64(flipped);
-}
-
-/**
- * @brief Compute a board resulting of an opponent move played on a previous board.
- *
- * Compute the board after passing and playing a move.
- *
- * @param board board to play the move on.
- * @param x opponent move to play.
- * @param next resulting board.
- * @return flipped discs.
- */
-unsigned long long board_pass_next(const Board *board, const int x, Board *next)
-{
-	__m128i	PO = _mm_shuffle_epi32(_mm_loadu_si128((__m128i *) board), 0x4e);
-	__m128i flipped = mm_Flip(PO, x);
-
-	PO = _mm_xor_si128(PO, _mm_or_si128(flipped, _mm_loadl_epi64((__m128i *) &X_TO_BIT[x])));
-	_mm_storeu_si128((__m128i *) next, _mm_shuffle_epi32(PO, 0x4e));
-
-	return _mm_cvtsi128_si64(flipped);
-}
-#endif
-
-#endif // hasSSE2
-
-/**
- * @brief X64 optimized get_moves
- *
- * Diag-7 is converted to diag-9 (v.v.) using vertical mirroring
- * in SSE versions.
- *
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return all legal moves in a 64-bit unsigned integer.
- */
-#ifdef __AVX2__	// 4 AVX
-
-unsigned long long get_moves(const unsigned long long P, const unsigned long long O)
-{
-	__m256i	PP, mOO, MM, flip_l, flip_r, pre_l, pre_r, shift2;
-	__m128i	M;
-	const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1);
-	const __m256i mflipH = _mm256_set_epi64x(0x7e7e7e7e7e7e7e7e, 0x7e7e7e7e7e7e7e7e, -1, 0x7e7e7e7e7e7e7e7e);
-
-	PP = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(P));
-	mOO = _mm256_and_si256(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(O)), mflipH);
-
-	flip_l = _mm256_and_si256(mOO, _mm256_sllv_epi64(PP, shift1897));
-	flip_r = _mm256_and_si256(mOO, _mm256_srlv_epi64(PP, shift1897));
-	flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(mOO, _mm256_sllv_epi64(flip_l, shift1897)));
-	flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(mOO, _mm256_srlv_epi64(flip_r, shift1897)));
-	pre_l = _mm256_and_si256(mOO, _mm256_sllv_epi64(mOO, shift1897));
-	pre_r = _mm256_srlv_epi64(pre_l, shift1897);
-	shift2 = _mm256_add_epi64(shift1897, shift1897);
-	flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(pre_l, _mm256_sllv_epi64(flip_l, shift2)));
-	flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2)));
-	flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(pre_l, _mm256_sllv_epi64(flip_l, shift2)));
-	flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2)));
-	MM = _mm256_sllv_epi64(flip_l, shift1897);
-	MM = _mm256_or_si256(MM, _mm256_srlv_epi64(flip_r, shift1897));
-
-	M = _mm_or_si128(_mm256_castsi256_si128(MM), _mm256_extracti128_si256(MM, 1));
-	M = _mm_or_si128(M, _mm_unpackhi_epi64(M, M));
-	return _mm_cvtsi128_si64(M) & ~(P|O);	// mask with empties
-}
-
-#elif defined(__x86_64__) || defined(_M_X64)	// 2 SSE, 2 CPU
-
-unsigned long long get_moves(const unsigned long long P, const unsigned long long O)
-{
-	unsigned long long moves, mO, flip1, pre1, flip8, pre8;
-	__m128i	PP, mOO, MM, flip, pre;
-
-	mO = O & 0x7e7e7e7e7e7e7e7eULL;
-	PP  = _mm_set_epi64x(vertical_mirror(P), P);
-	mOO = _mm_set_epi64x(vertical_mirror(mO), mO);
-		/* shift=-9:+7 */								/* shift=+1 */			/* shift = +8 */
-	flip = _mm_and_si128(mOO, _mm_slli_epi64(PP, 7));				flip1  = mO & (P << 1);		flip8  = O & (P << 8);
-	flip = _mm_or_si128(flip, _mm_and_si128(mOO, _mm_slli_epi64(flip, 7)));		flip1 |= mO & (flip1 << 1);	flip8 |= O & (flip8 << 8);
-	pre  = _mm_and_si128(mOO, _mm_slli_epi64(mOO, 7));				pre1   = mO & (mO << 1);	pre8   = O & (O << 8);
-	flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 14)));	flip1 |= pre1 & (flip1 << 2);	flip8 |= pre8 & (flip8 << 16);
-	flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 14)));	flip1 |= pre1 & (flip1 << 2);	flip8 |= pre8 & (flip8 << 16);
-	MM = _mm_slli_epi64(flip, 7);							moves = flip1 << 1;		moves |= flip8 << 8;
-		/* shift=-7:+9 */								/* shift=-1 */			/* shift = -8 */
-	flip = _mm_and_si128(mOO, _mm_slli_epi64(PP, 9));				flip1  = mO & (P >> 1);		flip8  = O & (P >> 8);
-	flip = _mm_or_si128(flip, _mm_and_si128(mOO, _mm_slli_epi64(flip, 9)));		flip1 |= mO & (flip1 >> 1);	flip8 |= O & (flip8 >> 8);
-	pre = _mm_and_si128(mOO, _mm_slli_epi64(mOO, 9));				pre1 >>= 1;			pre8 >>= 8;
-	flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 18)));	flip1 |= pre1 & (flip1 >> 2);	flip8 |= pre8 & (flip8 >> 16);
-	flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 18)));	flip1 |= pre1 & (flip1 >> 2);	flip8 |= pre8 & (flip8 >> 16);
-	MM = _mm_or_si128(MM, _mm_slli_epi64(flip, 9));					moves |= flip1 >> 1;		moves |= flip8 >> 8;
-
-	moves |= _mm_cvtsi128_si64(MM) | vertical_mirror(_mm_cvtsi128_si64(_mm_unpackhi_epi64(MM, MM)));
-	return moves & ~(P|O);	// mask with empties
-}
-
-#elif 0	// 4 CPU
-
-unsigned long long get_moves(const unsigned long long P, const unsigned long long O)
-{
-	unsigned long long moves, mO;
-	unsigned long long flip1, flip7, flip9, flip8, pre1, pre7, pre9, pre8;
-
-	mO = O & 0x7e7e7e7e7e7e7e7eULL;
-	flip1  = mO & (P << 1);		flip7  = mO & (P << 7);		flip9  = mO & (P << 9);		flip8  = O & (P << 8);
-	flip1 |= mO & (flip1 << 1);	flip7 |= mO & (flip7 << 7);	flip9 |= mO & (flip9 << 9);	flip8 |= O & (flip8 << 8);
-	pre1 = mO & (mO << 1);		pre7 = mO & (mO << 7);		pre9 = mO & (mO << 9);		pre8 = O & (O << 8);
-	flip1 |= pre1 & (flip1 << 2);	flip7 |= pre7 & (flip7 << 14);	flip9 |= pre9 & (flip9 << 18);	flip8 |= pre8 & (flip8 << 16);
-	flip1 |= pre1 & (flip1 << 2);	flip7 |= pre7 & (flip7 << 14);	flip9 |= pre9 & (flip9 << 18);	flip8 |= pre8 & (flip8 << 16);
-	moves = flip1 << 1;		moves |= flip7 << 7;		moves |= flip9 << 9;		moves |= flip8 << 8;
-	flip1  = mO & (P >> 1);		flip7  = mO & (P >> 7);		flip9  = mO & (P >> 9);		flip8  = O & (P >> 8);
-	flip1 |= mO & (flip1 >> 1);	flip7 |= mO & (flip7 >> 7);	flip9 |= mO & (flip9 >> 9);	flip8 |= O & (flip8 >> 8);
-	pre1 >>= 1;			pre7 >>= 7;			pre9 >>= 9;			pre8 >>= 8;
-	flip1 |= pre1 & (flip1 >> 2);	flip7 |= pre7 & (flip7 >> 14);	flip9 |= pre9 & (flip9 >> 18);	flip8 |= pre8 & (flip8 >> 16);
-	flip1 |= pre1 & (flip1 >> 2);	flip7 |= pre7 & (flip7 >> 14);	flip9 |= pre9 & (flip9 >> 18);	flip8 |= pre8 & (flip8 >> 16);
-	moves |= flip1 >> 1;		moves |= flip7 >> 7;		moves |= flip9 >> 9;		moves |= flip8 >> 8;
-
-	return moves & ~(P|O);	// mask with empties
-}
-
-#else // __x86_64__
-/**
- * @brief SSE optimized get_moves for x86 (3 SSE, 1 CPU)
- *
- */
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
-
-unsigned long long get_moves_sse(unsigned long long P, unsigned long long O)
-{
-	unsigned int	mO, movesL, movesH, flip1, pre1;
-	__m128i	OP, rOP, PP, OO, MM, flip, pre;
-	const __m128i mask7e = _mm_set1_epi8(0x7e);
-
-		// vertical_mirror in PP[1], OO[1]
-	OP  = _mm_unpacklo_epi64(_mm_cvtsi64_si128(P), _mm_cvtsi64_si128(O));		mO = (unsigned int) O & 0x7e7e7e7eU;
-	rOP = _mm_shufflelo_epi16(OP, 0x1B);						flip1  = mO & ((unsigned int) P << 1);
-	rOP = _mm_shufflehi_epi16(rOP, 0x1B);						flip1 |= mO & (flip1 << 1);
-											pre1   = mO & (mO << 1);
-	rOP = _mm_or_si128(_mm_srli_epi16(rOP, 8), _mm_slli_epi16(rOP, 8));
-	    										flip1 |= pre1 & (flip1 << 2);
-	PP  = _mm_unpacklo_epi64(OP, rOP);						flip1 |= pre1 & (flip1 << 2);
-	OO  = _mm_unpackhi_epi64(OP, rOP);						movesL = flip1 << 1;
-
-	flip = _mm_and_si128(OO, _mm_slli_epi64(PP, 8));				flip1  = mO & ((unsigned int) P >> 1);
-	flip = _mm_or_si128(flip, _mm_and_si128(OO, _mm_slli_epi64(flip, 8)));		flip1 |= mO & (flip1 >> 1);
-	pre = _mm_and_si128(OO, _mm_slli_epi64(OO, 8));					pre1 >>= 1;
-	flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 16)));	flip1 |= pre1 & (flip1 >> 2);
-	flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 16)));	flip1 |= pre1 & (flip1 >> 2);
-	MM = _mm_slli_epi64(flip, 8);							movesL |= flip1 >> 1;
-
-	OO = _mm_and_si128(OO, mask7e);							mO = (unsigned int) (O >> 32) & 0x7e7e7e7eU;
-	flip = _mm_and_si128(OO, _mm_slli_epi64(PP, 7));				flip1  = mO & ((unsigned int) (P >> 32) << 1);
-	flip = _mm_or_si128(flip, _mm_and_si128(OO, _mm_slli_epi64(flip, 7)));		flip1 |= mO & (flip1 << 1);
-	pre = _mm_and_si128(OO, _mm_slli_epi64(OO, 7));					pre1   = mO & (mO << 1);
-	flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 14)));	flip1 |= pre1 & (flip1 << 2);
-	flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 14)));	flip1 |= pre1 & (flip1 << 2);
-	MM = _mm_or_si128(MM, _mm_slli_epi64(flip, 7));					movesH = flip1 << 1;
-
-	flip = _mm_and_si128(OO, _mm_slli_epi64(PP, 9));				flip1  = mO & ((unsigned int) (P >> 32) >> 1);
-	flip = _mm_or_si128(flip, _mm_and_si128(OO, _mm_slli_epi64(flip, 9)));		flip1 |= mO & (flip1 >> 1);
-	pre = _mm_and_si128(OO, _mm_slli_epi64(OO, 9));					pre1 >>= 1;
-	flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 18)));	flip1 |= pre1 & (flip1 >> 2);
-	flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 18)));	flip1 |= pre1 & (flip1 >> 2);
-	MM = _mm_or_si128(MM, _mm_slli_epi64(flip, 9));					movesH |= flip1 >> 1;
-
-	movesL |= _mm_cvtsi128_si32(MM);	MM = _mm_srli_si128(MM, 4);
-	movesH |= _mm_cvtsi128_si32(MM);	MM = _mm_srli_si128(MM, 4);
-	movesH |= bswap_int(_mm_cvtsi128_si32(MM));
-	movesL |= bswap_int(_mm_cvtsi128_si32(_mm_srli_si128(MM, 4)));
-	return (movesL | ((unsigned long long) movesH << 32)) & ~(P|O);	// mask with empties
-}
-
-#else // non-VEX asm
-
-unsigned long long get_moves_sse(unsigned long long P, unsigned long long O)
-{
-	unsigned long long moves;
-	static const V2DI mask7e = {{ 0x7e7e7e7e7e7e7e7eULL, 0x7e7e7e7e7e7e7e7eULL }};
-
-	__asm__ (
-		"movl	%1, %%ebx\n\t"
-		"movl	%3, %%edi\n\t"
-		"andl	$0x7e7e7e7e, %%edi\n\t"
-				/* shift=-1 */			/* vertical mirror in PP[1], OO[1] */
-		"movl	%%ebx, %%eax\n\t"	"movd	%1, %%xmm4\n\t"		// (movd for store-forwarding)
-		"shrl	$1, %%eax\n\t"		"movd	%2, %%xmm0\n\t"
-		"andl	%%edi, %%eax\n\t"	"movd	%3, %%xmm5\n\t"
-		"movl	%%eax, %%edx\n\t"	"movd	%4, %%xmm1\n\t"
-		"shrl	$1, %%eax\n\t"		"punpckldq %%xmm0, %%xmm4\n\t"		// P
-		"movl	%%edi, %%ecx\n\t"	"punpckldq %%xmm1, %%xmm5\n\t"		// O
-		"andl	%%edi, %%eax\n\t"	"punpcklqdq %%xmm5, %%xmm4\n\t"		// OP
-		"shrl	$1, %%ecx\n\t"		"pshuflw $0x1b, %%xmm4, %%xmm0\n\t"
-		"orl	%%edx, %%eax\n\t"	"pshufhw $0x1b, %%xmm0, %%xmm0\n\t"
-		"andl	%%edi, %%ecx\n\t"	"movdqa	%%xmm0, %%xmm1\n\t"
-		"movl	%%eax, %%edx\n\t"	"psllw	$8, %%xmm0\n\t"
-		"shrl	$2, %%eax\n\t"		"psrlw	$8, %%xmm1\n\t"
-		"andl	%%ecx, %%eax\n\t"	"por	%%xmm1, %%xmm0\n\t"		// rOP
-		"orl	%%eax, %%edx\n\t"
-		"shrl	$2, %%eax\n\t"		"movdqa	%%xmm4, %%xmm5\n\t"
-		"andl	%%ecx, %%eax\n\t"	"punpcklqdq %%xmm0, %%xmm4\n\t"		// PP
-		"orl	%%edx, %%eax\n\t"	"punpckhqdq %%xmm0, %%xmm5\n\t"		// OO
-		"shrl	$1, %%eax\n\t"
-				/* shift=+1 */			/* shift=-8:+8 */
-						"movdqa	%%xmm4, %%xmm0\n\t"
-		"addl	%%ebx, %%ebx\n\t"	"psllq	$8, %%xmm0\n\t"
-		"andl	%%edi, %%ebx\n\t"	"pand	%%xmm5, %%xmm0\n\t"	// 0 m7&o6 m6&o5 .. m1&o0
-		"movl	%%ebx, %%edx\n\t"	"movdqa	%%xmm0, %%xmm1\n\t"
-		"addl	%%ebx, %%ebx\n\t"	"psllq	$8, %%xmm0\n\t"
-						"movdqa	%%xmm5, %%xmm3\n\t"
-		"andl	%%edi, %%ebx\n\t"	"pand	%%xmm5, %%xmm0\n\t"	// 0 0 m7&o6&o5 .. m2&o1&o0
-						"psllq	$8, %%xmm3\n\t"
-		"orl	%%ebx, %%edx\n\t"	"por	%%xmm1, %%xmm0\n\t"	// 0 m7&o6 (m6&o5)|(m7&o6&o5) .. (m1&o0)
-		"addl	%%ecx, %%ecx\n\t"	"pand	%%xmm5, %%xmm3\n\t"	// 0 o7&o6 o6&o5 o5&o4 o4&o3 ..
-						"movdqa	%%xmm0, %%xmm2\n\t"
-		"leal	(,%%edx,4), %%ebx\n\t"	"psllq	$16, %%xmm0\n\t"
-		"andl	%%ecx, %%ebx\n\t"	"pand	%%xmm3, %%xmm0\n\t"	// 0 0 0 m7&o6&o5&o4 (m6&o5&o4&o3)|(m7&o6&o5&o4&o3) ..
-		"orl	%%ebx, %%edx\n\t"	"por	%%xmm0, %%xmm2\n\t"
-		"shll	$2, %%ebx\n\t"		"psllq	$16, %%xmm0\n\t"
-		"andl	%%ecx, %%ebx\n\t"	"pand	%%xmm3, %%xmm0\n\t"	// 0 0 0 0 0 m7&o6&..&o2 (m6&o5&..&o1)|(m7&o6&..&o1) ..
-		"orl	%%edx, %%ebx\n\t"	"por	%%xmm0, %%xmm2\n\t"
-		"addl	%%ebx, %%ebx\n\t"	"psllq	$8, %%xmm2\n\t"
-		"orl	%%eax, %%ebx\n\t"
-
-		"movl	%2, %%esi\n\t"
-		"movl	%4, %%edi\n\t"
-				/* shift=-1 */			/* shift=-9:+7 */
-		"andl	$0x7e7e7e7e,%%edi\n\t"	"pand	%5, %%xmm5\n\t"
-		"movl	%%esi, %%eax\n\t"	"movdqa	%%xmm4, %%xmm0\n\t"
-		"shrl	$1, %%eax\n\t"		"psllq	$7, %%xmm0\n\t"
-		"andl	%%edi, %%eax\n\t"	"pand	%%xmm5, %%xmm0\n\t"
-		"movl	%%eax, %%edx\n\t"	"movdqa	%%xmm0, %%xmm1\n\t"
-		"shrl	$1, %%eax\n\t"		"psllq	$7, %%xmm0\n\t"
-		"andl	%%edi, %%eax\n\t"	"pand	%%xmm5, %%xmm0\n\t"
-		"movl	%%edi, %%ecx\n\t"	"movdqa	%%xmm5, %%xmm3\n\t"
-		"orl	%%edx, %%eax\n\t"	"por	%%xmm1, %%xmm0\n\t"
-		"shrl	$1, %%ecx\n\t"		"psllq	$7, %%xmm3\n\t"
-		"movl	%%eax, %%edx\n\t"	"movdqa	%%xmm0, %%xmm1\n\t"
-		"andl	%%edi, %%ecx\n\t"	"pand	%%xmm5, %%xmm3\n\t"
-		"shrl	$2, %%eax\n\t"		"psllq	$14, %%xmm0\n\t"
-		"andl	%%ecx, %%eax\n\t"	"pand	%%xmm3, %%xmm0\n\t"
-		"orl	%%eax, %%edx\n\t"	"por	%%xmm0, %%xmm1\n\t"
-		"shrl	$2, %%eax\n\t"		"psllq	$14, %%xmm0\n\t"
-		"andl	%%ecx, %%eax\n\t"	"pand	%%xmm3, %%xmm0\n\t"
-		"orl	%%edx, %%eax\n\t"	"por	%%xmm1, %%xmm0\n\t"
-		"shrl	$1, %%eax\n\t"		"psllq	$7, %%xmm0\n\t"
-						"por	%%xmm0, %%xmm2\n\t"
-				/* shift=+1 */			/* shift=-7:+9 */
-						"movdqa	%%xmm4, %%xmm0\n\t"
-		"addl	%%esi, %%esi\n\t"	"psllq	$9, %%xmm0\n\t"
-		"andl	%%edi, %%esi\n\t"	"pand	%%xmm5, %%xmm0\n\t"
-		"movl	%%esi, %%edx\n\t"	"movdqa	%%xmm0, %%xmm1\n\t"
-		"addl	%%esi, %%esi\n\t"	"psllq	$9, %%xmm0\n\t"
-		"andl	%%edi, %%esi\n\t"	"pand	%%xmm5, %%xmm0\n\t"
-						"movdqa	%%xmm5, %%xmm3\n\t"
-		"orl	%%esi, %%edx\n\t"	"por	%%xmm1, %%xmm0\n\t"
-						"psllq	$9, %%xmm3\n\t"
-						"movdqa	%%xmm0, %%xmm1\n\t"
-		"addl	%%ecx, %%ecx\n\t"	"pand	%%xmm5, %%xmm3\n\t"
-		"leal	(,%%edx,4), %%esi\n\t"	"psllq	$18, %%xmm0\n\t"
-		"andl	%%ecx, %%esi\n\t"	"pand	%%xmm3, %%xmm0\n\t"
-		"orl	%%esi, %%edx\n\t"	"por	%%xmm0, %%xmm1\n\t"
-		"shll	$2, %%esi\n\t"		"psllq	$18, %%xmm0\n\t"
-		"andl	%%ecx, %%esi\n\t"	"pand	%%xmm3, %%xmm0\n\t"
-		"orl	%%edx, %%esi\n\t"	"por	%%xmm1, %%xmm0\n\t"
-		"addl	%%esi, %%esi\n\t"	"psllq	$9, %%xmm0\n\t"
-		"orl	%%eax, %%esi\n\t"	"por	%%xmm0, %%xmm2\n\t"
-
-		"movl	%1, %%eax\n\t"		"movhlps %%xmm2, %%xmm3\n\t"
-		"movl	%2, %%edx\n\t"		"movd	%%xmm3, %%edi\n\t"	"movd	%%xmm2, %%ecx\n\t"
-						"psrlq	$32, %%xmm3\n\t"	"psrlq	$32, %%xmm2\n\t"
-						"bswapl	%%edi\n\t"		"orl	%%ecx, %%ebx\n\t"
-		"orl	%3, %%eax\n\t"		"orl	%%edi, %%esi\n\t"
-		"orl	%4, %%edx\n\t"		"movd	%%xmm3, %%edi\n\t"	"movd	%%xmm2, %%ecx\n\t"
-		"notl	%%eax\n\t"		"bswapl	%%edi\n\t"
-		"notl	%%edx\n\t"		"orl	%%edi, %%ebx\n\t"	"orl	%%ecx, %%esi\n\t"
-		"andl	%%esi, %%edx\n\t"
-		"andl	%%ebx, %%eax"
-	: "=&A" (moves)
-	: "m" (P), "m" (((unsigned int *)&P)[1]), "m" (O), "m" (((unsigned int *)&O)[1]), "m" (mask7e)
-	: "ebx", "ecx", "esi", "edi" );
-
-	return moves;
-}
-
-#endif // hasSSE2
-#endif // x86
-
-#if defined(__x86_64__) || defined(_M_X64)
-/**
- * @brief SSE optimized get_stable_edge
- *
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return a bitboard with (some of) player's stable discs.
- *
- */
-static unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O)
-{
-	// compute the exact stable edges (from precomputed tables)
-	unsigned int a1a8po, h1h8po;
-	unsigned long long stable_edge;
-
-	__m128i	P0 = _mm_cvtsi64_si128(P);
-	__m128i	O0 = _mm_cvtsi64_si128(O);
-	__m128i	PO = _mm_unpacklo_epi8(O0, P0);
-	stable_edge = edge_stability[_mm_extract_epi16(PO, 0)]
-		| ((unsigned long long) edge_stability[_mm_extract_epi16(PO, 7)] << 56);
-
-	PO = _mm_unpacklo_epi64(O0, P0);
-	a1a8po = _mm_movemask_epi8(_mm_slli_epi64(PO, 7));
-	h1h8po = _mm_movemask_epi8(PO);
-#if 0 // def __BMI2__ // pdep is slow on AMD
-	stable_edge |= _pdep_u64(edge_stability[a1a8po], 0x0101010101010101ULL)
-		| _pdep_u64(edge_stability[h1h8po], 0x8080808080808080ULL);
-#else
-	stable_edge |= A1_A8[edge_stability[a1a8po]] | (A1_A8[edge_stability[h1h8po]] << 7);
-#endif
-	return stable_edge;
-}
-
-/**
- * @brief X64 optimized get_stability
- *
- * SSE pcmpeqb for horizontal get_full_lines.
- * CPU rotate for vertical get_full_lines.
- * Diag-7 is converted to diag-9 using vertical mirroring.
- * 
- * @param P bitboard with player's discs.
- * @param O bitboard with opponent's discs.
- * @return the number of stable discs.
- */
-#ifdef __AVX2__
-
-int get_stability(const unsigned long long P, const unsigned long long O)
-{
-	unsigned long long disc = (P | O);
-	unsigned long long P_central = (P & 0x007e7e7e7e7e7e00ULL);
-	unsigned long long l8, stable;
-	__m128i	l81, l79, v2_stable, v2_old_stable, v2_P_central;
-	__m256i	lr79, v4_disc, v4_stable, v4_full;
-	const __m128i kff = _mm_set1_epi64x(0xffffffffffffffff);
-	const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1);
-#if 0 // PCMPEQQ
-	static const V4DI m791 = {{ 0x0402010000804020, 0x2040800000010204, 0x0804020180402010, 0x1020408001020408 }};	// V8SI
-	static const V4DI m792 = {{ 0x0000008040201008, 0x0000000102040810, 0x1008040201000000, 0x0810204080000000 }};
-	static const V4DI m793 = {{ 0x0000804020100804, 0x0000010204081020, 0x2010080402010000, 0x0408102040800000 }};
-	static const V4DI m794 = {{ 0x0080402010080402, 0x0001020408102040, 0x4020100804020100, 0x0204081020408000 }};
-	static const V2DI m795 = {{ 0x8040201008040201, 0x0102040810204080 }};
-
-	l81 = _mm_cvtsi64_si128(disc);		v4_disc = _mm256_broadcastq_epi64(l81);
-	l81 = _mm_cmpeq_epi8(kff, l81);		lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4);
-						lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m792.v4), m792.v4), m792.v4));
-	l8 = disc;				lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m793.v4), m793.v4), m793.v4));
-	l8 &= (l8 >> 8) | (l8 << 56);		lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m794.v4), m794.v4), m794.v4));
-	l8 &= (l8 >> 16) | (l8 << 48);		l79 = _mm_and_si128(_mm_cmpeq_epi64(_mm_and_si128(_mm256_castsi256_si128(v4_disc), m795.v2), m795.v2), m795.v2);
-	l8 &= (l8 >> 32) | (l8 << 32);		l79 = _mm_or_si128(l79, _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79)));
-
-#elif 0 // PCMPEQD
-	__m256i lm79;
-	static const V4DI m790 = {{ 0x80c0e0f0783c1e0f, 0x0103070f1e3c78f0, 0x70381c0e07030100, 0x0e1c3870e0c08000 }};
-	static const V4DI m791 = {{ 0x0402010000804020, 0x2040800000010204, 0x0804020180402010, 0x1020408001020408 }};	// V8SI
-	static const V4DI m792 = {{ 0x2010884440201088, 0x0408112202040811, 0x2211080411080402, 0x4488102088102040 }};	// V8SI
-	static const V4DI m793 = {{ 0x8844221110884422, 0x1122448808112244, 0x0000000044221108, 0x0000000022448810 }};	// V8SI
-
-	l81 = _mm_cvtsi64_si128(disc);		v4_disc = _mm256_broadcastq_epi64(l81);
-	l81 = _mm_cmpeq_epi8(kff, l81);		lm79 = _mm256_and_si256(v4_disc, m790.v4);
-						lm79 = _mm256_or_si256(lm79, _mm256_shuffle_epi32(lm79, 0xb1));
-	l8 = disc;				lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(lm79, m792.v4), m792.v4), m792.v4);
-	l8 &= (l8 >> 8) | (l8 << 56);		lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(lm79, m793.v4), m793.v4), m793.v4));
-	l8 &= (l8 >> 16) | (l8 << 48);		lr79 = _mm256_and_si256(_mm256_or_si256(lr79, _mm256_shuffle_epi32(lr79, 0xb1)), m790.v4);
-	l8 &= (l8 >> 32) | (l8 << 32);		lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4));
-						l79 = _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79));
-
-#else // Kogge-Stone
-	const __m128i mcpyswap = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
-	const __m128i mbswapll = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
-	static const V4DI shiftlr[] = {{{ 9, 7, 7, 9 }}, {{ 18, 14, 14, 18 }}, {{ 36, 28, 28, 36 }}};
-	static const V4DI e790 = {{ 0xff80808080808080, 0xff01010101010101, 0xff01010101010101, 0xff80808080808080 }};
-	static const V4DI e791 = {{ 0xffffc0c0c0c0c0c0, 0xffff030303030303, 0xffff030303030303, 0xffffc0c0c0c0c0c0 }};
-	static const V4DI e792 = {{ 0xfffffffff0f0f0f0, 0xffffffff0f0f0f0f, 0xffffffff0f0f0f0f, 0xfffffffff0f0f0f0 }};
-
-	l81 = _mm_cvtsi64_si128(disc);		v4_disc = _mm256_castsi128_si256(_mm_shuffle_epi8(l81, mcpyswap));
-	l81 = _mm_cmpeq_epi8(kff, l81);		lr79 = _mm256_permute4x64_epi64(v4_disc, 0x50);	// disc, disc, rdisc, rdisc
-						lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e790.v4, _mm256_srlv_epi64(lr79, shiftlr[0].v4)));
-	l8 = disc;				lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e791.v4, _mm256_srlv_epi64(lr79, shiftlr[1].v4)));
-	l8 &= (l8 >> 8) | (l8 << 56);		lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e792.v4, _mm256_srlv_epi64(lr79, shiftlr[2].v4)));
-	l8 &= (l8 >> 16) | (l8 << 48);		l79 = _mm_shuffle_epi8(_mm256_extracti128_si256(lr79, 1), mbswapll);
-	l8 &= (l8 >> 32) | (l8 << 32);		l79 = _mm_and_si128(l79, _mm256_castsi256_si128(lr79));
-
-#endif
-	l81 = _mm_insert_epi64(l81, l8, 1);
-
-	// compute the exact stable edges (from precomputed tables)
-	stable = get_stable_edge(P, O);
-
-	// add full lines
-	v2_stable = _mm_and_si128(l81, l79);
-	stable |= _mm_cvtsi128_si64(_mm_and_si128(v2_stable, _mm_unpackhi_epi64(v2_stable, v2_stable))) & P_central;
-
-	if (stable == 0)
-		return 0;
-
-	// now compute the other stable discs (ie discs touching another stable disc in each flipping direction).
-	v4_full = _mm256_insertf128_si256(_mm256_castsi128_si256(l81), l79, 1);
-	v2_stable = _mm_cvtsi64_si128(stable);
-	v2_P_central = _mm_cvtsi64_si128(P_central);
-	do {
-		v2_old_stable = v2_stable;
-		v4_stable = _mm256_broadcastq_epi64(v2_stable);
-		v4_stable = _mm256_or_si256(_mm256_or_si256(_mm256_srlv_epi64(v4_stable, shift1897), _mm256_sllv_epi64(v4_stable, shift1897)), v4_full);
-		v2_stable = _mm_and_si128(_mm256_castsi256_si128(v4_stable), _mm256_extracti128_si256(v4_stable, 1));
-		v2_stable = _mm_and_si128(v2_stable, _mm_unpackhi_epi64(v2_stable, v2_stable));
-		v2_stable = _mm_or_si128(v2_old_stable, _mm_and_si128(v2_stable, v2_P_central));
-	} while (!_mm_testc_si128(v2_old_stable, v2_stable));
-
-	return bit_count(_mm_cvtsi128_si64(v2_stable));
-}
-
-#else
-
-int get_stability(const unsigned long long P, const unsigned long long O)
-{
-	unsigned long long disc = (P | O);
-	unsigned long long P_central = (P & 0x007e7e7e7e7e7e00ULL);
-	unsigned long long l8, full_h, full_v, full_d7, full_d9, stable;
-	unsigned long long stable_h, stable_v, stable_d7, stable_d9, old_stable;
-#if 1	// 1 CPU, 3 SSE
-	__m128i l01, l79, r79;	// full lines
-	const __m128i kff  = _mm_set1_epi64x(0xffffffffffffffff);
-	const __m128i edge = _mm_set1_epi64x(0xff818181818181ff);
-	const __m128i e791 = _mm_set1_epi64x(0xffffc0c0c0c0c0c0);
-	const __m128i e792 = _mm_set1_epi64x(0x030303030303ffff);
-	const __m128i e793 = _mm_set1_epi64x(0x0f0f0f0ff0f0f0f0);
-
-	l01 = l79 = _mm_cvtsi64_si128(disc);	r79 = _mm_cvtsi64_si128(vertical_mirror(disc));
-	l01 = _mm_cmpeq_epi8(kff, l01);		l79 = r79 = _mm_unpacklo_epi64(l79, r79);
-	full_h = _mm_cvtsi128_si64(l01);	l79 = _mm_and_si128(l79, _mm_or_si128(edge, _mm_srli_epi64(l79, 9)));
-						r79 = _mm_and_si128(r79, _mm_or_si128(edge, _mm_slli_epi64(r79, 9)));
-	l8 = disc;				l79 = _mm_and_si128(l79, _mm_or_si128(e791, _mm_srli_epi64(l79, 18)));
-	l8 &= (l8 >> 8) | (l8 << 56);		r79 = _mm_and_si128(r79, _mm_or_si128(e792, _mm_slli_epi64(r79, 18)));
-	l8 &= (l8 >> 16) | (l8 << 48);		l79 = _mm_and_si128(_mm_and_si128(l79, r79), _mm_or_si128(e793, _mm_or_si128(_mm_srli_epi64(l79, 36), _mm_slli_epi64(r79, 36))));
-	l8 &= (l8 >> 32) | (l8 << 32);		full_d9 = _mm_cvtsi128_si64(l79);
-	full_v = l8;				full_d7 = vertical_mirror(_mm_cvtsi128_si64(_mm_unpackhi_epi64(l79, l79)));
-
-#else	// 4 CPU
-	unsigned long long l1, l7, l9, r7, r9;	// full lines
-	static const unsigned long long edge = 0xff818181818181ffULL;
-	static const unsigned long long k01 = 0x0101010101010101ULL;
-	static const unsigned long long e7[] = { 0xffff030303030303, 0xc0c0c0c0c0c0ffff, 0xffffffff0f0f0f0f, 0xf0f0f0f0ffffffff };
-	static const unsigned long long e9[] = { 0xffffc0c0c0c0c0c0, 0x030303030303ffff, 0x0f0f0f0ff0f0f0f0 };
-
-	l1 = l7 = r7 = disc;
-	l1 &= l1 >> 1;				l7 &= edge | (l7 >> 7);		r7 &= edge | (r7 << 7);
-	l1 &= l1 >> 2;				l7 &= e7[0] | (l7 >> 14);	r7 &= e7[1] | (r7 << 14);
-	l1 &= l1 >> 4;				l7 &= e7[2] | (l7 >> 28);	r7 &= e7[3] | (r7 << 28);
-	full_h = ((l1 & k01) * 0xff);		full_d7 = l7 & r7;
-
-	l8 = l9 = r9 = disc;
-	l8 &= (l8 >> 8) | (l8 << 56);		l9 &= edge | (l9 >> 9);		r9 &= edge | (r9 << 9);
-	l8 &= (l8 >> 16) | (l8 << 48);		l9 &= e9[0] | (l9 >> 18);	r9 &= e9[1] | (r9 << 18);
-	l8 &= (l8 >> 32) | (l8 << 32);		full_d9 = l9 & r9 & (e9[2] | (l9 >> 36) | (r9 << 36));
-	full_v = l8;
-
-#endif
-	// compute the exact stable edges (from precomputed tables)
-	stable = get_stable_edge(P, O);
-
-	// add full lines
-	stable |= (full_h & full_v & full_d7 & full_d9 & P_central);
-
-	if (stable == 0)
-		return 0;
-
-	// now compute the other stable discs (ie discs touching another stable disc in each flipping direction).
-	do {
-		old_stable = stable;
-		stable_h = ((stable >> 1) | (stable << 1) | full_h);
-		stable_v = ((stable >> 8) | (stable << 8) | full_v);
-		stable_d7 = ((stable >> 7) | (stable << 7) | full_d7);
-		stable_d9 = ((stable >> 9) | (stable << 9) | full_d9);
-		stable |= (stable_h & stable_v & stable_d7 & stable_d9 & P_central);
-	} while (stable != old_stable);
-
-	return bit_count(stable);
-}
-
-#endif // __AVX2__
-#endif // __x86_64__
-
-/**
- * @brief SSE translation of board_get_hash_code.
- *
- * Too many dependencies, effective only on 32bit build.
- * For AMD, MMX version in board_mmx.c is faster.
- *
- * @param p pointer to 16 bytes to hash.
- * @return the hash code of the bitboard
- */
-#if (defined(USE_GAS_MMX) && !defined(__3dNOW__)) || defined(USE_MSVC_X86) // || defined(__x86_64__)
-
-unsigned long long board_get_hash_code_sse(const unsigned char *p)
-{
-	unsigned long long h;
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
-	__m128	h0, h1, h2, h3;
-
-	h0 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[0][p[0]])), (__m64 *) &hash_rank[4][p[4]]);
-	h1 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[1][p[1]])), (__m64 *) &hash_rank[5][p[5]]);
-	h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[2][p[2]])), (__m64 *) &hash_rank[6][p[6]]);
-	h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[3][p[3]])), (__m64 *) &hash_rank[7][p[7]]);
-	h0 = _mm_xor_ps(h0, h2);	h1 = _mm_xor_ps(h1, h3);
-	h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[8][p[8]])), (__m64 *) &hash_rank[10][p[10]]);
-	h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[9][p[9]])), (__m64 *) &hash_rank[11][p[11]]);
-	h0 = _mm_xor_ps(h0, h2);	h1 = _mm_xor_ps(h1, h3);
-	h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[12][p[12]])), (__m64 *) &hash_rank[14][p[14]]);
-	h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[13][p[13]])), (__m64 *) &hash_rank[15][p[15]]);
-	h0 = _mm_xor_ps(h0, h2);	h1 = _mm_xor_ps(h1, h3);
-	h0 = _mm_xor_ps(h0, h1);
-	h0 = _mm_xor_ps(h0, _mm_movehl_ps(h1, h0));
-	h = _mm_cvtsi128_si64(_mm_castps_si128(h0));
-
-#else
-	__asm__ volatile (
-		"movq	%0, %%xmm0\n\t"		"movq	%1, %%xmm1"
-	: : "m" (hash_rank[0][p[0]]), "m" (hash_rank[1][p[1]]));
-	__asm__ volatile (
-		"movq	%0, %%xmm2\n\t"		"movq	%1, %%xmm3"
-	: : "m" (hash_rank[2][p[2]]), "m" (hash_rank[3][p[3]]));
-	__asm__ volatile (
-		"movhps	%0, %%xmm0\n\t"		"movhps	%1, %%xmm1"
-	: : "m" (hash_rank[4][p[4]]), "m" (hash_rank[5][p[5]]));
-	__asm__ volatile (
-		"movhps	%0, %%xmm2\n\t"		"movhps	%1, %%xmm3"
-	: : "m" (hash_rank[6][p[6]]), "m" (hash_rank[7][p[7]]));
-	__asm__ volatile (
-		"xorps	%%xmm2, %%xmm0\n\t"	"xorps	%%xmm3, %%xmm1\n\t"
-		"movq	%0, %%xmm2\n\t"		"movq	%1, %%xmm3"
-	: : "m" (hash_rank[8][p[8]]), "m" (hash_rank[9][p[9]]));
-	__asm__ volatile (
-		"movhps	%0, %%xmm2\n\t"		"movhps	%1, %%xmm3"
-	: : "m" (hash_rank[10][p[10]]), "m" (hash_rank[11][p[11]]));
-	__asm__ volatile (
-		"xorps	%%xmm2, %%xmm0\n\t"	"xorps	%%xmm3, %%xmm1\n\t"
-		"movq	%0, %%xmm2\n\t"		"movq	%1, %%xmm3"
-	: : "m" (hash_rank[12][p[12]]), "m" (hash_rank[13][p[13]]));
-	__asm__ volatile (
-		"movhps	%1, %%xmm2\n\t"		"movhps	%2, %%xmm3\n\t"
-		"xorps	%%xmm2, %%xmm0\n\t"	"xorps	%%xmm3, %%xmm1\n\t"
-		"xorps	%%xmm1, %%xmm0\n\t"
-		"movhlps %%xmm0, %%xmm1\n\t"
-		"xorps	%%xmm1, %%xmm0\n\t"
-		"movd	%%xmm0, %%eax\n\t"
-		"punpckhdq %%xmm0, %%xmm0\n\t"
-		"movd	%%xmm0, %%edx"
-	: "=A" (h) : "m" (hash_rank[14][p[14]]), "m" (hash_rank[15][p[15]]));
-#endif
-
-	return h;
-}
-
-#endif // USE_GAS_MMX
-
-#if 0 // def __AVX2__	// experimental - too many instructions
-
-unsigned long long board_get_hash_code_avx2(const unsigned char *p)
-{
-	__m128i	ix0, ix8, hh;
-	__m256i	hhh;
-	static const __v16qi rank = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-
-	ix0 = _mm_loadu_si128((__m128i *) p);
-	ix8 = _mm_unpackhi_epi8(ix0, (__m128i) rank);
-	ix0 = _mm_unpacklo_epi8(ix0, (__m128i) rank);
-
-	hhh  = _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_blend_epi16(_mm_setzero_si128(), ix0, 0x55), 8);
-	hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_blend_epi16(_mm_setzero_si128(), ix8, 0x55), 8);
-	hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_srli_epi32(ix0, 16), 8);
-	hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_srli_epi32(ix8, 16), 8);
-
-	hh = _mm256_castsi256_si128(hhh) ^ _mm256_extracti128_si256(hhh, 1);
-	hh ^= _mm_shuffle_epi32(hh, 0x4e);
-	return hh[0];
-}
-
-#endif
->>>>>>> 1a7b0ed (flip_bmi2 added; bmi2 version of stability and corner_stability)
-=======
-
-  #elif defined(hasNeon)
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-
-void get_full_lines(const unsigned long long disc, unsigned long long full[4])
-{
-	unsigned long long l8;
-	uint8x8_t l01;
-	uint64x2_t l79, r79;
-	const uint64x2_t e790 = vdupq_n_u64(0x007f7f7f7f7f7f7f);
-	const uint64x2_t e791 = vdupq_n_u64(0xfefefefefefefe00);
-	const uint64x2_t e792 = vdupq_n_u64(0x00003f3f3f3f3f3f);
-	const uint64x2_t e793 = vdupq_n_u64(0x0f0f0f0ff0f0f0f0);
-
-	l01 = vcreate_u8(disc);			l79 = r79 = vreinterpretq_u64_u8(vcombine_u8(l01, vrev64_u8(l01)));
-	l01 = vceq_u8(l01, vdup_n_u8(0xff));	l79 = vandq_u64(l79, vornq_u64(vshrq_n_u64(l79, 9), e790));
-	full[0] = vget_lane_u64(vreinterpret_u64_u8(l01), 0);
-						r79 = vandq_u64(r79, vornq_u64(vshlq_n_u64(r79, 9), e791));
-	l8 = disc;				l79 = vbicq_u64(l79, vbicq_u64(e792, vshrq_n_u64(l79, 18)));	// De Morgan
-	l8 &= (l8 >> 8) | (l8 << 56);		r79 = vbicq_u64(r79, vshlq_n_u64(vbicq_u64(e792, r79), 18));
-	l8 &= (l8 >> 16) | (l8 << 48);		l79 = vandq_u64(vandq_u64(l79, r79), vorrq_u64(e793, vsliq_n_u64(vshrq_n_u64(l79, 36), r79, 36)));
-	l8 &= (l8 >> 32) | (l8 << 32);		full[2] = vgetq_lane_u64(l79, 0);
-	full[1] = l8;				full[3] = vertical_mirror(vgetq_lane_u64(l79, 1));
-}
-
-  #else	// 1 CPU, 3 SSE
-
-void get_full_lines(const unsigned long long disc, unsigned long long full[4])
-{
-	unsigned long long rdisc = vertical_mirror(disc);
-	unsigned long long l8;
-	__m128i l01, l79, r79;	// full lines
-	const __m128i kff  = _mm_set1_epi8(-1);
-	const __m128i e790 = _mm_set1_epi64x(0xff80808080808080);
-	const __m128i e791 = _mm_set1_epi64x(0x01010101010101ff);
-	const __m128i e792 = _mm_set1_epi64x(0x00003f3f3f3f3f3f);
-	const __m128i e793 = _mm_set1_epi64x(0x0f0f0f0ff0f0f0f0);
-
-	l01 = l79 = _mm_cvtsi64_si128(disc);	l79 = r79 = _mm_unpacklo_epi64(l79, _mm_cvtsi64_si128(rdisc));
-	l01 = _mm_cmpeq_epi8(kff, l01);		l79 = _mm_and_si128(l79, _mm_or_si128(e790, _mm_srli_epi64(l79, 9)));
-	_mm_storel_epi64((__m128i*) &full[0], l01);
-						r79 = _mm_and_si128(r79, _mm_or_si128(e791, _mm_slli_epi64(r79, 9)));
-	l8 = disc;				l79 = _mm_andnot_si128(_mm_andnot_si128(_mm_srli_epi64(l79, 18), e792), l79);	// De Morgan
-	l8 &= (l8 >> 8) | (l8 << 56);		r79 = _mm_andnot_si128(_mm_slli_epi64(_mm_andnot_si128(r79, e792), 18), r79);
-	l8 &= (l8 >> 16) | (l8 << 48);		l79 = _mm_and_si128(_mm_and_si128(l79, r79), _mm_or_si128(e793, _mm_or_si128(_mm_srli_epi64(l79, 36), _mm_slli_epi64(r79, 36))));
-	l8 &= (l8 >> 32) | (l8 << 32);		_mm_storel_epi64((__m128i *) &full[2], l79);
-	full[1] = l8;				full[3] = vertical_mirror(_mm_cvtsi128_si64(_mm_unpackhi_epi64(l79, l79)));
-}
-
-<<<<<<< HEAD
-#endif
-<<<<<<< HEAD
-#endif // HAS_CPU_64/ANDROID
-<<<<<<< HEAD
-
-/**
- * @brief SSE translation of board_get_hash_code.
- *
- * Too many dependencies, effective only on 32bit build.
- * For AMD, MMX version in board_mmx.c is faster.
- *
- * @param p pointer to 16 bytes to hash.
- * @return the hash code of the bitboard
- */
-#if (defined(USE_GAS_MMX) && !defined(__3dNOW__)) || defined(USE_MSVC_X86) // || defined(__x86_64__)
-
-unsigned long long board_get_hash_code_sse(const unsigned char *p)
-{
-	unsigned long long h;
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
-	__m128	h0, h1, h2, h3;
-
-	h0 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[0][p[0]])), (__m64 *) &hash_rank[4][p[4]]);
-	h1 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[1][p[1]])), (__m64 *) &hash_rank[5][p[5]]);
-	h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[2][p[2]])), (__m64 *) &hash_rank[6][p[6]]);
-	h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[3][p[3]])), (__m64 *) &hash_rank[7][p[7]]);
-	h0 = _mm_xor_ps(h0, h2);	h1 = _mm_xor_ps(h1, h3);
-	h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[8][p[8]])), (__m64 *) &hash_rank[10][p[10]]);
-	h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[9][p[9]])), (__m64 *) &hash_rank[11][p[11]]);
-	h0 = _mm_xor_ps(h0, h2);	h1 = _mm_xor_ps(h1, h3);
-	h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[12][p[12]])), (__m64 *) &hash_rank[14][p[14]]);
-	h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[13][p[13]])), (__m64 *) &hash_rank[15][p[15]]);
-	h0 = _mm_xor_ps(h0, h2);	h1 = _mm_xor_ps(h1, h3);
-	h0 = _mm_xor_ps(h0, h1);
-	h0 = _mm_xor_ps(h0, _mm_movehl_ps(h1, h0));
-	h = _mm_cvtsi128_si64(_mm_castps_si128(h0));
-
-#else
-	__asm__ volatile (
-		"movq	%0, %%xmm0\n\t"		"movq	%1, %%xmm1"
-	: : "m" (hash_rank[0][p[0]]), "m" (hash_rank[1][p[1]]));
-	__asm__ volatile (
-		"movq	%0, %%xmm2\n\t"		"movq	%1, %%xmm3"
-	: : "m" (hash_rank[2][p[2]]), "m" (hash_rank[3][p[3]]));
-	__asm__ volatile (
-		"movhps	%0, %%xmm0\n\t"		"movhps	%1, %%xmm1"
-	: : "m" (hash_rank[4][p[4]]), "m" (hash_rank[5][p[5]]));
-	__asm__ volatile (
-		"movhps	%0, %%xmm2\n\t"		"movhps	%1, %%xmm3"
-	: : "m" (hash_rank[6][p[6]]), "m" (hash_rank[7][p[7]]));
-	__asm__ volatile (
-		"xorps	%%xmm2, %%xmm0\n\t"	"xorps	%%xmm3, %%xmm1\n\t"
-		"movq	%0, %%xmm2\n\t"		"movq	%1, %%xmm3"
-	: : "m" (hash_rank[8][p[8]]), "m" (hash_rank[9][p[9]]));
-	__asm__ volatile (
-		"movhps	%0, %%xmm2\n\t"		"movhps	%1, %%xmm3"
-	: : "m" (hash_rank[10][p[10]]), "m" (hash_rank[11][p[11]]));
-	__asm__ volatile (
-		"xorps	%%xmm2, %%xmm0\n\t"	"xorps	%%xmm3, %%xmm1\n\t"
-		"movq	%0, %%xmm2\n\t"		"movq	%1, %%xmm3"
-	: : "m" (hash_rank[12][p[12]]), "m" (hash_rank[13][p[13]]));
-	__asm__ volatile (
-		"movhps	%1, %%xmm2\n\t"		"movhps	%2, %%xmm3\n\t"
-		"xorps	%%xmm2, %%xmm0\n\t"	"xorps	%%xmm3, %%xmm1\n\t"
-		"xorps	%%xmm1, %%xmm0\n\t"
-		"movhlps %%xmm0, %%xmm1\n\t"
-		"xorps	%%xmm1, %%xmm0\n\t"
-		"movd	%%xmm0, %%eax\n\t"
-		"punpckhdq %%xmm0, %%xmm0\n\t"
-		"movd	%%xmm0, %%edx"
-	: "=A" (h) : "m" (hash_rank[14][p[14]]), "m" (hash_rank[15][p[15]]));
-#endif
-
-	return h;
-}
-
-#endif // USE_GAS_MMX
-
-#if 0 // def __AVX2__	// experimental - too many instructions
-
-unsigned long long board_get_hash_code_avx2(const unsigned char *p)
-{
-	__m128i	ix0, ix8, hh;
-	__m256i	hhh;
-	static const __v16qi rank = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-
-	ix0 = _mm_loadu_si128((__m128i *) p);
-	ix8 = _mm_unpackhi_epi8(ix0, (__m128i) rank);
-	ix0 = _mm_unpacklo_epi8(ix0, (__m128i) rank);
-
-	hhh  = _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_blend_epi16(_mm_setzero_si128(), ix0, 0x55), 8);
-	hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_blend_epi16(_mm_setzero_si128(), ix8, 0x55), 8);
-	hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_srli_epi32(ix0, 16), 8);
-	hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_srli_epi32(ix8, 16), 8);
-
-	hh = _mm256_castsi256_si128(hhh) ^ _mm256_extracti128_si256(hhh, 1);
-	hh ^= _mm_shuffle_epi32(hh, 0x4e);
-	return hh[0];
-}
-
-#endif
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
-=======
-  #endif
-<<<<<<< HEAD
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
-#endif // hasSSE2/hasNeon
-<<<<<<< HEAD
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-=======
-=======
-#endif // hasSSE2/__ARM_NEON
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-
 #ifdef __AVX2__
 /**
  * @brief AVX2 optimized get_stability
@@ -2818,4 +952,3 @@ __m128i vectorcall get_moves_and_potential(__m256i PP, __m256i OO)
 }
 
 #endif
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
diff --git a/src/book.c b/src/book.c
index 343b2e4..b0784d2 100644
--- a/src/book.c
+++ b/src/book.c
@@ -197,15 +197,7 @@ static bool position_is_ok(const Position *position)
 		} else {
 			if (/*l->move < A1 ||*/ l->move > H8
 			 || board_is_occupied(&board, l->move)
-<<<<<<< HEAD
-<<<<<<< HEAD
 			 || board_get_move_flip(&board, l->move, &move) == 0) {
-=======
-			 || board_get_move(&board, l->move, &move) == 0) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			 || board_get_move_flip(&board, l->move, &move) == 0) {
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 				warn("link %s is wrong\n", move_to_string(l->move, WHITE, s));
 				position_print(position, &position->board, stdout);
 				return false;
@@ -230,15 +222,7 @@ static bool position_is_ok(const Position *position)
 		}
 	} else if (/*l->move < A1 ||*/ l->move > H8
 		 || board_is_occupied(&board, l->move)
-<<<<<<< HEAD
-<<<<<<< HEAD
-		 || board_get_move_flip(&board, l->move, &move) == 0) {
-=======
-		 || board_get_move(&board, l->move, &move) == 0) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		 || board_get_move_flip(&board, l->move, &move) == 0) {
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 			warn("leaf %s is wrong\n", move_to_string(l->move, WHITE, s));
 			position_print(position, &position->board, stdout);
 			return false;
@@ -718,15 +702,7 @@ static void position_search(Position *position, Book *book)
 
 	if (position->n_link < n_moves || (position->n_link == 0 && n_moves == 0 && position->score.value == -SCORE_INF)) {
 		search_set_board(search, &position->board, BLACK);
-<<<<<<< HEAD
-<<<<<<< HEAD
-		search_set_level(search, position->level, search->eval.n_empties);
-=======
-		search_set_level(search, position->level, search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		search_set_level(search, position->level, search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 
 		foreach_link (l, position) {
 			movelist_exclude(&search->movelist, l->move);
@@ -768,15 +744,7 @@ static void position_search(Position *position, Book *book)
 static void position_link(Position *position, Book *book)
 {
 	int x;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	unsigned long long moves = board_get_moves(&position->board);
-=======
-	unsigned long long moves = get_moves(position->board.player, position->board.opponent);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	unsigned long long moves = board_get_moves(&position->board);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 	Board next;
 	Link link;
 	Position *child;
@@ -1075,42 +1043,17 @@ static void board_feed_hash(Board *board, const Book *book, Search *search, cons
 	const unsigned long long hash_code = board_get_hash_code(board);
 	MoveList movelist;
 	Move *m;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	HashStoreData hash_data;
-=======
-	HashStoreData hash_store_data;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
 	HashStoreData hash_data;
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
 
 	position = book_probe(book, board);
 	if (position) {
 		const int n_empties = board_count_empties(&position->board);
-<<<<<<< HEAD
-<<<<<<< HEAD
-		const int score = position->score.value;
-		int move = NOMOVE;
-
-		hash_data.data.wl.c.depth = LEVEL[position->level][n_empties].depth;
-		hash_data.data.wl.c.selectivity = LEVEL[position->level][n_empties].selectivity;
-
-=======
-		const int depth = LEVEL[position->level][n_empties].depth;
-		const int selectivity = LEVEL[position->level][n_empties].selectivity;
-		const int score = position->score.value;
-		int move = NOMOVE;
-
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		const int score = position->score.value;
 		int move = NOMOVE;
 
 		hash_data.data.wl.c.depth = LEVEL[position->level][n_empties].depth;
 		hash_data.data.wl.c.selectivity = LEVEL[position->level][n_empties].selectivity;
 
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
 		position_get_moves(position, board, &movelist);
 		foreach_move(m, movelist) {
 			if (move == NOMOVE) move = m->x;
@@ -1118,41 +1061,11 @@ static void board_feed_hash(Board *board, const Book *book, Search *search, cons
 				board_feed_hash(board, book, search, is_pv && m->score == score);
 			board_restore(board, m);
 		}
-<<<<<<< HEAD
-<<<<<<< HEAD
 
 		hash_data.data.lower = hash_data.data.upper = score;
 		hash_data.data.move[0] = move;
-<<<<<<< HEAD
-<<<<<<< HEAD
 		hash_feed(&search->hash_table, board, hash_code, &hash_data);
 		if (is_pv) hash_feed(&search->pv_table, board, hash_code, &hash_data);
-=======
-		hash_feed(&search->hash_table, board, hash_code, depth, selectivity, score, score, move);
-		if (is_pv) hash_feed(&search->pv_table, board, hash_code, depth, selectivity, score, score, move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-
-<<<<<<< HEAD
-		hash_store_data.data.lower = hash_store_data.data.upper = score;
-		hash_store_data.data.move[0] = move;
-		hash_feed(&search->hash_table, board, hash_code, &hash_store_data);
-		if (is_pv) hash_feed(&search->pv_table, board, hash_code, &hash_store_data);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-		hash_data.data.lower = hash_data.data.upper = score;
-		hash_data.data.move[0] = move;
-		hash_feed(&search->hash_table, board, hash_code, &hash_data);
-		if (is_pv) hash_feed(&search->pv_table, board, hash_code, &hash_data);
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-=======
-		hash_feed(&search->hash_table, HBOARD_P(board), hash_code, &hash_data);
-		if (is_pv) hash_feed(&search->pv_table, HBOARD_P(board), hash_code, &hash_data);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-		hash_feed(&search->hash_table, board, hash_code, &hash_data);
-		if (is_pv) hash_feed(&search->pv_table, board, hash_code, &hash_data);
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 	}
 }
 
@@ -2384,15 +2297,7 @@ void book_add_game(Book *book, const Game *game)
 			stack[n_moves++] = MOVE_PASS;
 			board_pass(&board);
 		}
-<<<<<<< HEAD
-<<<<<<< HEAD
 		if (!board_is_occupied(&board, game->move[i]) && board_get_move_flip(&board, game->move[i], &stack[n_moves])) {
-=======
-		if (!board_is_occupied(&board, game->move[i]) && board_get_move(&board, game->move[i], &stack[n_moves])) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		if (!board_is_occupied(&board, game->move[i]) && board_get_move_flip(&board, game->move[i], &stack[n_moves])) {
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 			board_update(&board, stack + n_moves);
 			++n_moves;
 		} else {
@@ -2472,15 +2377,7 @@ void book_check_game(Book *book, MoveHash *hash, const Game *game, BookCheckGame
 			stack[n_moves++] = MOVE_PASS;
 			board_pass(&board);
 		}
-<<<<<<< HEAD
-<<<<<<< HEAD
-		if (!board_is_occupied(&board, game->move[i]) && board_get_move_flip(&board, game->move[i], &stack[n_moves])) {
-=======
-		if (!board_is_occupied(&board, game->move[i]) && board_get_move(&board, game->move[i], &stack[n_moves])) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		if (!board_is_occupied(&board, game->move[i]) && board_get_move_flip(&board, game->move[i], &stack[n_moves])) {
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 			board_update(&board, stack + n_moves);
 			++n_moves;
 		} else {
diff --git a/src/cassio.c b/src/cassio.c
index df9f452..c262b44 100644
--- a/src/cassio.c
+++ b/src/cassio.c
@@ -13,23 +13,7 @@
  *  - With "-follow-cassio" Edax will follow more closely Cassio's search request. By default, it
  * searches with settings that make it better in tournament mode against Roxane, Cassio, etc.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2018
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
- * @date 1998 - 2020
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
- * @date 1998 - 2022
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -254,15 +238,7 @@ static void engine_observer(Result *result)
 static Search* engine_create_search(void)
 {
 	Search *search;
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-	
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
 
->>>>>>> 0a166fd (Remove 1 element array coding style)
 	search = (Search*) mm_malloc(sizeof (Search));
 	if (search == NULL) {
 		engine_send("ERROR: Cannot allocate a new search engine.");
@@ -326,23 +302,7 @@ static int engine_open(Search *search, const Board *board, const int player, con
 	if (player != search->player || !board_equal(&search->board, board)) {
 		search_set_board(search, board, player);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 		if (hash_get_from_board(&search->pv_table, board, &hash_data)) {
-=======
-		if (hash_get(&search->pv_table, board, board_get_hash_code(board), &hash_data)) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		if (hash_get_from_board(&search->pv_table, board, &hash_data)) {
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-=======
-		if (hash_get_from_board(&search->pv_table, HBOARD_P(board), &hash_data)) {
->>>>>>> 0b8fa13 (More HBOARD hash functions)
-=======
-		if (hash_get_from_board(&search->pv_table, board, &hash_data)) {
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 			if (hash_data.lower == -SCORE_INF && hash_data.upper < SCORE_INF) score = hash_data.upper;
 			else if (hash_data.upper == +SCORE_INF && hash_data.lower > -SCORE_INF) score = hash_data.lower;
 			else score = (hash_data.upper + hash_data.lower) / 2;
@@ -454,36 +414,6 @@ void engine_free(void *v)
 
 void feed_all_hash_table(Search *search, Board *board, const int depth, const int selectivity, const int lower, const int upper, const int move)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	HashStoreData hash_data;
-	const unsigned long long hash_code = board_get_hash_code(board);
-
-<<<<<<< HEAD
-	hash_data.data.wl.c.depth = depth;
-	hash_data.data.wl.c.selectivity = selectivity;
-	hash_data.data.move[0] = move;
-	hash_data.data.lower = lower;
-	hash_data.data.upper = upper;
-	hash_feed(&search->hash_table, board, hash_code, &hash_data);
-	hash_feed(&search->pv_table, board, hash_code, &hash_data);
-=======
-	hash_feed(&search->hash_table, board, hash_code, depth, selectivity, lower, upper, move);
-	hash_feed(&search->pv_table, board, hash_code, depth, selectivity, lower, upper, move);	
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	HashStoreData hash_store_data;
-	const unsigned long long hash_code = board_get_hash_code(board);
-
-	hash_store_data.data.wl.c.depth = depth;
-	hash_store_data.data.wl.c.selectivity = selectivity;
-	hash_store_data.data.move[0] = move;
-	hash_store_data.data.lower = lower;
-	hash_store_data.data.upper = upper;
-	hash_feed(&search->hash_table, board, hash_code, &hash_store_data);
-	hash_feed(&search->pv_table, board, hash_code, &hash_store_data);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
 	HashStoreData hash_data;
 	const unsigned long long hash_code = board_get_hash_code(board);
 
@@ -492,19 +422,8 @@ void feed_all_hash_table(Search *search, Board *board, const int depth, const in
 	hash_data.data.move[0] = move;
 	hash_data.data.lower = lower;
 	hash_data.data.upper = upper;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	hash_feed(&search->hash_table, board, hash_code, &hash_data);
-	hash_feed(&search->pv_table, board, hash_code, &hash_data);
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-=======
-	hash_feed(&search->hash_table, HBOARD_P(board), hash_code, &hash_data);
-	hash_feed(&search->pv_table, HBOARD_P(board), hash_code, &hash_data);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
 	hash_feed(&search->hash_table, board, hash_code, &hash_data);
 	hash_feed(&search->pv_table, board, hash_code, &hash_data);
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 }
 
 /**
@@ -625,28 +544,12 @@ static bool skip_search(Engine *engine, int *old_score)
 		if (alpha < hash_data.lower) alpha = *old_score = hash_data.lower;
 		if (beta > hash_data.upper) beta = *old_score = hash_data.upper;
 		// skip search ?
-<<<<<<< HEAD
-<<<<<<< HEAD
-		if (hash_data.wl.c.depth >= search->depth && hash_data.wl.c.selectivity >= search->selectivity && alpha >= beta) {
-=======
-		if (hash_data.depth >= search->depth && hash_data.selectivity >= search->selectivity && alpha >= beta) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		if (hash_data.wl.c.depth >= search->depth && hash_data.wl.c.selectivity >= search->selectivity && alpha >= beta) {
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 			if (hash_data.move[0] != NOMOVE) movelist_sort_bestmove(movelist, hash_data.move[0]);
 			else if (hash_data.lower > SCORE_MIN) return false;
 			bestmove = movelist_first(movelist);
 			bestmove->score = *old_score;
-<<<<<<< HEAD
-<<<<<<< HEAD
-			record_best_move(search, bestmove, options.alpha, options.beta, search->depth);
-=======
-			record_best_move(search, &search->board, bestmove, options.alpha, options.beta, search->depth);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 			record_best_move(search, bestmove, options.alpha, options.beta, search->depth);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
 			bound =  search->result->bound + bestmove->x;
 
 			if (bound->lower != bound->upper || is_pv_ok(search, bestmove->x, search->depth)) {
@@ -657,32 +560,14 @@ static bool skip_search(Engine *engine, int *old_score)
 				cassio_debug("Edax does not skip the search : BAD PV!\n");
 			}
 		} else {
-<<<<<<< HEAD
-<<<<<<< HEAD
 			if (hash_data.wl.c.depth < search->depth || hash_data.wl.c.selectivity < search->selectivity) {
 				cassio_debug("Edax does not skip the search: Level %d@%d < %d@%d\n", hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent, search->depth, selectivity_table[search->selectivity].percent);
-=======
-			if (hash_data.depth < search->depth || hash_data.selectivity < search->selectivity) {
-				cassio_debug("Edax does not skip the search: Level %d@%d < %d@%d\n", hash_data.depth, selectivity_table[hash_data.selectivity].percent, search->depth, selectivity_table[search->selectivity].percent);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			if (hash_data.wl.c.depth < search->depth || hash_data.wl.c.selectivity < search->selectivity) {
-				cassio_debug("Edax does not skip the search: Level %d@%d < %d@%d\n", hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent, search->depth, selectivity_table[search->selectivity].percent);
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 			} else {
 				cassio_debug("Edax does not skip the search: unsolved score alpha %d < beta %d\n", alpha, beta); 
 			}
 		}
 	} else {
-<<<<<<< HEAD
-<<<<<<< HEAD
-		cassio_debug("Edax does not skip the search: Position %s (hash=%llx) not found\n", board_to_string(&search->board, search->player, b), hash_code);
-=======
-		cassio_debug("Edax does not skip the search: Position %s (hash=%llx) not found\n", board_to_string(&search->board, search->player, b), board_get_hash_code(&search->board));
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		cassio_debug("Edax does not skip the search: Position %s (hash=%llx) not found\n", board_to_string(&search->board, search->player, b), hash_code);
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
 	}
 	
 	return false;
diff --git a/src/const.h b/src/const.h
index 52234db..5b025d3 100644
--- a/src/const.h
+++ b/src/const.h
@@ -3,23 +3,7 @@
  *
  * Constants as macros, enums, or global consts.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2024
-=======
- * @date 1998 - 2020
->>>>>>> 9ad160e (4.4.7 AVX/shuffle optimization in endgame_sse.c)
-=======
- * @date 1998 - 2021
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
- * @date 1998 - 2023
->>>>>>> d63619f (Change NodeType to char; next node_type TLU to trinary Op)
-=======
- * @date 1998 - 2024
->>>>>>> a09308f (Renew version string and copyright year)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -95,75 +79,12 @@ enum {
 	CUT_NODE,
 	ALL_NODE
 };
-<<<<<<< HEAD
-<<<<<<< HEAD
-typedef	unsigned char	NodeType;
-=======
-typedef	char	NodeType;
->>>>>>> d63619f (Change NodeType to char; next node_type TLU to trinary Op)
-=======
 typedef	unsigned char	NodeType;
->>>>>>> 2ea1e4f (Change NodeType to unsigned char to fix gcc warning)
 
 #define VERSION 4
-<<<<<<< HEAD
-<<<<<<< HEAD
-#define RELEASE 5
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-#define VERSION_STRING "4.5.3"
-#define EDAX_NAME "Edax 4.5.3"
-=======
-#define RELEASE 4
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-#define VERSION_STRING "4.4.5"
-#define EDAX_NAME "Edax 4.4.5"
->>>>>>> 5124720 (-eval-file options added as documented; minor fix on console output)
-=======
-#define VERSION_STRING "4.4.6"
-#define EDAX_NAME "Edax 4.4.6"
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
-#define VERSION_STRING "4.4.7"
-#define EDAX_NAME "Edax 4.4.7"
->>>>>>> 9ad160e (4.4.7 AVX/shuffle optimization in endgame_sse.c)
-=======
-#define VERSION_STRING "4.4.8"
-#define EDAX_NAME "Edax 4.4.8"
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-=======
 #define RELEASE 5
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-#define VERSION_STRING "4.5.0"
-#define EDAX_NAME "Edax 4.5.0"
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
-#define VERSION_STRING "4.5.1"
-#define EDAX_NAME "Edax 4.5.1"
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-=======
-#define VERSION_STRING "4.5.2"
-#define EDAX_NAME "Edax 4.5.2"
->>>>>>> a9633d5 (Initial 4.5.2; some reformats)
-=======
-#define VERSION_STRING "4.5.1"
-#define EDAX_NAME "Edax 4.5.1"
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
-=======
-#define VERSION_STRING "4.5.2"
-#define EDAX_NAME "Edax 4.5.2"
->>>>>>> a09308f (Renew version string and copyright year)
-=======
 #define VERSION_STRING "4.5.3"
 #define EDAX_NAME "Edax 4.5.3"
->>>>>>> d8589d2 (Init 4.5.3: abandon size_reduced_movelist which confuses gcc warn)
 #define BOOK 0x424f4f4b
 #define EDAX 0x45444158
 #define EVAL 0x4556414c
diff --git a/src/count_last_flip_32.c b/src/count_last_flip_32.c
index 0c8d63c..492c266 100644
--- a/src/count_last_flip_32.c
+++ b/src/count_last_flip_32.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file count_last_flip_32.c
  *
@@ -35,14 +31,6 @@
  * 
  */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#include "board.h"
-
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 #define LODWORD(l) ((unsigned int)(l))
 #define HIDWORD(l) ((unsigned int)((l)>>32))
 
@@ -119,15 +107,7 @@ static int count_last_flip_A1(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x01010101u) + ((HIDWORD(P) & 0x01010101u) << 4)) * 0x01020408u) >> 25];
-=======
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x01010100u) + ((HIDWORD(P) & 0x01010101u) << 4)) * 0x01020408u) >> 25];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x01010101u) + ((HIDWORD(P) & 0x01010101u) << 4)) * 0x01020408u) >> 25];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 1) & 0x7f];
 	n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x08040200u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 25];
 
@@ -144,15 +124,7 @@ static int count_last_flip_B1(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x02020202u) + ((HIDWORD(P) & 0x02020202u) << 4)) * 0x00810204u) >> 25];
-=======
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x02020200u) + ((HIDWORD(P) & 0x02020202u) << 4)) * 0x00810204u) >> 25];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x02020202u) + ((HIDWORD(P) & 0x02020202u) << 4)) * 0x00810204u) >> 25];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 2) & 0x3f];
 	n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x10080400u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 26];
 
@@ -169,15 +141,7 @@ static int count_last_flip_C1(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 25];
-=======
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x04040400u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 25];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 25];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_2[LODWORD(P) & 0xff];
 	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x20110A04u) + (HIDWORD(P) & 0x00008040u)) * 0x01010101u) >> 24];	// A3C1H6
 
@@ -194,15 +158,7 @@ static int count_last_flip_D1(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 25];
-=======
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x08080800u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 25];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 25];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_3[LODWORD(P) & 0xff];
 	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x41221408u) + (HIDWORD(P) & 0x00000080u)) * 0x01010101u) >> 24];	// A4D1H5
 
@@ -219,15 +175,7 @@ static int count_last_flip_E1(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 25];
-=======
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x10101000u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 25];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 25];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_4[LODWORD(P) & 0xff];
 	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x82442810u) + (HIDWORD(P) & 0x00000001u)) * 0x01010101u) >> 24];	// A5E1H4
 
@@ -244,15 +192,7 @@ static int count_last_flip_F1(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x20202020u) >> 4) + (HIDWORD(P) & 0x20202020u)) * 0x00810204u) >> 25];
-=======
-	n_flipped  = COUNT_FLIP_R[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) >> 4) & 0x02020200u)) * 0x00810204u) >> 25];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x20202020u) >> 4) + (HIDWORD(P) & 0x20202020u)) * 0x00810204u) >> 25];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_5[LODWORD(P) & 0xff];
 	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x04885020u) + (HIDWORD(P) & 0x00000102u)) * 0x01010101u) >> 24];	// A6F1H3
 
@@ -269,15 +209,7 @@ static int count_last_flip_G1(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x40404040u) >> 4) + (HIDWORD(P) & 0x40404040u)) * 0x00408102u) >> 25];
-=======
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x40404000u) >> 4) + (HIDWORD(P) & 0x40404040u)) * 0x00408102u) >> 25];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x40404040u) >> 4) + (HIDWORD(P) & 0x40404040u)) * 0x00408102u) >> 25];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_L[(LODWORD(P) << 1) & 0x7e];
 	n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x08102000u) + (HIDWORD(P) & 0x00010204u)) * 0x02020202u) >> 24];
 
@@ -294,15 +226,7 @@ static int count_last_flip_H1(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x80808080u) >> 4) + (HIDWORD(P) & 0x80808080u)) * 0x00204081u) >> 25];
-=======
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x80808000u) >> 4) + (HIDWORD(P) & 0x80808080u)) * 0x00204081u) >> 25];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x80808080u) >> 4) + (HIDWORD(P) & 0x80808080u)) * 0x00204081u) >> 25];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_L[LODWORD(P) & 0x7f];
 	n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x10204000u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24];
 
@@ -1309,1245 +1233,3 @@ int (*count_last_flip[])(const unsigned long long) = {
 	count_last_flip_E8, count_last_flip_F8, count_last_flip_G8, count_last_flip_H8,
 	count_last_flip_pass,
 };
-<<<<<<< HEAD
-=======
-/**
- * @file count_last_flip_32.c
- *
- *
- * A function is provided to count the number of fipped disc of the last move
- * for each square of the board. These functions are gathered into an array of
- * functions, so that a fast access to each function is allowed. The generic
- * form of the function take as input the player bitboard and return twice
- * the number of flipped disc of the last move.
- *
- * The basic principle is to read into an array a precomputed result. Doing
- * this is easy for a single line ; as we can use arrays of the form:
- *  - COUNT_FLIP[square where we play][8-bits disc pattern].
-* The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by a simple
- * multiplication and to right-shift the result to scale it into a number
- * between 0 and 255.
- * Once we get our 8-bits disc patterns, we directly get the number of
- * flipped discs from the precomputed array, and add them from each flipping
- * lines.
- * For optimization purpose, the value returned is twice the number of flipped
- * disc, to facilitate the computation of disc difference.
- *
- * With 135 degree merge, instead of Valery ClaudePierre's modification.
- *
- * @date 1998 - 2017
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- * 
- */
-
-#include "board.h"
-
-#define LODWORD(l) ((unsigned int)(l))
-#define HIDWORD(l) ((unsigned int)((l)>>32))
-
-/** precomputed count flip array */
-static const char COUNT_FLIP_R[128] = {
-	 0,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	 8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	10,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	 8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	12,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	 8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	10,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	 8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0
-};
-
-static const char COUNT_FLIP_2[256] = {
-	 0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 8, 10,  8,  8,  8, 10,  8,  8,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0
-};
-
-static const char COUNT_FLIP_3[256] = {
-	 0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 6, 10,  8,  8,  6,  6,  6,  6,  6, 10,  8,  8,  6,  6,  6,  6,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0
-};
-
-static const char COUNT_FLIP_4[256] = {
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0
-};
-
-static const char COUNT_FLIP_5[256] = {
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-	 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
-};
-
-static const char COUNT_FLIP_L[128] = {
-	 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-	 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
-};
-
-/**
- * Count last flipped discs when playing on square A1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A1(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x01010100u) + ((HIDWORD(P) & 0x01010101u) << 4)) * 0x01020408u) >> 25];
-	n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 1) & 0x7f];
-	n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x08040200u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 25];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B1(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x02020200u) + ((HIDWORD(P) & 0x02020202u) << 4)) * 0x00810204u) >> 25];
-	n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 2) & 0x3f];
-	n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x10080400u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 26];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C1(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x04040400u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 25];
-	n_flipped += COUNT_FLIP_2[LODWORD(P) & 0xff];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x20110A04u) + (HIDWORD(P) & 0x00008040u)) * 0x01010101u) >> 24];	// A3C1H6
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D1(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x08080800u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 25];
-	n_flipped += COUNT_FLIP_3[LODWORD(P) & 0xff];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x41221408u) + (HIDWORD(P) & 0x00000080u)) * 0x01010101u) >> 24];	// A4D1H5
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E1(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x10101000u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 25];
-	n_flipped += COUNT_FLIP_4[LODWORD(P) & 0xff];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x82442810u) + (HIDWORD(P) & 0x00000001u)) * 0x01010101u) >> 24];	// A5E1H4
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F1(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) >> 4) & 0x02020200u)) * 0x00810204u) >> 25];
-	n_flipped += COUNT_FLIP_5[LODWORD(P) & 0xff];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x04885020u) + (HIDWORD(P) & 0x00000102u)) * 0x01010101u) >> 24];	// A6F1H3
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G1(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x40404000u) >> 4) + (HIDWORD(P) & 0x40404040u)) * 0x00408102u) >> 25];
-	n_flipped += COUNT_FLIP_L[(LODWORD(P) << 1) & 0x7e];
-	n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x08102000u) + (HIDWORD(P) & 0x00010204u)) * 0x02020202u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H1(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x80808000u) >> 4) + (HIDWORD(P) & 0x80808080u)) * 0x00204081u) >> 25];
-	n_flipped += COUNT_FLIP_L[LODWORD(P) & 0x7f];
-	n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x10204000u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A2(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x01010000u) + ((HIDWORD(P) & 0x01010101u) << 4)) * 0x01020408u) >> 26];
-	n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 9) & 0x7f];
-	n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x04020000u) + (HIDWORD(P) & 0x40201008u)) * 0x01010101u) >> 25];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B2(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x02020000u) + ((HIDWORD(P) & 0x02020202u) << 4)) * 0x00810204u) >> 26];
-	n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 10) & 0x3f];
-	n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x08040000u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 26];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C2(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x04040000u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 26];
-	n_flipped += COUNT_FLIP_2[(LODWORD(P) >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x110A0400u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 24];	// A4C2H7
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D2(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[(((LODWORD(P) & 0x08080000u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 26];
-	n_flipped += COUNT_FLIP_3[(LODWORD(P) >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x22140800u) + (HIDWORD(P) & 0x00008041u)) * 0x01010101u) >> 24];	// A5D2H6
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E2(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x10100000u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 26];
-	n_flipped += COUNT_FLIP_4[(LODWORD(P) >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x44281000u) + (HIDWORD(P) & 0x00000182u)) * 0x01010101u) >> 24];	// A6E2H5
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F2(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) & 0x20200000u) >> 4)) * 0x00810204u) >> 26];
-	n_flipped += COUNT_FLIP_5[(LODWORD(P) >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x88502000u) + (HIDWORD(P) & 0x00010204u)) * 0x01010101u) >> 24];	// A7F2H4
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G2(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x40400000u) >> 4) + (HIDWORD(P) & 0x40404040u)) * 0x00408102u) >> 26];
-	n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 7) & 0x7e];
-	n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x10200000u) + (HIDWORD(P) & 0x01020408u)) * 0x02020202u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H2(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_R[((((LODWORD(P) & 0x80800000u) >> 4) + (HIDWORD(P) & 0x80808080u)) * 0x00204081u) >> 26];
-	n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 8) & 0x7f];
-	n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x20400000u) + (HIDWORD(P) & 0x02040810u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((LODWORD(P) & 0x02010101u) * 0x01020404u + (HIDWORD(P) & 0x20100804u) * 0x04040404u) >> 24];	// A1A3F8
-	n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 17) & 0x7f];
-	n_flipped += COUNT_FLIP_5[((LODWORD(P) & 0x01010204u) * 0x20202010u + (HIDWORD(P) & 0x01010101u) * 0x08040201u) >> 24];	// C1A3A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((LODWORD(P) & 0x04020202u) * 0x00810202u + (HIDWORD(P) & 0x40201008u) * 0x02020202u) >> 24];	// B1B3G8
-	n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 18) & 0x3f];
-	n_flipped += COUNT_FLIP_5[((LODWORD(P) & 0x02020408u) * 0x10101008u + ((HIDWORD(P) & 0x02020202u) >> 1) * 0x08040201u) >> 24];	// D1B3B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 24];
-	n_flipped += COUNT_FLIP_2[(LODWORD(P) >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x02040810u) + (HIDWORD(P) & 0x00000001u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x08040201u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 24];
-	n_flipped += COUNT_FLIP_3[(LODWORD(P) >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x04081020u) + (HIDWORD(P) & 0x00000102u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x10080402u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 24];
-	n_flipped += COUNT_FLIP_4[(LODWORD(P) >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x08102040u) + (HIDWORD(P) & 0x00010204u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x20100804u) + (HIDWORD(P) & 0x00008040u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x00810204u) >> 24];
-	n_flipped += COUNT_FLIP_5[(LODWORD(P) >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x10204080u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x40201008u) + (HIDWORD(P) & 0x00000080u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[(((LODWORD(P) & 0x40402010u) >> 4) * 0x01010102u + (HIDWORD(P) & 0x40404040u) * 0x00408102u) >> 24];	// E1G3G8
-	n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 15) & 0x7e];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x20404040u) >> 1) * 0x04020101u + ((HIDWORD(P) & 0x02040810u) >> 1) * 0x01010101u) >> 24];	// G1G3B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[(((LODWORD(P) & 0x80804020u) >> 4) * 0x00808081u + (HIDWORD(P) & 0x80808080u) * 0x00204081u) >> 24];	// F1H3H8
-	n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 16) & 0x7f];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x40808080u) >> 2) * 0x04020101u + ((HIDWORD(P) & 0x04081020u) >> 2) * 0x01010101u) >> 24];	// H1H3C8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((LODWORD(P) & 0x01010101u) * 0x01020408u + (HIDWORD(P) & 0x10080402u) * 0x08080808u) >> 24];	// A1A4E8
-	n_flipped += COUNT_FLIP_R[LODWORD(P) >> 25];
-	n_flipped += COUNT_FLIP_4[((LODWORD(P) & 0x01020408u) * 0x10101010u + (HIDWORD(P) & 0x01010101u) * 0x08040201u) >> 24];	// D1A4A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((LODWORD(P) & 0x02020202u) * 0x00810204u + (HIDWORD(P) & 0x20100804u) * 0x04040404u) >> 24];	// B1B4F8
-	n_flipped += COUNT_FLIP_R[LODWORD(P) >> 26];
-	n_flipped += COUNT_FLIP_4[((LODWORD(P) & 0x02040810u) * 0x08080808u + ((HIDWORD(P) & 0x02020202u) >> 1) * 0x08040201u) >> 24];	// E1B4B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 24];
-	n_flipped += COUNT_FLIP_2[LODWORD(P) >> 24];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x04081020u) + (HIDWORD(P) & 0x00000102u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x04020100u) + (HIDWORD(P) & 0x40201008u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 24];
-	n_flipped += COUNT_FLIP_3[LODWORD(P) >> 24];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x08102040u) + (HIDWORD(P) & 0x00010204u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x08040201u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 24];
-	n_flipped += COUNT_FLIP_4[LODWORD(P) >> 24];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x10204080u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x10080402u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x00810204u) >> 24];
-	n_flipped += COUNT_FLIP_5[LODWORD(P) >> 24];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x20408000u) + (HIDWORD(P) & 0x02040810u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x20100804u) + (HIDWORD(P) & 0x00008040u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[(((LODWORD(P) & 0x40201008u) >> 3) * 0x01010101u + (HIDWORD(P) & 0x40404040u) * 0x00408102u) >> 24];	// D1G4G8
-	n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 23) & 0x7e];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x40404040u) >> 2) * 0x08040201u + ((HIDWORD(P) & 0x04081020u) >> 2) * 0x01010101u) >> 24];	// G1G4C8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[(((LODWORD(P) & 0x80402010u) >> 4) * 0x01010101u + (HIDWORD(P) & 0x80808080u) * 0x00204081u) >> 24];	// E1H4H8
-	n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 24) & 0x7f];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x80808080u) >> 3) * 0x08040201u + ((HIDWORD(P) & 0x08102040u) >> 3) * 0x01010101u) >> 24];	// H1H4D8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((LODWORD(P) & 0x01010101u) * 0x01020408u + (HIDWORD(P) & 0x08040201u) * 0x10101010u) >> 24];	// A1A5D8
-	n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 1) & 0x7f];
-	n_flipped += COUNT_FLIP_3[((LODWORD(P) & 0x02040810u) * 0x08080808u + (HIDWORD(P) & 0x01010101u) * 0x08040201u) >> 24];	// E1A5A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((LODWORD(P) & 0x02020202u) * 0x00810204u + (HIDWORD(P) & 0x10080402u) * 0x08080808u) >> 24];	// B1B5E8
-	n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 2) & 0x3f];
-	n_flipped += COUNT_FLIP_3[((LODWORD(P) & 0x04081020u) * 0x04040404u + ((HIDWORD(P) & 0x02020202u) >> 1) * 0x08040201u) >> 24];	// F1B5B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 24];
-	n_flipped += COUNT_FLIP_2[HIDWORD(P) & 0xff];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x08102040u) + (HIDWORD(P) & 0x00010204u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x02010000u) + (HIDWORD(P) & 0x20100804u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 24];
-	n_flipped += COUNT_FLIP_3[HIDWORD(P) & 0xff];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x10204080u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x04020100u) + (HIDWORD(P) & 0x40201008u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 24];
-	n_flipped += COUNT_FLIP_4[HIDWORD(P) & 0xff];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x20408000u) + (HIDWORD(P) & 0x02040810u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x08040201u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x00810204u) >> 24];
-	n_flipped += COUNT_FLIP_5[HIDWORD(P) & 0xff];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x40800000u) + (HIDWORD(P) & 0x04081020u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x10080402u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[(((LODWORD(P) & 0x20100804u) >> 2) * 0x01010101u + (HIDWORD(P) & 0x40404040u) * 0x00408102u) >> 24];	// C1G5G8
-	n_flipped += COUNT_FLIP_L[(HIDWORD(P) << 1) & 0x7e];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x40404040u) >> 3) * 0x10080402u + ((HIDWORD(P) & 0x08102040u) >> 3) * 0x01010101u) >> 24];	// G1G5D8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[(((LODWORD(P) & 0x40201008u) >> 3) * 0x01010101u + (HIDWORD(P) & 0x80808080u) * 0x00204081u) >> 24];	// D1H5H8
-	n_flipped += COUNT_FLIP_L[HIDWORD(P) & 0x7f];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x80808080u) >> 4) * 0x10080402u + ((HIDWORD(P) & 0x10204080u) >> 4) * 0x01010101u) >> 24];	// H1H5E8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((LODWORD(P) & 0x01010101u) * 0x01020408u + (HIDWORD(P) & 0x04020101u) * 0x10202020u) >> 24];	// A1A6C8
-	n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 9) & 0x7f];
-	n_flipped += COUNT_FLIP_2[((LODWORD(P) & 0x04081020u) * 0x04040404u + (HIDWORD(P) & 0x01010102u) * 0x04040201u) >> 24];	// F1A6A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((LODWORD(P) & 0x02020202u) * 0x00810204u + (HIDWORD(P) & 0x08040202u) * 0x08101010u) >> 24];	// B1B6D8
-	n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 10) & 0x3f];
-	n_flipped += COUNT_FLIP_2[((LODWORD(P) & 0x08102040u) * 0x02020202u + ((HIDWORD(P) & 0x02020204u) >> 1) * 0x04040201u) >> 24];	// G1B6B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 24];
-	n_flipped += COUNT_FLIP_2[(HIDWORD(P) >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x10204080u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x01000000u) + (HIDWORD(P) & 0x10080402u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 24];
-	n_flipped += COUNT_FLIP_3[(HIDWORD(P) >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x20408000u) + (HIDWORD(P) & 0x02040810u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x02010000u) + (HIDWORD(P) & 0x20100804u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 24];
-	n_flipped += COUNT_FLIP_4[(HIDWORD(P) >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x40800000u) + (HIDWORD(P) & 0x04081020u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x04020100u) + (HIDWORD(P) & 0x40201008u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x00810204u) >> 24];
-	n_flipped += COUNT_FLIP_5[(HIDWORD(P) >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x80000000u) + (HIDWORD(P) & 0x08102040u)) * 0x01010101u) >> 24];
-	n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x08040201u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[(((LODWORD(P) & 0x10080402u) >> 1) * 0x01010101u + (HIDWORD(P) & 0x40404020u) * 0x00808102u) >> 24];	// B1G6G8
-	n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 7) & 0x7e];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x40404040u) >> 4) * 0x20100804u + ((HIDWORD(P) & 0x10204040u) >> 4 ) * 0x02010101u) >> 24];	// G1G6E8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[(((LODWORD(P) & 0x20100804u) >> 2) * 0x01010101u + (HIDWORD(P) & 0x80808040u) * 0x00404081u) >> 24];	// C1H6H8
-	n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 8) & 0x7f];
-	n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x80808080u) >> 5) * 0x20100804u + ((HIDWORD(P) & 0x20408080u) >> 5) * 0x02010101u) >> 24];	// H1H6F8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((((HIDWORD(P) & 0x00000101u) << 4) + (LODWORD(P) & 0x01010101u)) * 0x02040810u) >> 24];
-	n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 17) & 0x7f];
-	n_flipped += COUNT_FLIP_R[(((HIDWORD(P) & 0x00000204u) + (LODWORD(P) & 0x08102040u)) * 0x01010101u) >> 25];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((((HIDWORD(P) & 0x00000202u) << 4) + (LODWORD(P) & 0x02020202u)) * 0x01020408u) >> 24];
-	n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 18) & 0x3f];
-	n_flipped += COUNT_FLIP_R[(((HIDWORD(P) & 0x00000408u) + (LODWORD(P) & 0x10204080u)) * 0x01010101u) >> 26];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x00000404u) << 4)) * 0x00810204u) >> 24];
-	n_flipped += COUNT_FLIP_2[(HIDWORD(P) >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_2[(((HIDWORD(P) & 0x00040A11u) + (LODWORD(P) & 0x20408000u)) * 0x01010101u) >> 24];	// A5C7H2
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((((HIDWORD(P) & 0x00000808u) << 4) + (LODWORD(P) & 0x08080808u)) * 0x00408102u) >> 24];
-	n_flipped += COUNT_FLIP_3[(HIDWORD(P) >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_3[(((HIDWORD(P) & 0x00081422u) + (LODWORD(P) & 0x41800000u)) * 0x01010101u) >> 24];	// A4D7H3
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[(((HIDWORD(P) & 0x00001010u) + ((LODWORD(P) & 0x10101010u) >> 4)) * 0x02040810u) >> 24];
-	n_flipped += COUNT_FLIP_4[(HIDWORD(P) >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_4[(((HIDWORD(P) & 0x00102844u) + (LODWORD(P) & 0x82010000u)) * 0x01010101u) >> 24];	// A3E7H4
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[(((HIDWORD(P) & 0x00002020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x01020408u) >> 24];
-	n_flipped += COUNT_FLIP_5[(HIDWORD(P) >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_5[(((HIDWORD(P) & 0x00205088u) + (LODWORD(P) & 0x04020100u)) * 0x01010101u) >> 24];	// A2F7H5
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[(((HIDWORD(P) & 0x00004040u) + ((LODWORD(P) & 0x40404040u) >> 4)) * 0x00810204u) >> 24];
-	n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 15) & 0x7e];
-	n_flipped += COUNT_FLIP_L[(((HIDWORD(P) & 0x00002010u) + (LODWORD(P) & 0x08040201u)) * 0x02020202u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[(((HIDWORD(P) & 0x00008080u) + ((LODWORD(P) & 0x80808080u) >> 4)) * 0x00408102u) >> 24];
-	n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 16) & 0x7f];
-	n_flipped += COUNT_FLIP_L[(((HIDWORD(P) & 0x00004020u) + (LODWORD(P) & 0x10080402u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((((HIDWORD(P) & 0x00010101u) << 4) + (LODWORD(P) & 0x01010101u)) * 0x01020408u) >> 24];
-	n_flipped += COUNT_FLIP_R[HIDWORD(P) >> 25];
-	n_flipped += COUNT_FLIP_R[(((HIDWORD(P) & 0x00020408u) + (LODWORD(P) & 0x10204080u)) * 0x01010101u) >> 25];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((((HIDWORD(P) & 0x00020202u) << 4) + (LODWORD(P) & 0x02020202u)) * 0x00810204u) >> 24];
-	n_flipped += COUNT_FLIP_R[HIDWORD(P) >> 26];
-	n_flipped += COUNT_FLIP_R[(((HIDWORD(P) & 0x00040810u) + (LODWORD(P) & 0x20408000u)) * 0x01010101u) >> 26];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x00040404u) << 4)) * 0x00408102u) >> 24];
-	n_flipped += COUNT_FLIP_2[HIDWORD(P) >> 24];
-	n_flipped += COUNT_FLIP_2[(((HIDWORD(P) & 0x040A1120u) + (LODWORD(P) & 0x40800000u)) * 0x01010101u) >> 24];	// A6C8H3
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((((HIDWORD(P) & 0x00080808u) << 4) + (LODWORD(P) & 0x08080808u)) * 0x00204081u) >> 24];
-	n_flipped += COUNT_FLIP_3[HIDWORD(P) >> 24];
-	n_flipped += COUNT_FLIP_3[(((HIDWORD(P) & 0x08142241u) + (LODWORD(P) & 0x80000000u)) * 0x01010101u) >> 24];	// A5D8H4
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[(((HIDWORD(P) & 0x00101010u) + ((LODWORD(P) & 0x10101010u) >> 4)) * 0x01020408u) >> 24];
-	n_flipped += COUNT_FLIP_4[HIDWORD(P) >> 24];
-	n_flipped += COUNT_FLIP_4[(((HIDWORD(P) & 0x10284482u) + (LODWORD(P) & 0x01000000u)) * 0x01010101u) >> 24];	// A4E8H5
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[(((HIDWORD(P) & 0x00202020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x00810204u) >> 24];
-	n_flipped += COUNT_FLIP_5[HIDWORD(P) >> 24];
-	n_flipped += COUNT_FLIP_5[(((HIDWORD(P) & 0x00508804u) + (LODWORD(P) & 0x02010000u)) * 0x01010101u) >> 24];	// A3F8H6
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[(((HIDWORD(P) & 0x00404040u) + ((LODWORD(P) & 0x40404040u) >> 4)) * 0x00408102u) >> 24];
-	n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 23) & 0x7e];
-	n_flipped += COUNT_FLIP_L[(((HIDWORD(P) & 0x00201008u) + (LODWORD(P) & 0x04020100u)) * 0x02020202u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[(((HIDWORD(P) & 0x00808080u) + ((LODWORD(P) & 0x80808080) >> 4)) * 0x00204081u) >> 24];
-	n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 24) & 0x7f];
-	n_flipped += COUNT_FLIP_L[(((HIDWORD(P) & 0x00402010u) + (LODWORD(P) & 0x08040201u)) * 0x01010101u) >> 24];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when plassing.
- *
- * @param P player's disc pattern (unused).
- * @return zero.
- */
-static int count_last_flip_pass(const unsigned long long P)
-{
-	(void) P; // useless code to shut-up compiler warning
-	return 0;
-}
-
-/** Array of functions to count flipped discs of the last move */
-int (*count_last_flip[])(const unsigned long long) = {
-	count_last_flip_A1, count_last_flip_B1, count_last_flip_C1, count_last_flip_D1,
-	count_last_flip_E1, count_last_flip_F1, count_last_flip_G1, count_last_flip_H1,
-	count_last_flip_A2, count_last_flip_B2, count_last_flip_C2, count_last_flip_D2,
-	count_last_flip_E2, count_last_flip_F2, count_last_flip_G2, count_last_flip_H2,
-	count_last_flip_A3, count_last_flip_B3, count_last_flip_C3, count_last_flip_D3,
-	count_last_flip_E3, count_last_flip_F3, count_last_flip_G3, count_last_flip_H3,
-	count_last_flip_A4, count_last_flip_B4, count_last_flip_C4, count_last_flip_D4,
-	count_last_flip_E4, count_last_flip_F4, count_last_flip_G4, count_last_flip_H4,
-	count_last_flip_A5, count_last_flip_B5, count_last_flip_C5, count_last_flip_D5,
-	count_last_flip_E5, count_last_flip_F5, count_last_flip_G5, count_last_flip_H5,
-	count_last_flip_A6, count_last_flip_B6, count_last_flip_C6, count_last_flip_D6,
-	count_last_flip_E6, count_last_flip_F6, count_last_flip_G6, count_last_flip_H6,
-	count_last_flip_A7, count_last_flip_B7, count_last_flip_C7, count_last_flip_D7,
-	count_last_flip_E7, count_last_flip_F7, count_last_flip_G7, count_last_flip_H7,
-	count_last_flip_A8, count_last_flip_B8, count_last_flip_C8, count_last_flip_D8,
-	count_last_flip_E8, count_last_flip_F8, count_last_flip_G8, count_last_flip_H8,
-	count_last_flip_pass,
-};
->>>>>>> e558fdb (Some cleanups for clang / android build)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/count_last_flip_avx512cd.c b/src/count_last_flip_avx512cd.c
index a407b07..54de932 100644
--- a/src/count_last_flip_avx512cd.c
+++ b/src/count_last_flip_avx512cd.c
@@ -7,15 +7,7 @@
  * For optimization purpose, the value returned is twice the number of flipped
  * disc, to facilitate the computation of disc difference.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 2023 - 2024
-=======
- * @date 2023
->>>>>>> 52949e1 (Add build options and files for new count_last_flips)
-=======
- * @date 2023 - 2024
->>>>>>> ba1be42 (AVX512 last flip with lastflip_highcut)
  * @author Toshihiko Okuhara
  * @version 4.5
  * 
@@ -23,15 +15,7 @@
 
 #include "bit.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-extern	const V8DI lrmask[66];
-=======
-extern	const V4DI lmask_v4[66], rmask_v4[66];
->>>>>>> 52949e1 (Add build options and files for new count_last_flips)
-=======
 extern	const V8DI lrmask[66];
->>>>>>> ba1be42 (AVX512 last flip with lastflip_highcut)
 
 /**
  * Count last flipped discs when playing on the last empty.
@@ -44,8 +28,6 @@ extern	const V8DI lrmask[66];
 int last_flip(int pos, unsigned long long P)
 {
 	__m256i PP = _mm256_set1_epi64x(P);
-<<<<<<< HEAD
-<<<<<<< HEAD
 	__m256i	flip, outflank, eraser, rmask, lmask;
 	__m128i	flip2;
 
@@ -56,34 +38,6 @@ int last_flip(int pos, unsigned long long P)
 	flip = _mm256_maskz_add_epi64(_mm256_test_epi64_mask(PP, lmask), outflank, _mm256_set1_epi64x(-1));
 	// flip = _mm256_and_si256(_mm256_andnot_si256(outflank, flip), lmask);
 	flip = _mm256_ternarylogic_epi64(outflank, flip, lmask, 0x08);
-<<<<<<< HEAD
-
-		// right: look for player bit with lzcnt
-	rmask = lrmask[pos].v4[1];
-	eraser = _mm256_srlv_epi64(_mm256_set1_epi64x(-1),
-		_mm256_maskz_lzcnt_epi64(_mm256_test_epi64_mask(PP, rmask), _mm256_and_si256(PP, rmask)));
-	// flip = _mm256_or_si256(flip, _mm256_andnot_si256(eraser, rmask));
-	flip = _mm256_ternarylogic_epi64(flip, eraser, rmask, 0xf2);
-
-	flip2 = _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
-	return 2 * bit_count(_mm_cvtsi128_si64(_mm_or_si128(flip2, _mm_unpackhi_epi64(flip2, flip2))));
-=======
-	__m256i	flip, outflank, rmask, lmask;
-=======
-	__m256i	flip, outflank, eraser, rmask, lmask;
->>>>>>> ba1be42 (AVX512 last flip with lastflip_highcut)
-	__m128i	flip2;
-
-		// left: look for player LS1B
-	lmask = lrmask[pos].v4[0];
-	outflank = _mm256_and_si256(PP, lmask);
-		// set below LS1B if P is in lmask
-	// flip = _mm256_andnot_si256(outflank, _mm256_add_epi64(outflank, _mm256_set1_epi64x(-1)));
-	// flip = _mm256_maskz_and_epi64(_mm256_test_epi64_mask(PP, lmask), flip, lmask);
-	flip = _mm256_maskz_ternarylogic_epi64(_mm256_test_epi64_mask(PP, lmask),
-		outflank, _mm256_add_epi64(outflank, _mm256_set1_epi64x(-1)), lmask, 0x08);
-=======
->>>>>>> eb84eb8 (Revise avx512 mask usage to ease ternarylogic opt)
 
 		// right: look for player bit with lzcnt
 	rmask = lrmask[pos].v4[1];
@@ -93,11 +47,5 @@ int last_flip(int pos, unsigned long long P)
 	flip = _mm256_ternarylogic_epi64(flip, eraser, rmask, 0xf2);
 
 	flip2 = _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
-<<<<<<< HEAD
-	flip2 = _mm_or_si128(flip2, _mm_shuffle_epi32(flip2, 0x4e));
-	return 2 * bit_count(_mm_cvtsi128_si64(flip2));
->>>>>>> 52949e1 (Add build options and files for new count_last_flips)
-=======
 	return 2 * bit_count(_mm_cvtsi128_si64(_mm_or_si128(flip2, _mm_unpackhi_epi64(flip2, flip2))));
->>>>>>> eb84eb8 (Revise avx512 mask usage to ease ternarylogic opt)
 }
diff --git a/src/count_last_flip_bitscan.c b/src/count_last_flip_bitscan.c
index dca921a..6cae24a 100644
--- a/src/count_last_flip_bitscan.c
+++ b/src/count_last_flip_bitscan.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file count_last_flip_bitscan.c
  *
@@ -83,8 +79,6 @@ static const char COUNT_FLIP_5[256] = {
 	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #include "bit_intrinsics.h"
 
 #ifdef lzcnt_u64
@@ -98,46 +92,15 @@ static inline int count_H_flip_left (unsigned long long P, int pos, int mask) {
 		return (lzcnt_u32((P << (8 - pos)) & (mask << 1)) & 0x07) * 2;
 	else
 		return (lzcnt_u32((P >> (pos - 8)) & (mask << 1)) & 0x07) * 2;
-=======
-#include "bit.h"
-=======
-#include "bit_intrinsics.h"
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-#ifdef lzcnt_u64
-
-static inline int count_V_flip_reverse (unsigned long long P, int ofs) {
-	return (lzcnt_u64(P << ofs) & 0x38) >> 2;
-}
-
-static inline int count_H_flip_left (unsigned long long P, int pos, int mask) {
-<<<<<<< HEAD
-	return (_lzcnt_u32((P >> (pos - 8)) & (mask << 1)) & 0x07) * 2;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	if (pos < 8)
-		return (lzcnt_u32((P << (8 - pos)) & (mask << 1)) & 0x07) * 2;
-	else
-		return (lzcnt_u32((P >> (pos - 8)) & (mask << 1)) & 0x07) * 2;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 }
 
 #else
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 // with guardian bit to avoid __builtin_clz(0)	// Not used
 static inline int count_V_flip_reverse (unsigned long long P, int ofs) {
 	return ((__builtin_clzll((P << ofs) | 1) + 1) & 0x38) >> 2;
 }
 
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static const char COUNT_FLIP_L[128] = {
 	 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
 	 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
@@ -145,54 +108,15 @@ static const char COUNT_FLIP_L[128] = {
 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-static inline int count_H_flip_left (unsigned long long P, int pos, int mask) {
-	if (pos < 8)
-		return COUNT_FLIP_L[(P << (7 - pos)) & mask];
-	else
-		return COUNT_FLIP_L[(P >> (pos - 7)) & mask];
-=======
-static inline int count_H1_flip_left (unsigned long long P, int pos, int mask) {
-	return COUNT_FLIP_L[(P << (7 - pos)) & mask];
-}
-
-static inline int count_H_flip_left (unsigned long long P, int pos, int mask) {
-	return COUNT_FLIP_L[(P >> (pos - 7)) & mask];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 static inline int count_H_flip_left (unsigned long long P, int pos, int mask) {
 	if (pos < 8)
 		return COUNT_FLIP_L[(P << (7 - pos)) & mask];
 	else
 		return COUNT_FLIP_L[(P >> (pos - 7)) & mask];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 }
 
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifdef tzcnt_u32
-
-static inline int count_H_flip_right (unsigned long long P, int pos) {
-	if (pos >= 56)
-		return (tzcnt_u32(P >> (pos + 1)) & 0x07) * 2;
-	else if ((pos >= 24) && (pos < 32))
-		return (tzcnt_u32((unsigned int) P >> (pos + 1)) & 0x07) * 2;
-	else
-		return (tzcnt_u32((P >> (pos + 1)) & (0x7f >> (pos & 0x07))) & 0x07) * 2;
-=======
-#if (defined(__BMI__) || defined(__AVX2__)) && !(defined(__GNUC__) && (__GNUC__ < 6))	// GCC Bug 78037
-
-static inline int count_H_flip_right (unsigned long long P, int pos, int mask) {
-	return (_tzcnt_u32((P >> (pos + 1)) & mask) & 0x07) * 2;
-}
-
-static inline int count_H8_flip_right (unsigned long long P, int pos) {
-	return (_tzcnt_u32(P >> (pos + 1)) & 0x07) * 2;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 #ifdef tzcnt_u32
 
 static inline int count_H_flip_right (unsigned long long P, int pos) {
@@ -202,7 +126,6 @@ static inline int count_H_flip_right (unsigned long long P, int pos) {
 		return (tzcnt_u32((unsigned int) P >> (pos + 1)) & 0x07) * 2;
 	else
 		return (tzcnt_u32((P >> (pos + 1)) & (0x7f >> (pos & 0x07))) & 0x07) * 2;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 }
 
 #else
@@ -214,10 +137,6 @@ static const char COUNT_FLIP_R[128] = {
 	10,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,  8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static inline int count_H_flip_right (unsigned long long P, int pos) {
 	if (pos >= 56)
 		return COUNT_FLIP_R[P >> (pos + 1)];
@@ -225,37 +144,14 @@ static inline int count_H_flip_right (unsigned long long P, int pos) {
 		return COUNT_FLIP_R[(unsigned int) P >> (pos + 1)];
 	else
 		return COUNT_FLIP_R[(P >> (pos + 1)) & (0x7f >> (pos & 0x07))];
-<<<<<<< HEAD
-=======
-static inline int count_H_flip_right (unsigned long long P, int pos, int mask) {
-	return COUNT_FLIP_R[(P >> (pos + 1)) & mask];
-}
-
-static inline int count_H8_flip_right (unsigned long long P, int pos) {
-	return COUNT_FLIP_R[P >> (pos + 1)];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 }
 
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef lzcnt_u64
-
-/**
- * Count last flipped discs when playing on square A1/A2.
-=======
-/**
- * Count last flipped discs when playing on square A1.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 #ifndef lzcnt_u64
 
 /**
  * Count last flipped discs when playing on square A1/A2.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
@@ -265,33 +161,15 @@ static int count_last_flip_A1(const unsigned long long P)
 	int n_flipped;
 	unsigned long long P_v, P_d9;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	P_v = P & 0x0101010101010100;
 	n_flipped  = ((P_v & -P_v) * 0x000020406080a0c0) >> 60;
 	n_flipped += count_H_flip_right(P, 0);
 	P_d9 = P & 0x8040201008040200;
 	n_flipped += (((P_d9 & -P_d9) >> 1) * 0x000010100c080503) >> 60;
-<<<<<<< HEAD
-=======
-	P_v = P & 0x0101010101010100ULL;
-	n_flipped  = ((P_v & -P_v) * 0x000020406080a0c0ULL) >> 60;
-	n_flipped += count_H_flip_right(P, 0, 0x7f);
-	P_d9 = P & 0x8040201008040200ULL;
-	n_flipped += (((P_d9 & -P_d9) >> 1) * 0x000010100c080503ULL) >> 60;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return n_flipped;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_A2(const unsigned long long P) {
 	return count_last_flip_A1(P >> 8);
 }
@@ -304,17 +182,8 @@ static int count_last_flip_A7(const unsigned long long P) {
 	return count_last_flip_A1(vertical_mirror(P) >> 8);
 }
 
-<<<<<<< HEAD
 /**
  * Count last flipped discs when playing on square B1/B2.
-=======
-/**
- * Count last flipped discs when playing on square B1.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-/**
- * Count last flipped discs when playing on square B1/B2.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
@@ -324,33 +193,15 @@ static int count_last_flip_B1(const unsigned long long P)
 	int n_flipped;
 	unsigned long long P_v, P_d9;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	P_v = P & 0x0202020202020200;
 	n_flipped  = ((P_v & -P_v) * 0x0000102030405060) >> 60;
 	n_flipped += count_H_flip_right(P, 1);
 	P_d9 = P & 0x0080402010080400;
 	n_flipped += ((P_d9 & -P_d9) * 0x0000040403020140) >> 60;
-<<<<<<< HEAD
-=======
-	P_v = P & 0x0202020202020200ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000102030405060ULL) >> 60;
-	n_flipped += count_H_flip_right(P, 1, 0x3f);
-	P_d9 = P & 0x0080402010080400ULL;
-	n_flipped += ((P_d9 & -P_d9) * 0x0000040403020140ULL) >> 60;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return n_flipped;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_B2(const unsigned long long P) {
 	return count_last_flip_B1(P >> 8);
 }
@@ -363,17 +214,8 @@ static int count_last_flip_B7(const unsigned long long P) {
 	return count_last_flip_B1(vertical_mirror(P) >> 8);
 }
 
-<<<<<<< HEAD
-/**
- * Count last flipped discs when playing on square C1/C2.
-=======
-/**
- * Count last flipped discs when playing on square C1.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 /**
  * Count last flipped discs when playing on square C1/C2.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
@@ -383,32 +225,14 @@ static int count_last_flip_C1(const unsigned long long P)
 	int n_flipped;
 	unsigned long long P_v;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	P_v = P & 0x0404040404040400;
-	n_flipped  = ((P_v & -P_v) * 0x0000081018202830) >> 60;
-	n_flipped += COUNT_FLIP_2[P & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000804020110A04) * 0x0101010101010101) >> 56];	// A3C1H6
-=======
-	P_v = P & 0x0404040404040400ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000081018202830ULL) >> 60;
-	n_flipped += COUNT_FLIP_2[P & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000804020110A04ULL) * 0x0101010101010101ULL) >> 56];	// A3C1H6
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	P_v = P & 0x0404040404040400;
 	n_flipped  = ((P_v & -P_v) * 0x0000081018202830) >> 60;
 	n_flipped += COUNT_FLIP_2[P & 0xff];
 	n_flipped += COUNT_FLIP_2[((P & 0x0000804020110A04) * 0x0101010101010101) >> 56];	// A3C1H6
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return n_flipped;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_C2(const unsigned long long P) {
 	return count_last_flip_C1(P >> 8);
 }
@@ -421,17 +245,8 @@ static int count_last_flip_C7(const unsigned long long P) {
 	return count_last_flip_C1(vertical_mirror(P) >> 8);
 }
 
-<<<<<<< HEAD
-/**
- * Count last flipped discs when playing on square D1/D2.
-=======
-/**
- * Count last flipped discs when playing on square D1.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 /**
  * Count last flipped discs when playing on square D1/D2.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
@@ -441,32 +256,14 @@ static int count_last_flip_D1(const unsigned long long P)
 	int n_flipped;
 	unsigned long long P_v;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	P_v = P & 0x0808080808080800;
-	n_flipped  = ((P_v & -P_v) * 0x000004080c101418) >> 60;
-	n_flipped += COUNT_FLIP_3[P & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000008041221408) * 0x0101010101010101) >> 56];	// A4D1H5
-=======
-	P_v = P & 0x0808080808080800ULL;
-	n_flipped  = ((P_v & -P_v) * 0x000004080c101418ULL) >> 60;
-	n_flipped += COUNT_FLIP_3[P & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000008041221408ULL) * 0x0101010101010101ULL) >> 56];	// A4D1H5
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	P_v = P & 0x0808080808080800;
 	n_flipped  = ((P_v & -P_v) * 0x000004080c101418) >> 60;
 	n_flipped += COUNT_FLIP_3[P & 0xff];
 	n_flipped += COUNT_FLIP_3[((P & 0x0000008041221408) * 0x0101010101010101) >> 56];	// A4D1H5
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return n_flipped;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_D2(const unsigned long long P) {
 	return count_last_flip_D1(P >> 8);
 }
@@ -479,17 +276,8 @@ static int count_last_flip_D7(const unsigned long long P) {
 	return count_last_flip_D1(vertical_mirror(P) >> 8);
 }
 
-<<<<<<< HEAD
-/**
- * Count last flipped discs when playing on square E1/E2.
-=======
-/**
- * Count last flipped discs when playing on square E1.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 /**
  * Count last flipped discs when playing on square E1/E2.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
@@ -499,32 +287,14 @@ static int count_last_flip_E1(const unsigned long long P)
 	int n_flipped;
 	unsigned long long P_v;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	P_v = P & 0x1010101010101000;
-	n_flipped  = ((P_v & -P_v) * 0x0000020406080a0c) >> 60;
-	n_flipped += COUNT_FLIP_4[P & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000000182442810) * 0x0101010101010101) >> 56];	// A5E1H4
-=======
-	P_v = P & 0x1010101010101000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000020406080a0cULL) >> 60;
-	n_flipped += COUNT_FLIP_4[P & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000000182442810ULL) * 0x0101010101010101ULL) >> 56];	// A5E1H4
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	P_v = P & 0x1010101010101000;
 	n_flipped  = ((P_v & -P_v) * 0x0000020406080a0c) >> 60;
 	n_flipped += COUNT_FLIP_4[P & 0xff];
 	n_flipped += COUNT_FLIP_4[((P & 0x0000000182442810) * 0x0101010101010101) >> 56];	// A5E1H4
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return n_flipped;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_E2(const unsigned long long P) {
 	return count_last_flip_E1(P >> 8);
 }
@@ -537,17 +307,8 @@ static int count_last_flip_E7(const unsigned long long P) {
 	return count_last_flip_E1(vertical_mirror(P) >> 8);
 }
 
-<<<<<<< HEAD
-/**
- * Count last flipped discs when playing on square F1/F2.
-=======
-/**
- * Count last flipped discs when playing on square F1.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 /**
  * Count last flipped discs when playing on square F1/F2.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
@@ -557,32 +318,14 @@ static int count_last_flip_F1(const unsigned long long P)
 	int n_flipped;
 	unsigned long long P_v;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	P_v = P & 0x2020202020202000;
-	n_flipped  = ((P_v & -P_v) * 0x0000010203040506) >> 60;
-	n_flipped += COUNT_FLIP_5[P & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000010204885020) * 0x0101010101010101) >> 56];	// A6F1H3
-=======
-	P_v = P & 0x2020202020202000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000010203040506ULL) >> 60;
-	n_flipped += COUNT_FLIP_5[P & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000010204885020ULL) * 0x0101010101010101ULL) >> 56];	// A6F1H3
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	P_v = P & 0x2020202020202000;
 	n_flipped  = ((P_v & -P_v) * 0x0000010203040506) >> 60;
 	n_flipped += COUNT_FLIP_5[P & 0xff];
 	n_flipped += COUNT_FLIP_5[((P & 0x0000010204885020) * 0x0101010101010101) >> 56];	// A6F1H3
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return n_flipped;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_F2(const unsigned long long P) {
 	return count_last_flip_F1(P >> 8);
 }
@@ -595,17 +338,8 @@ static int count_last_flip_F7(const unsigned long long P) {
 	return count_last_flip_F1(vertical_mirror(P) >> 8);
 }
 
-<<<<<<< HEAD
-/**
- * Count last flipped discs when playing on square G1/G2.
-=======
-/**
- * Count last flipped discs when playing on square G1.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 /**
  * Count last flipped discs when playing on square G1/G2.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
@@ -615,33 +349,15 @@ static int count_last_flip_G1(const unsigned long long P)
 	int n_flipped;
 	unsigned long long P_v, P_d7;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	P_v = P & 0x4040404040404000;
 	n_flipped  = ((P_v & -P_v) * 0x0000008101820283) >> 60;
 	n_flipped += count_H_flip_left(P, 6, 0x7e);
 	P_d7 = P & 0x0001020408102000;
 	n_flipped += ((P_d7 & -P_d7) * 0x000002081840a000) >> 60;
-<<<<<<< HEAD
-=======
-	P_v = P & 0x4040404040404000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000008101820283ULL) >> 60;
-	n_flipped += count_H1_flip_left(P, 6, 0x7e);
-	P_d7 = P & 0x0001020408102000ULL;
-	n_flipped += ((P_d7 & -P_d7) * 0x000002081840a000ULL) >> 60;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return n_flipped;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_G2(const unsigned long long P) {
 	return count_last_flip_G1(P >> 8);
 }
@@ -654,17 +370,8 @@ static int count_last_flip_G7(const unsigned long long P) {
 	return count_last_flip_G1(vertical_mirror(P) >> 8);
 }
 
-<<<<<<< HEAD
-/**
- * Count last flipped discs when playing on square H1/H2.
-=======
-/**
- * Count last flipped discs when playing on square H1.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 /**
  * Count last flipped discs when playing on square H1/H2.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
@@ -674,31 +381,15 @@ static int count_last_flip_H1(const unsigned long long P)
 	int n_flipped;
 	unsigned long long P_v, P_d7;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	P_v = P & 0x8080808080808000;
 	n_flipped  = (((P_v & -P_v) >> 1) * 0x0000008101820283) >> 60;
 	n_flipped += count_H_flip_left(P, 7, 0x7f);
 	P_d7 = P & 0x0102040810204000;
 	n_flipped += ((P_d7 & -P_d7) * 0x000001040c2050c0) >> 60;
-<<<<<<< HEAD
-=======
-	P_v = P & 0x8080808080808000ULL;
-	n_flipped  = (((P_v & -P_v) >> 1) * 0x0000008101820283ULL) >> 60;
-	n_flipped += count_H1_flip_left(P, 7, 0x7f);
-	P_d7 = P & 0x0102040810204000ULL;
-	n_flipped += ((P_d7 & -P_d7) * 0x000001040c2050c0ULL) >> 60;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return n_flipped;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 static int count_last_flip_H2(const unsigned long long P) {
 	return count_last_flip_H1(P >> 8);
 }
@@ -712,690 +403,258 @@ static int count_last_flip_H7(const unsigned long long P) {
 }
 
 #endif // no lzcnt_u64
-=======
+
 /**
- * Count last flipped discs when playing on square A2.
+ * Count last flipped discs when playing on square C3.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_A2(const unsigned long long P)
+static int count_last_flip_C3(const unsigned long long P)
 {
 	int n_flipped;
-	unsigned long long P_v, P_d9;
 
-	P_v = P & 0x0101010101010000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x00000020406080a0ULL) >> 60;
-	n_flipped += count_H_flip_right(P, 8, 0x7f);
-	P_d9 = P & 0x4020100804020000ULL;
-	n_flipped += (((P_d9 & -P_d9) >> 1) * 0x00000010100c0805ULL) >> 60;
+	n_flipped  = COUNT_FLIP_2[((P & 0x0404040404040404) * 0x0040810204081020) >> 56];
+	n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff];
+	n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810) * 0x0101010101010101) >> 56];
+	n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201) * 0x0101010101010101) >> 56];
 
 	return n_flipped;
-=======
-static int count_last_flip_H2(const unsigned long long P) {
-	return count_last_flip_H1(P >> 8);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-}
-
-static int count_last_flip_H8(const unsigned long long P) {
-	return count_last_flip_H1(vertical_mirror(P));
-}
-
-static int count_last_flip_H7(const unsigned long long P) {
-	return count_last_flip_H1(vertical_mirror(P) >> 8);
 }
 
-<<<<<<< HEAD
 /**
- * Count last flipped discs when playing on square D2.
+ * Count last flipped discs when playing on square D3.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_D2(const unsigned long long P)
+static int count_last_flip_D3(const unsigned long long P)
 {
 	int n_flipped;
-	unsigned long long P_v;
 
-	P_v = P & 0x0808080808080000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x00000004080c1014ULL) >> 60;
-	n_flipped += COUNT_FLIP_3[(P >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000804122140800ULL) * 0x0101010101010101ULL) >> 56];	// A5D2H6
+	n_flipped  = COUNT_FLIP_2[((P & 0x0808080808080808) * 0x0020408102040810) >> 56];
+	n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff];
+	n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020) * 0x0101010101010101) >> 56];
+	n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402) * 0x0101010101010101) >> 56];
 
 	return n_flipped;
 }
 
 /**
- * Count last flipped discs when playing on square E2.
+ * Count last flipped discs when playing on square E3.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_E2(const unsigned long long P)
+static int count_last_flip_E3(const unsigned long long P)
 {
 	int n_flipped;
-	unsigned long long P_v;
 
-	P_v = P & 0x1010101010100000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x000000020406080aULL) >> 60;
-	n_flipped += COUNT_FLIP_4[(P >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000018244281000ULL) * 0x0101010101010101ULL) >> 56];	// A6E2H5
+	n_flipped  = COUNT_FLIP_2[((P & 0x1010101010101010) * 0x0010204081020408) >> 56];
+	n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff];
+	n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040) * 0x0101010101010101) >> 56];
+	n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804) * 0x0101010101010101) >> 56];
 
 	return n_flipped;
 }
 
 /**
- * Count last flipped discs when playing on square F2.
+ * Count last flipped discs when playing on square F3.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_F2(const unsigned long long P)
+static int count_last_flip_F3(const unsigned long long P)
 {
 	int n_flipped;
-	unsigned long long P_v;
 
-	P_v = P & 0x2020202020200000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000000102030405ULL) >> 60;
-	n_flipped += COUNT_FLIP_5[(P >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0001020488502000ULL) * 0x0101010101010101ULL) >> 56];	// A7F2H4
+	n_flipped  = COUNT_FLIP_2[((P & 0x2020202020202020) * 0x0008102040810204) >> 56];
+	n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff];
+	n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080) * 0x0101010101010101) >> 56];
+	n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008) * 0x0101010101010101) >> 56];
 
 	return n_flipped;
 }
 
 /**
- * Count last flipped discs when playing on square G2.
+ * Count last flipped discs when playing on square A4.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_G2(const unsigned long long P)
+static int count_last_flip_A4(const unsigned long long P)
 {
 	int n_flipped;
-	unsigned long long P_v, P_d7;
 
-	P_v = P & 0x4040404040400000ULL;
-	n_flipped  = (((P_v & -P_v) >> 1) * 0x0000000102030405ULL) >> 60;
-	n_flipped += count_H_flip_left(P, 14, 0x7e);
-	P_d7 = P & 0x0102040810200000ULL;
-	n_flipped += ((P_d7 & -P_d7) * 0x00000002081840a0ULL) >> 60;
+	n_flipped  = COUNT_FLIP_3[((P & 0x1008040201010101) * 0x0102040808080808) >> 56];	// A1A4E8
+	n_flipped += count_H_flip_right(P, 24);
+	n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408) * 0x1010101008040201) >> 56];	// D1A4A8
 
 	return n_flipped;
 }
 
 /**
- * Count last flipped discs when playing on square H2.
+ * Count last flipped discs when playing on square B4.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_H2(const unsigned long long P)
+static int count_last_flip_B4(const unsigned long long P)
 {
 	int n_flipped;
-	unsigned long long P_v, P_d7;
 
-	P_v = P & 0x8080808080800000ULL;
-	n_flipped  = (((P_v & -P_v) >> 2) * 0x0000000102030405ULL) >> 60;
-	n_flipped += count_H_flip_left(P, 15, 0x7f);
-	P_d7 = P & 0x0204081020400000ULL;
-	n_flipped += (((P_d7 & -P_d7) >> 2) * 0x0000000410308143ULL) >> 60;
+	n_flipped  = COUNT_FLIP_3[((P & 0x2010080402020202) * 0x0081020404040404) >> 56];	// B1B4F8
+	n_flipped += count_H_flip_right(P, 25);
+	n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810) >> 1) * 0x1010101008040201) >> 56];	// E1B4B8
 
 	return n_flipped;
 }
 
 /**
- * Count last flipped discs when playing on square A3.
+ * Count last flipped discs when playing on square C4.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_A3(const unsigned long long P)
+static int count_last_flip_C4(const unsigned long long P)
 {
 	int n_flipped;
 
-	n_flipped  = COUNT_FLIP_2[((P & 0x2010080402010101ULL) * 0x0102040404040404ULL) >> 56];	// A1A3F8
-	n_flipped += count_H_flip_right(P, 16, 0x7f);
-	n_flipped += COUNT_FLIP_5[((P & 0x0101010101010204ULL) * 0x2020201008040201ULL) >> 56];	// C1A3A8
+	n_flipped  = COUNT_FLIP_3[((P & 0x0404040404040404) * 0x0040810204081020) >> 56];
+	n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff];
+	n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020) * 0x0101010101010101) >> 56];
+	n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100) * 0x0101010101010101) >> 56];
 
 	return n_flipped;
 }
 
 /**
- * Count last flipped discs when playing on square B3.
+ * Count last flipped discs when playing on square D4.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_B3(const unsigned long long P)
+static int count_last_flip_D4(const unsigned long long P)
 {
 	int n_flipped;
 
-	n_flipped  = COUNT_FLIP_2[((P & 0x4020100804020202ULL) * 0x0081020202020202ULL) >> 56];	// B1B3G8
-	n_flipped += count_H_flip_right(P, 17, 0x3f);
-	n_flipped += COUNT_FLIP_5[(((P & 0x0202020202020408ULL) >> 1) * 0x2020201008040201ULL) >> 56];	// D1B3B8
+	n_flipped  = COUNT_FLIP_3[((P & 0x0808080808080808) * 0x0020408102040810) >> 56];
+	n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff];
+	n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040) * 0x0101010101010101) >> 56];
+	n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201) * 0x0101010101010101) >> 56];
 
 	return n_flipped;
 }
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-#endif // no lzcnt_u64
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 /**
- * Count last flipped discs when playing on square C3.
+ * Count last flipped discs when playing on square E4.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_C3(const unsigned long long P)
+static int count_last_flip_E4(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_2[((P & 0x0404040404040404) * 0x0040810204081020) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201) * 0x0101010101010101) >> 56];
-=======
-	n_flipped  = COUNT_FLIP_2[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_2[((P & 0x0404040404040404) * 0x0040810204081020) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201) * 0x0101010101010101) >> 56];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
+	n_flipped  = COUNT_FLIP_3[((P & 0x1010101010101010) * 0x0010204081020408) >> 56];
+	n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff];
+	n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080) * 0x0101010101010101) >> 56];
+	n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402) * 0x0101010101010101) >> 56];
 
 	return n_flipped;
 }
 
 /**
- * Count last flipped discs when playing on square D3.
+ * Count last flipped discs when playing on square F4.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_D3(const unsigned long long P)
+static int count_last_flip_F4(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_2[((P & 0x0808080808080808) * 0x0020408102040810) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402) * 0x0101010101010101) >> 56];
-=======
-	n_flipped  = COUNT_FLIP_2[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_2[((P & 0x0808080808080808) * 0x0020408102040810) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402) * 0x0101010101010101) >> 56];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
+	n_flipped  = COUNT_FLIP_3[((P & 0x2020202020202020) * 0x0008102040810204) >> 56];
+	n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff];
+	n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000) * 0x0101010101010101) >> 56];
+	n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804) * 0x0101010101010101) >> 56];
 
 	return n_flipped;
 }
 
 /**
- * Count last flipped discs when playing on square E3.
+ * Count last flipped discs when playing on square G4.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_E3(const unsigned long long P)
+static int count_last_flip_G4(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_2[((P & 0x1010101010101010) * 0x0010204081020408) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804) * 0x0101010101010101) >> 56];
-=======
-	n_flipped  = COUNT_FLIP_2[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_2[((P & 0x1010101010101010) * 0x0010204081020408) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804) * 0x0101010101010101) >> 56];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
+	n_flipped  = COUNT_FLIP_3[((P & 0x4040404040201008) * 0x0020202020408102) >> 56];	// D1G4G8
+	n_flipped += count_H_flip_left(P, 30, 0x7e);
+	n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040) >> 2) * 0x0804020101010101) >> 56];	// G1G4C8
 
 	return n_flipped;
 }
 
 /**
- * Count last flipped discs when playing on square F3.
+ * Count last flipped discs when playing on square H4.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_F3(const unsigned long long P)
+static int count_last_flip_H4(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_2[((P & 0x2020202020202020) * 0x0008102040810204) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008) * 0x0101010101010101) >> 56];
-=======
-	n_flipped  = COUNT_FLIP_2[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008ULL) * 0x0101010101010101ULL) >> 56];
+	n_flipped  = COUNT_FLIP_3[((P & 0x8080808080402010) * 0x0010101010204081) >> 56];	// E1H4H8
+	n_flipped += count_H_flip_left(P, 31, 0x7f);
+	n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080) >> 3) * 0x0804020101010101) >> 56];	// H1H4D8
 
 	return n_flipped;
 }
 
 /**
- * Count last flipped discs when playing on square G3.
+ * Count last flipped discs when playing on square A5.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_G3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x4040404040402010ULL) * 0x0010101020408102ULL) >> 56];	// E1G3G8
-	n_flipped += count_H_flip_left(P, 22, 0x7e);
-	n_flipped += COUNT_FLIP_5[(((P & 0x0204081020404040ULL) >> 1) * 0x0402010101010101ULL) >> 56];	// G1G3B8
-
-	return n_flipped;
+static int count_last_flip_A5(const unsigned long long P) {
+	return count_last_flip_A4(vertical_mirror(P));
 }
 
 /**
- * Count last flipped discs when playing on square H3.
+ * Count last flipped discs when playing on square B5.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_H3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x8080808080804020ULL) * 0x0008080810204081ULL) >> 56];	// F1H3H8
-	n_flipped += count_H_flip_left(P, 23, 0x7f);
-	n_flipped += COUNT_FLIP_5[(((P & 0x0408102040808080ULL) >> 2) * 0x0402010101010101ULL) >> 56];	// H1H3C8
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_2[((P & 0x2020202020202020) * 0x0008102040810204) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008) * 0x0101010101010101) >> 56];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
+static int count_last_flip_B5(const unsigned long long P) {
+	return count_last_flip_B4(vertical_mirror(P));
 }
 
 /**
- * Count last flipped discs when playing on square A4.
+ * Count last flipped discs when playing on square C5.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_A4(const unsigned long long P)
-{
-	int n_flipped;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_3[((P & 0x1008040201010101) * 0x0102040808080808) >> 56];	// A1A4E8
-	n_flipped += count_H_flip_right(P, 24);
-	n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408) * 0x1010101008040201) >> 56];	// D1A4A8
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x1008040201010101ULL) * 0x0102040808080808ULL) >> 56];	// A1A4E8
-	n_flipped += count_H_flip_right(P, 24, 0x7f);
-	n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408ULL) * 0x1010101008040201ULL) >> 56];	// D1A4A8
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x1008040201010101) * 0x0102040808080808) >> 56];	// A1A4E8
-	n_flipped += count_H_flip_right(P, 24);
-	n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408) * 0x1010101008040201) >> 56];	// D1A4A8
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
+static int count_last_flip_C5(const unsigned long long P) {
+	return count_last_flip_C4(vertical_mirror(P));
 }
 
 /**
- * Count last flipped discs when playing on square B4.
+ * Count last flipped discs when playing on square D5.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-static int count_last_flip_B4(const unsigned long long P)
-{
-	int n_flipped;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_3[((P & 0x2010080402020202) * 0x0081020404040404) >> 56];	// B1B4F8
-	n_flipped += count_H_flip_right(P, 25);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810) >> 1) * 0x1010101008040201) >> 56];	// E1B4B8
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x2010080402020202ULL) * 0x0081020404040404ULL) >> 56];	// B1B4F8
-	n_flipped += count_H_flip_right(P, 25, 0x3f);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810ULL) >> 1) * 0x1010101008040201ULL) >> 56];	// E1B4B8
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x2010080402020202) * 0x0081020404040404) >> 56];	// B1B4F8
-	n_flipped += count_H_flip_right(P, 25);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810) >> 1) * 0x1010101008040201) >> 56];	// E1B4B8
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C4(const unsigned long long P)
-{
-	int n_flipped;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_3[((P & 0x0404040404040404) * 0x0040810204081020) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100) * 0x0101010101010101) >> 56];
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x0404040404040404) * 0x0040810204081020) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100) * 0x0101010101010101) >> 56];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D4(const unsigned long long P)
-{
-	int n_flipped;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_3[((P & 0x0808080808080808) * 0x0020408102040810) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201) * 0x0101010101010101) >> 56];
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x0808080808080808) * 0x0020408102040810) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201) * 0x0101010101010101) >> 56];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E4(const unsigned long long P)
-{
-	int n_flipped;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_3[((P & 0x1010101010101010) * 0x0010204081020408) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402) * 0x0101010101010101) >> 56];
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x1010101010101010) * 0x0010204081020408) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402) * 0x0101010101010101) >> 56];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F4(const unsigned long long P)
-{
-	int n_flipped;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_3[((P & 0x2020202020202020) * 0x0008102040810204) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804) * 0x0101010101010101) >> 56];
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x2020202020202020) * 0x0008102040810204) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000) * 0x0101010101010101) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804) * 0x0101010101010101) >> 56];
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G4(const unsigned long long P)
-{
-	int n_flipped;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_3[((P & 0x4040404040201008) * 0x0020202020408102) >> 56];	// D1G4G8
-	n_flipped += count_H_flip_left(P, 30, 0x7e);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040) >> 2) * 0x0804020101010101) >> 56];	// G1G4C8
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x4040404040201008ULL) * 0x0020202020408102ULL) >> 56];	// D1G4G8
-	n_flipped += count_H_flip_left(P, 30, 0x7e);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040ULL) >> 2) * 0x0804020101010101ULL) >> 56];	// G1G4C8
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x4040404040201008) * 0x0020202020408102) >> 56];	// D1G4G8
-	n_flipped += count_H_flip_left(P, 30, 0x7e);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040) >> 2) * 0x0804020101010101) >> 56];	// G1G4C8
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H4(const unsigned long long P)
-{
-	int n_flipped;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP_3[((P & 0x8080808080402010) * 0x0010101010204081) >> 56];	// E1H4H8
-	n_flipped += count_H_flip_left(P, 31, 0x7f);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080) >> 3) * 0x0804020101010101) >> 56];	// H1H4D8
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x8080808080402010ULL) * 0x0010101010204081ULL) >> 56];	// E1H4H8
-	n_flipped += count_H_flip_left(P, 31, 0x7f);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080ULL) >> 3) * 0x0804020101010101ULL) >> 56];	// H1H4D8
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped  = COUNT_FLIP_3[((P & 0x8080808080402010) * 0x0010101010204081) >> 56];	// E1H4H8
-	n_flipped += count_H_flip_left(P, 31, 0x7f);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080) >> 3) * 0x0804020101010101) >> 56];	// H1H4D8
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static int count_last_flip_A5(const unsigned long long P) {
-	return count_last_flip_A4(vertical_mirror(P));
-=======
-static int count_last_flip_A5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x0804020101010101ULL) * 0x0102040810101010ULL) >> 56];	// A1A5D8
-	n_flipped += count_H_flip_right(P, 32, 0x7f);
-	n_flipped += COUNT_FLIP_3[((P & 0x0101010102040810ULL) * 0x0808080808040201ULL) >> 56];	// E1A5A8
-
-	return n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static int count_last_flip_A5(const unsigned long long P) {
-	return count_last_flip_A4(vertical_mirror(P));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-}
-
-/**
- * Count last flipped discs when playing on square B5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static int count_last_flip_B5(const unsigned long long P) {
-	return count_last_flip_B4(vertical_mirror(P));
-=======
-static int count_last_flip_B5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x1008040202020202ULL) * 0x0081020408080808ULL) >> 56];	// B1B5E8
-	n_flipped += count_H_flip_right(P, 33, 0x3f);
-	n_flipped += COUNT_FLIP_3[(((P & 0x0202020204081020ULL) >> 1) * 0x0808080808040201ULL) >> 56];	// F1B5B8
-
-	return n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static int count_last_flip_B5(const unsigned long long P) {
-	return count_last_flip_B4(vertical_mirror(P));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-}
-
-/**
- * Count last flipped discs when playing on square C5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static int count_last_flip_C5(const unsigned long long P) {
-	return count_last_flip_C4(vertical_mirror(P));
-=======
-static int count_last_flip_C5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static int count_last_flip_C5(const unsigned long long P) {
-	return count_last_flip_C4(vertical_mirror(P));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-}
-
-/**
- * Count last flipped discs when playing on square D5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static int count_last_flip_D5(const unsigned long long P) {
-	return count_last_flip_D4(vertical_mirror(P));
-=======
-static int count_last_flip_D5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static int count_last_flip_D5(const unsigned long long P) {
-	return count_last_flip_D4(vertical_mirror(P));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-}
+static int count_last_flip_D5(const unsigned long long P) {
+	return count_last_flip_D4(vertical_mirror(P));
+}
 
 /**
  * Count last flipped discs when playing on square E5.
@@ -1403,26 +662,8 @@ static int count_last_flip_D5(const unsigned long long P) {
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static int count_last_flip_E5(const unsigned long long P) {
-	return count_last_flip_E4(vertical_mirror(P));
-=======
-static int count_last_flip_E5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 static int count_last_flip_E5(const unsigned long long P) {
 	return count_last_flip_E4(vertical_mirror(P));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 }
 
 /**
@@ -1431,26 +672,8 @@ static int count_last_flip_E5(const unsigned long long P) {
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static int count_last_flip_F5(const unsigned long long P) {
-	return count_last_flip_F4(vertical_mirror(P));
-=======
-static int count_last_flip_F5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 static int count_last_flip_F5(const unsigned long long P) {
 	return count_last_flip_F4(vertical_mirror(P));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 }
 
 /**
@@ -1459,25 +682,8 @@ static int count_last_flip_F5(const unsigned long long P) {
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 static int count_last_flip_G5(const unsigned long long P) {
 	return count_last_flip_G4(vertical_mirror(P));
-=======
-static int count_last_flip_G5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x4040404020100804ULL) * 0x0040404040408102ULL) >> 56];	// C1G5G8
-	n_flipped += count_H_flip_left(P, 38, 0x7e);
-	n_flipped += COUNT_FLIP_3[(((P & 0x0810204040404040ULL) >> 3) * 0x1008040201010101ULL) >> 56];	// G1G5D8
-
-	return n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static int count_last_flip_G5(const unsigned long long P) {
-	return count_last_flip_G4(vertical_mirror(P));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 }
 
 /**
@@ -1486,37 +692,12 @@ static int count_last_flip_G5(const unsigned long long P) {
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 static int count_last_flip_H5(const unsigned long long P) {
 	return count_last_flip_H4(vertical_mirror(P));
 }
 
 /**
  * Count last flipped discs when playing on square A3/A6.
-=======
-static int count_last_flip_H5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x8080808040201008ULL) * 0x0020202020204081ULL) >> 56];	// D1H5H8
-	n_flipped += count_H_flip_left(P, 39, 0x7f);
-	n_flipped += COUNT_FLIP_3[(((P & 0x1020408080808080ULL) >> 4) * 0x1008040201010101ULL) >> 56];	// H1H5E8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A6.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static int count_last_flip_H5(const unsigned long long P) {
-	return count_last_flip_H4(vertical_mirror(P));
-}
-
-/**
- * Count last flipped discs when playing on square A3/A6.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
@@ -1525,10 +706,6 @@ static int count_last_flip_A6(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 #ifdef __ARM_FEATURE_CLZ // shorter on arm
 	n_flipped  = count_V_flip_reverse((P & 0x0000000101010101), 31);
 	n_flipped += count_V_flip_reverse((P & 0x0000000204081020), 24);
@@ -1538,37 +715,16 @@ static int count_last_flip_A6(const unsigned long long P)
 	n_flipped += COUNT_FLIP_2[((P & 0x0101010204081020) * 0x0404040404040201) >> 56];	// F1A6A8
 #endif
 	n_flipped += count_H_flip_right(P, 40);
-<<<<<<< HEAD
-=======
-	n_flipped  = COUNT_FLIP_5[((P & 0x0402010101010101ULL) * 0x0102040810202020ULL) >> 56];	// A1A6C8
-	n_flipped += count_H_flip_right(P, 40, 0x7f);
-	n_flipped += COUNT_FLIP_2[((P & 0x0101010204081020ULL) * 0x0404040404040201ULL) >> 56];	// F1A6A8
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return n_flipped;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_A3(const unsigned long long P) {
 	return count_last_flip_A6(vertical_mirror(P));
 }
 
-<<<<<<< HEAD
 /**
  * Count last flipped discs when playing on square B3/B6.
-=======
-/**
- * Count last flipped discs when playing on square B6.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-/**
- * Count last flipped discs when playing on square B3/B6.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
@@ -1577,10 +733,6 @@ static int count_last_flip_B6(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 #ifdef __ARM_FEATURE_CLZ
 	n_flipped  = count_V_flip_reverse((P & 0x0000000202020202), 30);
 	n_flipped += count_V_flip_reverse((P & 0x0000000408102040), 23);
@@ -1590,492 +742,123 @@ static int count_last_flip_B6(const unsigned long long P)
 	n_flipped += COUNT_FLIP_2[(((P & 0x0202020408102040) >> 1) * 0x0404040404040201) >> 56];	// G1B6B8
 #endif
 	n_flipped += count_H_flip_right(P, 41);
-<<<<<<< HEAD
-=======
-	n_flipped  = COUNT_FLIP_5[((P & 0x0804020202020202ULL) * 0x0081020408101010ULL) >> 56];	// B1B6D8
-	n_flipped += count_H_flip_right(P, 41, 0x3f);
-	n_flipped += COUNT_FLIP_2[(((P & 0x0202020408102040ULL) >> 1) * 0x0404040404040201ULL) >> 56];	// G1B6B8
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
-}
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-static int count_last_flip_B3(const unsigned long long P) {
-	return count_last_flip_B6(vertical_mirror(P));
-}
-
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-/**
- * Count last flipped discs when playing on square C6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static int count_last_flip_C6(const unsigned long long P) {
-	return count_last_flip_C3(vertical_mirror(P));
-=======
-static int count_last_flip_C6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x1008040201000000ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static int count_last_flip_C6(const unsigned long long P) {
-	return count_last_flip_C3(vertical_mirror(P));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-}
-
-/**
- * Count last flipped discs when playing on square D6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static int count_last_flip_D6(const unsigned long long P) {
-	return count_last_flip_D3(vertical_mirror(P));
-=======
-static int count_last_flip_D6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static int count_last_flip_D6(const unsigned long long P) {
-	return count_last_flip_D3(vertical_mirror(P));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-}
-
-/**
- * Count last flipped discs when playing on square E6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static int count_last_flip_E6(const unsigned long long P) {
-	return count_last_flip_E3(vertical_mirror(P));
-=======
-static int count_last_flip_E6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static int count_last_flip_E6(const unsigned long long P) {
-	return count_last_flip_E3(vertical_mirror(P));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-}
-
-/**
- * Count last flipped discs when playing on square F6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static int count_last_flip_F6(const unsigned long long P) {
-	return count_last_flip_F3(vertical_mirror(P));
-}
-
-/**
- * Count last flipped discs when playing on square G3/G6.
-=======
-static int count_last_flip_F6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0810204080000000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
 
 	return n_flipped;
 }
 
-/**
- * Count last flipped discs when playing on square G6.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static int count_last_flip_F6(const unsigned long long P) {
-	return count_last_flip_F3(vertical_mirror(P));
-}
-
-/**
- * Count last flipped discs when playing on square G3/G6.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G6(const unsigned long long P)
-{
-	int n_flipped;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-#ifdef __ARM_FEATURE_CLZ
-	n_flipped  = count_V_flip_reverse((P & 0x0000004040404040), 23);
-	n_flipped += count_V_flip_reverse((P & 0x0000002010080402), 24);
-	n_flipped += (((P >> 62) & ~(P >> 54) & 1) + ((P >> 60) & ~(P >> 53) & 1)) * 2;
-#else
-	n_flipped  = COUNT_FLIP_5[((P & 0x4040402010080402) * 0x0080808080808102) >> 56];	// B1G6G8
-	n_flipped += COUNT_FLIP_2[(((P & 0x1020404040404040) >> 4) * 0x2010080402010101) >> 56];	// G1G6E8
-#endif
-<<<<<<< HEAD
-	n_flipped += count_H_flip_left(P, 46, 0x7e);
-=======
-	n_flipped  = COUNT_FLIP_5[((P & 0x4040402010080402ULL) * 0x0080808080808102ULL) >> 56];	// B1G6G8
-	n_flipped += count_H_flip_left(P, 46, 0x7e);
-	n_flipped += COUNT_FLIP_2[(((P & 0x1020404040404040ULL) >> 4) * 0x2010080402010101ULL) >> 56];	// G1G6E8
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped += count_H_flip_left(P, 46, 0x7e);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
-}
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-static int count_last_flip_G3(const unsigned long long P) {
-	return count_last_flip_G6(vertical_mirror(P));
-}
-
-<<<<<<< HEAD
-/**
- * Count last flipped discs when playing on square H3/H6.
-=======
-/**
- * Count last flipped discs when playing on square H6.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-/**
- * Count last flipped discs when playing on square H3/H6.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H6(const unsigned long long P)
-{
-	int n_flipped;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-#ifdef __ARM_FEATURE_CLZ
-	n_flipped  = count_V_flip_reverse((P & 0x0000008080808080), 24);
-	n_flipped += count_V_flip_reverse((P & 0x0000004020100804), 25);
-	n_flipped += (((P >> 63) & ~(P >> 55) & 1) + ((P >> 61) & ~(P >> 54) & 1)) * 2;
-#else
-	n_flipped  = COUNT_FLIP_5[((P & 0x8080804020100804) * 0x0040404040404081) >> 56];	// C1H6H8
-	n_flipped += COUNT_FLIP_2[(((P & 0x2040808080808080) >> 5) * 0x2010080402010101) >> 56];	// H1H6F8
-#endif
-<<<<<<< HEAD
-	n_flipped += count_H_flip_left(P, 47, 0x7f);
-=======
-	n_flipped  = COUNT_FLIP_5[((P & 0x8080804020100804ULL) * 0x0040404040404081ULL) >> 56];	// C1H6H8
-	n_flipped += count_H_flip_left(P, 47, 0x7f);
-	n_flipped += COUNT_FLIP_2[(((P & 0x2040808080808080ULL) >> 5) * 0x2010080402010101ULL) >> 56];	// H1H6F8
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flipped += count_H_flip_left(P, 47, 0x7f);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-	return n_flipped;
-}
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-static int count_last_flip_H3(const unsigned long long P) {
-	return count_last_flip_H6(vertical_mirror(P));
-}
-
-#ifdef lzcnt_u64
-
-<<<<<<< HEAD
-/**
- * Count last flipped discs when playing on square A7/A8.
-=======
-/**
- * Count last flipped discs when playing on square A7.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-/**
- * Count last flipped discs when playing on square A7/A8.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_A7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000010101010101ULL), 23);
-	n_flipped += count_H_flip_right(P, 48, 0x7f);
-	n_flipped += count_V_flip_reverse((P & 0x0000020408102040ULL), 16);
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_A7(const unsigned long long P) {
-	return count_last_flip_A2(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square B7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_B7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000020202020202ULL), 22);
-	n_flipped += count_H_flip_right(P, 49, 0x3f);
-	n_flipped += count_V_flip_reverse((P & 0x0000040810204080ULL), 15);
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_B7(const unsigned long long P) {
-	return count_last_flip_B2(vertical_mirror(P));
+static int count_last_flip_B3(const unsigned long long P) {
+	return count_last_flip_B6(vertical_mirror(P));
 }
-#endif
 
 /**
- * Count last flipped discs when playing on square C7.
+ * Count last flipped discs when playing on square C6.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_C7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000040404040404ULL), 21);
-	n_flipped += COUNT_FLIP_2[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x00040A1120408000ULL) * 0x0101010101010101ULL) >> 56];	// A5C7H2
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_C7(const unsigned long long P) {
-	return count_last_flip_C2(vertical_mirror(P));
+static int count_last_flip_C6(const unsigned long long P) {
+	return count_last_flip_C3(vertical_mirror(P));
 }
-#endif
 
 /**
- * Count last flipped discs when playing on square D7.
+ * Count last flipped discs when playing on square D6.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_D7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000080808080808ULL), 20);
-	n_flipped += COUNT_FLIP_3[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0008142241800000ULL) * 0x0101010101010101ULL) >> 56];	// A4D7H3
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_D7(const unsigned long long P) {
-	return count_last_flip_D2(vertical_mirror(P));
+static int count_last_flip_D6(const unsigned long long P) {
+	return count_last_flip_D3(vertical_mirror(P));
 }
-#endif
 
 /**
- * Count last flipped discs when playing on square E7.
+ * Count last flipped discs when playing on square E6.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_E7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000101010101010ULL), 19);
-	n_flipped += COUNT_FLIP_4[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0010284482010000ULL) * 0x0101010101010101ULL) >> 56];	// A3E7H4
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_E7(const unsigned long long P) {
-	return count_last_flip_E2(vertical_mirror(P));
+static int count_last_flip_E6(const unsigned long long P) {
+	return count_last_flip_E3(vertical_mirror(P));
 }
-#endif
 
 /**
- * Count last flipped discs when playing on square F7.
+ * Count last flipped discs when playing on square F6.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_F7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000202020202020ULL), 18);
-	n_flipped += COUNT_FLIP_5[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0020508804020100ULL) * 0x0101010101010101ULL) >> 56];	// A2F7H5
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_F7(const unsigned long long P) {
-	return count_last_flip_F2(vertical_mirror(P));
+static int count_last_flip_F6(const unsigned long long P) {
+	return count_last_flip_F3(vertical_mirror(P));
 }
-#endif
 
 /**
- * Count last flipped discs when playing on square G7.
+ * Count last flipped discs when playing on square G3/G6.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_G7(const unsigned long long P)
+static int count_last_flip_G6(const unsigned long long P)
 {
 	int n_flipped;
 
-	n_flipped  = count_V_flip_reverse((P & 0x0000404040404040ULL), 17);
-	n_flipped += count_H_flip_left(P, 54, 0x7e);
-	n_flipped += count_V_flip_reverse((P & 0x0000201008040201ULL), 18);
+#ifdef __ARM_FEATURE_CLZ
+	n_flipped  = count_V_flip_reverse((P & 0x0000004040404040), 23);
+	n_flipped += count_V_flip_reverse((P & 0x0000002010080402), 24);
+	n_flipped += (((P >> 62) & ~(P >> 54) & 1) + ((P >> 60) & ~(P >> 53) & 1)) * 2;
+#else
+	n_flipped  = COUNT_FLIP_5[((P & 0x4040402010080402) * 0x0080808080808102) >> 56];	// B1G6G8
+	n_flipped += COUNT_FLIP_2[(((P & 0x1020404040404040) >> 4) * 0x2010080402010101) >> 56];	// G1G6E8
+#endif
+	n_flipped += count_H_flip_left(P, 46, 0x7e);
 
 	return n_flipped;
 }
-#else
-static int count_last_flip_G7(const unsigned long long P) {
-	return count_last_flip_G2(vertical_mirror(P));
+
+static int count_last_flip_G3(const unsigned long long P) {
+	return count_last_flip_G6(vertical_mirror(P));
 }
-#endif
 
 /**
- * Count last flipped discs when playing on square H7.
+ * Count last flipped discs when playing on square H3/H6.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_H7(const unsigned long long P)
+static int count_last_flip_H6(const unsigned long long P)
 {
 	int n_flipped;
 
-	n_flipped  = count_V_flip_reverse((P & 0x0000808080808080ULL), 16);
-	n_flipped += count_H_flip_left(P, 55, 0x7f);
-	n_flipped += count_V_flip_reverse((P & 0x0000402010080402ULL), 17);
+#ifdef __ARM_FEATURE_CLZ
+	n_flipped  = count_V_flip_reverse((P & 0x0000008080808080), 24);
+	n_flipped += count_V_flip_reverse((P & 0x0000004020100804), 25);
+	n_flipped += (((P >> 63) & ~(P >> 55) & 1) + ((P >> 61) & ~(P >> 54) & 1)) * 2;
+#else
+	n_flipped  = COUNT_FLIP_5[((P & 0x8080804020100804) * 0x0040404040404081) >> 56];	// C1H6H8
+	n_flipped += COUNT_FLIP_2[(((P & 0x2040808080808080) >> 5) * 0x2010080402010101) >> 56];	// H1H6F8
+#endif
+	n_flipped += count_H_flip_left(P, 47, 0x7f);
 
 	return n_flipped;
 }
-#else
-static int count_last_flip_H7(const unsigned long long P) {
-	return count_last_flip_H2(vertical_mirror(P));
+
+static int count_last_flip_H3(const unsigned long long P) {
+	return count_last_flip_H6(vertical_mirror(P));
 }
-#endif
+
+#ifdef lzcnt_u64
 
 /**
- * Count last flipped discs when playing on square A8.
+ * Count last flipped discs when playing on square A7/A8.
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-#if defined(__LZCNT__) || defined(__AVX2__)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_A8(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = count_V_flip_reverse((P & 0x0101010101010101), 15);
-	n_flipped += count_H_flip_right(P, 56);
-	n_flipped += count_V_flip_reverse((P & 0x0002040810204080), 8);
-
-	return n_flipped;
-}
-
-static int count_last_flip_A7(const unsigned long long P) {
-	return count_last_flip_A8(P << 8);
-}
-
-static int count_last_flip_A1(const unsigned long long P) {
-	return count_last_flip_A8(vertical_mirror(P));
-}
-
-static int count_last_flip_A2(const unsigned long long P) {
-	return count_last_flip_A8(vertical_mirror(P) << 8);
-}
-
-/**
- * Count last flipped discs when playing on square B7/B8.
-=======
-	n_flipped  = count_V_flip_reverse((P & 0x0001010101010101ULL), 15);
-	n_flipped += count_H8_flip_right(P, 56);
-	n_flipped += count_V_flip_reverse((P & 0x0002040810204080ULL), 8);
-=======
 	n_flipped  = count_V_flip_reverse((P & 0x0101010101010101), 15);
 	n_flipped += count_H_flip_right(P, 56);
 	n_flipped += count_V_flip_reverse((P & 0x0002040810204080), 8);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return n_flipped;
 }
@@ -2093,59 +876,18 @@ static int count_last_flip_A2(const unsigned long long P) {
 }
 
 /**
-<<<<<<< HEAD
- * Count last flipped discs when playing on square B8.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
  * Count last flipped discs when playing on square B7/B8.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if defined(__LZCNT__) || defined(__AVX2__)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_B8(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = count_V_flip_reverse((P & 0x0202020202020202), 14);
-	n_flipped += count_H_flip_right(P, 57);
-	n_flipped += count_V_flip_reverse((P & 0x0004081020408000), 7);
-
-	return n_flipped;
-}
-
-static int count_last_flip_B7(const unsigned long long P) {
-	return count_last_flip_B8(P << 8);
-}
-
-static int count_last_flip_B1(const unsigned long long P) {
-	return count_last_flip_B8(vertical_mirror(P));
-}
-
-static int count_last_flip_B2(const unsigned long long P) {
-	return count_last_flip_B8(vertical_mirror(P) << 8);
-}
-
-/**
- * Count last flipped discs when playing on square C7/C8.
-=======
-	n_flipped  = count_V_flip_reverse((P & 0x0002020202020202ULL), 14);
-	n_flipped += count_H8_flip_right(P, 57);
-	n_flipped += count_V_flip_reverse((P & 0x0004081020408000ULL), 7);
-=======
 	n_flipped  = count_V_flip_reverse((P & 0x0202020202020202), 14);
 	n_flipped += count_H_flip_right(P, 57);
 	n_flipped += count_V_flip_reverse((P & 0x0004081020408000), 7);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return n_flipped;
 }
@@ -2163,55 +905,16 @@ static int count_last_flip_B2(const unsigned long long P) {
 }
 
 /**
-<<<<<<< HEAD
- * Count last flipped discs when playing on square C8.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
  * Count last flipped discs when playing on square C7/C8.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if defined(__LZCNT__) || defined(__AVX2__)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_C8(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = count_V_flip_reverse((P & 0x0404040404040404), 13);
-	n_flipped += COUNT_FLIP_2[P >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x040A112040800000) * 0x0101010101010101) >> 56];	// A6C8H3
-
-	return n_flipped;
-}
-
-static int count_last_flip_C7(const unsigned long long P) {
-	return count_last_flip_C8(P << 8);
-}
-
-static int count_last_flip_C1(const unsigned long long P) {
-	return count_last_flip_C8(vertical_mirror(P));
-}
-
-static int count_last_flip_C2(const unsigned long long P) {
-	return count_last_flip_C8(vertical_mirror(P) << 8);
-}
-
-/**
- * Count last flipped discs when playing on square D7/D8.
-=======
-	n_flipped  = count_V_flip_reverse((P & 0x0004040404040404ULL), 13);
-=======
 	n_flipped  = count_V_flip_reverse((P & 0x0404040404040404), 13);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_2[P >> 56];
 	n_flipped += COUNT_FLIP_2[((P & 0x040A112040800000) * 0x0101010101010101) >> 56];	// A6C8H3
 
@@ -2231,55 +934,16 @@ static int count_last_flip_C2(const unsigned long long P) {
 }
 
 /**
-<<<<<<< HEAD
- * Count last flipped discs when playing on square D8.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
  * Count last flipped discs when playing on square D7/D8.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if defined(__LZCNT__) || defined(__AVX2__)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_D8(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = count_V_flip_reverse((P & 0x0808080808080808), 12);
-	n_flipped += COUNT_FLIP_3[P >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x0814224180000000) * 0x0101010101010101) >> 56];	// A5D8H4
-
-	return n_flipped;
-}
-
-static int count_last_flip_D7(const unsigned long long P) {
-	return count_last_flip_D8(P << 8);
-}
-
-static int count_last_flip_D1(const unsigned long long P) {
-	return count_last_flip_D8(vertical_mirror(P));
-}
-
-static int count_last_flip_D2(const unsigned long long P) {
-	return count_last_flip_D8(vertical_mirror(P) << 8);
-}
-
-/**
- * Count last flipped discs when playing on square E7/E8.
-=======
-	n_flipped  = count_V_flip_reverse((P & 0x0008080808080808ULL), 12);
-=======
 	n_flipped  = count_V_flip_reverse((P & 0x0808080808080808), 12);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_3[P >> 56];
 	n_flipped += COUNT_FLIP_3[((P & 0x0814224180000000) * 0x0101010101010101) >> 56];	// A5D8H4
 
@@ -2299,55 +963,16 @@ static int count_last_flip_D2(const unsigned long long P) {
 }
 
 /**
-<<<<<<< HEAD
- * Count last flipped discs when playing on square E8.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
  * Count last flipped discs when playing on square E7/E8.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if defined(__LZCNT__) || defined(__AVX2__)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_E8(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = count_V_flip_reverse((P & 0x1010101010101010), 11);
-	n_flipped += COUNT_FLIP_4[P >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x1028448201000000) * 0x0101010101010101) >> 56];	// A4E8H5
-
-	return n_flipped;
-}
-
-static int count_last_flip_E7(const unsigned long long P) {
-	return count_last_flip_E8(P << 8);
-}
-
-static int count_last_flip_E1(const unsigned long long P) {
-	return count_last_flip_E8(vertical_mirror(P));
-}
-
-static int count_last_flip_E2(const unsigned long long P) {
-	return count_last_flip_E8(vertical_mirror(P) << 8);
-}
-
-/**
- * Count last flipped discs when playing on square F7/F8.
-=======
-	n_flipped  = count_V_flip_reverse((P & 0x0010101010101010ULL), 11);
-=======
 	n_flipped  = count_V_flip_reverse((P & 0x1010101010101010), 11);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_4[P >> 56];
 	n_flipped += COUNT_FLIP_4[((P & 0x1028448201000000) * 0x0101010101010101) >> 56];	// A4E8H5
 
@@ -2367,55 +992,16 @@ static int count_last_flip_E2(const unsigned long long P) {
 }
 
 /**
-<<<<<<< HEAD
- * Count last flipped discs when playing on square F8.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
  * Count last flipped discs when playing on square F7/F8.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if defined(__LZCNT__) || defined(__AVX2__)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_F8(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = count_V_flip_reverse((P & 0x2020202020202020), 10);
-	n_flipped += COUNT_FLIP_5[P >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0050880402010000) * 0x0101010101010101) >> 56];	// A3F8H6
-
-	return n_flipped;
-}
-
-static int count_last_flip_F7(const unsigned long long P) {
-	return count_last_flip_F8(P << 8);
-}
-
-static int count_last_flip_F1(const unsigned long long P) {
-	return count_last_flip_F8(vertical_mirror(P));
-}
-
-static int count_last_flip_F2(const unsigned long long P) {
-	return count_last_flip_F8(vertical_mirror(P) << 8);
-}
-
-/**
- * Count last flipped discs when playing on square G7/G8.
-=======
-	n_flipped  = count_V_flip_reverse((P & 0x0020202020202020ULL), 10);
-=======
 	n_flipped  = count_V_flip_reverse((P & 0x2020202020202020), 10);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += COUNT_FLIP_5[P >> 56];
 	n_flipped += COUNT_FLIP_5[((P & 0x0050880402010000) * 0x0101010101010101) >> 56];	// A3F8H6
 
@@ -2435,55 +1021,16 @@ static int count_last_flip_F2(const unsigned long long P) {
 }
 
 /**
-<<<<<<< HEAD
- * Count last flipped discs when playing on square G8.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
  * Count last flipped discs when playing on square G7/G8.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if defined(__LZCNT__) || defined(__AVX2__)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_G8(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = count_V_flip_reverse((P & 0x4040404040404040), 9);
-	n_flipped += count_H_flip_left(P, 62, 0x7e);
-	n_flipped += count_V_flip_reverse((P & 0x0020100804020100), 10);
-
-	return n_flipped;
-}
-
-static int count_last_flip_G7(const unsigned long long P) {
-	return count_last_flip_G8(P << 8);
-}
-
-static int count_last_flip_G1(const unsigned long long P) {
-	return count_last_flip_G8(vertical_mirror(P));
-}
-
-static int count_last_flip_G2(const unsigned long long P) {
-	return count_last_flip_G8(vertical_mirror(P) << 8);
-}
-
-/**
- * Count last flipped discs when playing on square H7/H8.
-=======
-	n_flipped  = count_V_flip_reverse((P & 0x0040404040404040ULL), 9);
-=======
 	n_flipped  = count_V_flip_reverse((P & 0x4040404040404040), 9);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += count_H_flip_left(P, 62, 0x7e);
 	n_flipped += count_V_flip_reverse((P & 0x0020100804020100), 10);
 
@@ -2503,54 +1050,16 @@ static int count_last_flip_G2(const unsigned long long P) {
 }
 
 /**
-<<<<<<< HEAD
- * Count last flipped discs when playing on square H8.
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
  * Count last flipped discs when playing on square H7/H8.
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
  *
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if defined(__LZCNT__) || defined(__AVX2__)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static int count_last_flip_H8(const unsigned long long P)
 {
 	int n_flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = count_V_flip_reverse((P & 0x8080808080808080), 8);
-	n_flipped += count_H_flip_left(P, 63, 0x7f);
-	n_flipped += count_V_flip_reverse((P & 0x0040201008040201), 9);
-
-	return n_flipped;
-}
-
-static int count_last_flip_H7(const unsigned long long P) {
-	return count_last_flip_H8(P << 8);
-}
-
-static int count_last_flip_H1(const unsigned long long P) {
-	return count_last_flip_H8(vertical_mirror(P));
-}
-
-static int count_last_flip_H2(const unsigned long long P) {
-	return count_last_flip_H8(vertical_mirror(P) << 8);
-}
-
-#endif // lzcnt_u64
-=======
-	n_flipped  = count_V_flip_reverse((P & 0x0080808080808080ULL), 8);
-=======
 	n_flipped  = count_V_flip_reverse((P & 0x8080808080808080), 8);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	n_flipped += count_H_flip_left(P, 63, 0x7f);
 	n_flipped += count_V_flip_reverse((P & 0x0040201008040201), 9);
 
@@ -2560,10 +1069,6 @@ static int count_last_flip_H2(const unsigned long long P) {
 static int count_last_flip_H7(const unsigned long long P) {
 	return count_last_flip_H8(P << 8);
 }
-<<<<<<< HEAD
-#endif
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 
 static int count_last_flip_H1(const unsigned long long P) {
 	return count_last_flip_H8(vertical_mirror(P));
@@ -2574,7 +1079,6 @@ static int count_last_flip_H2(const unsigned long long P) {
 }
 
 #endif // lzcnt_u64
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 /**
  * Count last flipped discs when plassing.
@@ -2608,1442 +1112,3 @@ int (*count_last_flip[])(const unsigned long long) = {
 	count_last_flip_E8, count_last_flip_F8, count_last_flip_G8, count_last_flip_H8,
 	count_last_flip_pass,
 };
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-/**
- * @file count_last_flip_bitscan.c
- *
- *
- * A function is provided to count the number of fipped disc of the last move
- * for each square of the board. These functions are gathered into an array of
- * functions, so that a fast access to each function is allowed. The generic
- * form of the function take as input the player bitboard and return twice
- * the number of flipped disc of the last move.
- *
- * The basic principle is to read into an array a precomputed result. Doing
- * this is easy for a single line ; as we can use arrays of the form:
- *  - COUNT_FLIP[square where we play][8-bits disc pattern].
- * The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by a simple
- * multiplication and to right-shift the result to scale it into a number
- * between 0 and 255.
- * Once we get our 8-bits disc patterns, we directly get the number of
- * flipped discs from the precomputed array, and add them from each flipping
- * lines.
- * For optimization purpose, the value returned is twice the number of flipped
- * disc, to facilitate the computation of disc difference.
- *
- * With 135 degree merge, instead of Valery ClaudePierre's modification.
- *
- * For top to bottom flip, LS1B isolation (http://chessprogramming.wikispaces.com/
- * General+Setwise+Operations) is used to get the outflank bit.
- *
- * @date 1998 - 2018
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- * 
- */
-
-/** precomputed count flip array */
-static const char COUNT_FLIP_2[256] = {
-	 0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 8, 10,  8,  8,  8, 10,  8,  8,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0
-};
-
-static const char COUNT_FLIP_3[256] = {
-	 0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 6, 10,  8,  8,  6,  6,  6,  6,  6, 10,  8,  8,  6,  6,  6,  6,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0
-};
-
-static const char COUNT_FLIP_4[256] = {
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0
-};
-
-static const char COUNT_FLIP_5[256] = {
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-	 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
-};
-
-#include "bit.h"
-
-#if defined(__LZCNT__) || defined(__AVX2__)
-static inline int count_V_flip_reverse (unsigned long long P, int ofs) {
-	return (_lzcnt_u64(P << ofs) >> 2) & 0x0E;
-}
-#elif defined(_MSC_VER)	// Not used
-static inline int count_V_flip_reverse (unsigned long long P, int ofs) {
-	unsigned long i;
-	return (((_BitScanReverse64(&i, (P << ofs)) ? (int) i : 127) ^ 63) >> 2) & 0x0E;
-}
-#else
-// with guardian bit to avoid __builtin_clz(0)	// Not used
-static inline int count_V_flip_reverse (unsigned long long P, int ofs) {
-	return ((__builtin_clzll((P << ofs) | 1) + 1) >> 2) & 0x0E;
-}
-#endif
-
-#if defined(__LZCNT__) || defined(__AVX2__)
-
-static inline int count_H1_flip_left (unsigned long long P, int pos, int mask) {
-	return (_lzcnt_u32((P << (8 - pos)) & (mask << 1)) & 0x07) * 2;
-}
-
-static inline int count_H_flip_left (unsigned long long P, int pos, int mask) {
-	return (_lzcnt_u32((P >> (pos - 8)) & (mask << 1)) & 0x07) * 2;
-}
-
-#else
-
-static const char COUNT_FLIP_L[128] = {
-	 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-	 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
-};
-
-static inline int count_H1_flip_left (unsigned long long P, int pos, int mask) {
-	return COUNT_FLIP_L[(P << (7 - pos)) & mask];
-}
-
-static inline int count_H_flip_left (unsigned long long P, int pos, int mask) {
-	return COUNT_FLIP_L[(P >> (pos - 7)) & mask];
-}
-
-#endif
-
-#if (defined(__BMI__) || defined(__AVX2__)) && !(defined(__GNUC__) && (__GNUC__ < 6))	// GCC Bug 78037
-
-static inline int count_H_flip_right (unsigned long long P, int pos, int mask) {
-	return (_tzcnt_u32((P >> (pos + 1)) & mask) & 0x07) * 2;
-}
-
-static inline int count_H8_flip_right (unsigned long long P, int pos) {
-	return (_tzcnt_u32(P >> (pos + 1)) & 0x07) * 2;
-}
-
-#else
-
-static const char COUNT_FLIP_R[128] = {
-	 0,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,  8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	10,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,  8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	12,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,  8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	10,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,  8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0
-};
-
-static inline int count_H_flip_right (unsigned long long P, int pos, int mask) {
-	return COUNT_FLIP_R[(P >> (pos + 1)) & mask];
-}
-
-static inline int count_H8_flip_right (unsigned long long P, int pos) {
-	return COUNT_FLIP_R[P >> (pos + 1)];
-}
-
-#endif
-
-/**
- * Count last flipped discs when playing on square A1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d9;
-
-	P_v = P & 0x0101010101010100ULL;
-	n_flipped  = ((P_v & -P_v) * 0x000020406080a0c0ULL) >> 60;
-	n_flipped += count_H_flip_right(P, 0, 0x7f);
-	P_d9 = P & 0x8040201008040200ULL;
-	n_flipped += (((P_d9 & -P_d9) >> 1) * 0x000010100c080503ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d9;
-
-	P_v = P & 0x0202020202020200ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000102030405060ULL) >> 60;
-	n_flipped += count_H_flip_right(P, 1, 0x3f);
-	P_d9 = P & 0x0080402010080400ULL;
-	n_flipped += ((P_d9 & -P_d9) * 0x0000040403020140ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x0404040404040400ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000081018202830ULL) >> 60;
-	n_flipped += COUNT_FLIP_2[P & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000804020110A04ULL) * 0x0101010101010101ULL) >> 56];	// A3C1H6
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x0808080808080800ULL;
-	n_flipped  = ((P_v & -P_v) * 0x000004080c101418ULL) >> 60;
-	n_flipped += COUNT_FLIP_3[P & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000008041221408ULL) * 0x0101010101010101ULL) >> 56];	// A4D1H5
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x1010101010101000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000020406080a0cULL) >> 60;
-	n_flipped += COUNT_FLIP_4[P & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000000182442810ULL) * 0x0101010101010101ULL) >> 56];	// A5E1H4
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x2020202020202000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000010203040506ULL) >> 60;
-	n_flipped += COUNT_FLIP_5[P & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000010204885020ULL) * 0x0101010101010101ULL) >> 56];	// A6F1H3
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d7;
-
-	P_v = P & 0x4040404040404000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000008101820283ULL) >> 60;
-	n_flipped += count_H1_flip_left(P, 6, 0x7e);
-	P_d7 = P & 0x0001020408102000ULL;
-	n_flipped += ((P_d7 & -P_d7) * 0x000002081840a000ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d7;
-
-	P_v = P & 0x8080808080808000ULL;
-	n_flipped  = (((P_v & -P_v) >> 1) * 0x0000008101820283ULL) >> 60;
-	n_flipped += count_H1_flip_left(P, 7, 0x7f);
-	P_d7 = P & 0x0102040810204000ULL;
-	n_flipped += ((P_d7 & -P_d7) * 0x000001040c2050c0ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d9;
-
-	P_v = P & 0x0101010101010000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x00000020406080a0ULL) >> 60;
-	n_flipped += count_H_flip_right(P, 8, 0x7f);
-	P_d9 = P & 0x4020100804020000ULL;
-	n_flipped += (((P_d9 & -P_d9) >> 1) * 0x00000010100c0805ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d9;
-
-	P_v = P & 0x0202020202020000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000001020304050ULL) >> 60;
-	n_flipped += count_H_flip_right(P, 9, 0x3f);
-	P_d9 = P & 0x8040201008040000ULL;
-	n_flipped += (((P_d9 & -P_d9) >> 2) * 0x00000010100c0805ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x0404040404040000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000000810182028ULL) >> 60;
-	n_flipped += COUNT_FLIP_2[(P >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x00804020110A0400ULL) * 0x0101010101010101ULL) >> 56];	// A4C2H7
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x0808080808080000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x00000004080c1014ULL) >> 60;
-	n_flipped += COUNT_FLIP_3[(P >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000804122140800ULL) * 0x0101010101010101ULL) >> 56];	// A5D2H6
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x1010101010100000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x000000020406080aULL) >> 60;
-	n_flipped += COUNT_FLIP_4[(P >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000018244281000ULL) * 0x0101010101010101ULL) >> 56];	// A6E2H5
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x2020202020200000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000000102030405ULL) >> 60;
-	n_flipped += COUNT_FLIP_5[(P >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0001020488502000ULL) * 0x0101010101010101ULL) >> 56];	// A7F2H4
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d7;
-
-	P_v = P & 0x4040404040400000ULL;
-	n_flipped  = (((P_v & -P_v) >> 1) * 0x0000000102030405ULL) >> 60;
-	n_flipped += count_H_flip_left(P, 14, 0x7e);
-	P_d7 = P & 0x0102040810200000ULL;
-	n_flipped += ((P_d7 & -P_d7) * 0x00000002081840a0ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d7;
-
-	P_v = P & 0x8080808080800000ULL;
-	n_flipped  = (((P_v & -P_v) >> 2) * 0x0000000102030405ULL) >> 60;
-	n_flipped += count_H_flip_left(P, 15, 0x7f);
-	P_d7 = P & 0x0204081020400000ULL;
-	n_flipped += (((P_d7 & -P_d7) >> 2) * 0x0000000410308143ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x2010080402010101ULL) * 0x0102040404040404ULL) >> 56];	// A1A3F8
-	n_flipped += count_H_flip_right(P, 16, 0x7f);
-	n_flipped += COUNT_FLIP_5[((P & 0x0101010101010204ULL) * 0x2020201008040201ULL) >> 56];	// C1A3A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x4020100804020202ULL) * 0x0081020202020202ULL) >> 56];	// B1B3G8
-	n_flipped += count_H_flip_right(P, 17, 0x3f);
-	n_flipped += COUNT_FLIP_5[(((P & 0x0202020202020408ULL) >> 1) * 0x2020201008040201ULL) >> 56];	// D1B3B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x4040404040402010ULL) * 0x0010101020408102ULL) >> 56];	// E1G3G8
-	n_flipped += count_H_flip_left(P, 22, 0x7e);
-	n_flipped += COUNT_FLIP_5[(((P & 0x0204081020404040ULL) >> 1) * 0x0402010101010101ULL) >> 56];	// G1G3B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x8080808080804020ULL) * 0x0008080810204081ULL) >> 56];	// F1H3H8
-	n_flipped += count_H_flip_left(P, 23, 0x7f);
-	n_flipped += COUNT_FLIP_5[(((P & 0x0408102040808080ULL) >> 2) * 0x0402010101010101ULL) >> 56];	// H1H3C8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x1008040201010101ULL) * 0x0102040808080808ULL) >> 56];	// A1A4E8
-	n_flipped += count_H_flip_right(P, 24, 0x7f);
-	n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408ULL) * 0x1010101008040201ULL) >> 56];	// D1A4A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x2010080402020202ULL) * 0x0081020404040404ULL) >> 56];	// B1B4F8
-	n_flipped += count_H_flip_right(P, 25, 0x3f);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810ULL) >> 1) * 0x1010101008040201ULL) >> 56];	// E1B4B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x4040404040201008ULL) * 0x0020202020408102ULL) >> 56];	// D1G4G8
-	n_flipped += count_H_flip_left(P, 30, 0x7e);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040ULL) >> 2) * 0x0804020101010101ULL) >> 56];	// G1G4C8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x8080808080402010ULL) * 0x0010101010204081ULL) >> 56];	// E1H4H8
-	n_flipped += count_H_flip_left(P, 31, 0x7f);
-	n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080ULL) >> 3) * 0x0804020101010101ULL) >> 56];	// H1H4D8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x0804020101010101ULL) * 0x0102040810101010ULL) >> 56];	// A1A5D8
-	n_flipped += count_H_flip_right(P, 32, 0x7f);
-	n_flipped += COUNT_FLIP_3[((P & 0x0101010102040810ULL) * 0x0808080808040201ULL) >> 56];	// E1A5A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x1008040202020202ULL) * 0x0081020408080808ULL) >> 56];	// B1B5E8
-	n_flipped += count_H_flip_right(P, 33, 0x3f);
-	n_flipped += COUNT_FLIP_3[(((P & 0x0202020204081020ULL) >> 1) * 0x0808080808040201ULL) >> 56];	// F1B5B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x4040404020100804ULL) * 0x0040404040408102ULL) >> 56];	// C1G5G8
-	n_flipped += count_H_flip_left(P, 38, 0x7e);
-	n_flipped += COUNT_FLIP_3[(((P & 0x0810204040404040ULL) >> 3) * 0x1008040201010101ULL) >> 56];	// G1G5D8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x8080808040201008ULL) * 0x0020202020204081ULL) >> 56];	// D1H5H8
-	n_flipped += count_H_flip_left(P, 39, 0x7f);
-	n_flipped += COUNT_FLIP_3[(((P & 0x1020408080808080ULL) >> 4) * 0x1008040201010101ULL) >> 56];	// H1H5E8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x0402010101010101ULL) * 0x0102040810202020ULL) >> 56];	// A1A6C8
-	n_flipped += count_H_flip_right(P, 40, 0x7f);
-	n_flipped += COUNT_FLIP_2[((P & 0x0101010204081020ULL) * 0x0404040404040201ULL) >> 56];	// F1A6A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x0804020202020202ULL) * 0x0081020408101010ULL) >> 56];	// B1B6D8
-	n_flipped += count_H_flip_right(P, 41, 0x3f);
-	n_flipped += COUNT_FLIP_2[(((P & 0x0202020408102040ULL) >> 1) * 0x0404040404040201ULL) >> 56];	// G1B6B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x1008040201000000ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0810204080000000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x4040402010080402ULL) * 0x0080808080808102ULL) >> 56];	// B1G6G8
-	n_flipped += count_H_flip_left(P, 46, 0x7e);
-	n_flipped += COUNT_FLIP_2[(((P & 0x1020404040404040ULL) >> 4) * 0x2010080402010101ULL) >> 56];	// G1G6E8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x8080804020100804ULL) * 0x0040404040404081ULL) >> 56];	// C1H6H8
-	n_flipped += count_H_flip_left(P, 47, 0x7f);
-	n_flipped += COUNT_FLIP_2[(((P & 0x2040808080808080ULL) >> 5) * 0x2010080402010101ULL) >> 56];	// H1H6F8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_A7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000010101010101ULL), 23);
-	n_flipped += count_H_flip_right(P, 48, 0x7f);
-	n_flipped += count_V_flip_reverse((P & 0x0000020408102040ULL), 16);
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_A7(const unsigned long long P) {
-	return count_last_flip_A2(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square B7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_B7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000020202020202ULL), 22);
-	n_flipped += count_H_flip_right(P, 49, 0x3f);
-	n_flipped += count_V_flip_reverse((P & 0x0000040810204080ULL), 15);
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_B7(const unsigned long long P) {
-	return count_last_flip_B2(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square C7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_C7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000040404040404ULL), 21);
-	n_flipped += COUNT_FLIP_2[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x00040A1120408000ULL) * 0x0101010101010101ULL) >> 56];	// A5C7H2
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_C7(const unsigned long long P) {
-	return count_last_flip_C2(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square D7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_D7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000080808080808ULL), 20);
-	n_flipped += COUNT_FLIP_3[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0008142241800000ULL) * 0x0101010101010101ULL) >> 56];	// A4D7H3
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_D7(const unsigned long long P) {
-	return count_last_flip_D2(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square E7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_E7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000101010101010ULL), 19);
-	n_flipped += COUNT_FLIP_4[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0010284482010000ULL) * 0x0101010101010101ULL) >> 56];	// A3E7H4
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_E7(const unsigned long long P) {
-	return count_last_flip_E2(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square F7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_F7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000202020202020ULL), 18);
-	n_flipped += COUNT_FLIP_5[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0020508804020100ULL) * 0x0101010101010101ULL) >> 56];	// A2F7H5
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_F7(const unsigned long long P) {
-	return count_last_flip_F2(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square G7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_G7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000404040404040ULL), 17);
-	n_flipped += count_H_flip_left(P, 54, 0x7e);
-	n_flipped += count_V_flip_reverse((P & 0x0000201008040201ULL), 18);
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_G7(const unsigned long long P) {
-	return count_last_flip_G2(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square H7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_H7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0000808080808080ULL), 16);
-	n_flipped += count_H_flip_left(P, 55, 0x7f);
-	n_flipped += count_V_flip_reverse((P & 0x0000402010080402ULL), 17);
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_H7(const unsigned long long P) {
-	return count_last_flip_H2(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square A8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_A8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0001010101010101ULL), 15);
-	n_flipped += count_H8_flip_right(P, 56);
-	n_flipped += count_V_flip_reverse((P & 0x0002040810204080ULL), 8);
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_A8(const unsigned long long P) {
-	return count_last_flip_A1(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square B8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_B8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0002020202020202ULL), 14);
-	n_flipped += count_H8_flip_right(P, 57);
-	n_flipped += count_V_flip_reverse((P & 0x0004081020408000ULL), 7);
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_B8(const unsigned long long P) {
-	return count_last_flip_B1(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square C8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_C8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0004040404040404ULL), 13);
-	n_flipped += COUNT_FLIP_2[P >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x040A112040800000ULL) * 0x0101010101010101ULL) >> 56];	// A6C8H3
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_C8(const unsigned long long P) {
-	return count_last_flip_C1(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square D8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_D8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0008080808080808ULL), 12);
-	n_flipped += COUNT_FLIP_3[P >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x0814224180000000ULL) * 0x0101010101010101ULL) >> 56];	// A5D8H4
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_D8(const unsigned long long P) {
-	return count_last_flip_D1(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square E8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_E8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0010101010101010ULL), 11);
-	n_flipped += COUNT_FLIP_4[P >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x1028448201000000ULL) * 0x0101010101010101ULL) >> 56];	// A4E8H5
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_E8(const unsigned long long P) {
-	return count_last_flip_E1(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square F8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_F8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0020202020202020ULL), 10);
-	n_flipped += COUNT_FLIP_5[P >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0050880402010000ULL) * 0x0101010101010101ULL) >> 56];	// A3F8H6
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_F8(const unsigned long long P) {
-	return count_last_flip_F1(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square G8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_G8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0040404040404040ULL), 9);
-	n_flipped += count_H_flip_left(P, 62, 0x7e);
-	n_flipped += count_V_flip_reverse((P & 0x0020100804020100ULL), 10);
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_G8(const unsigned long long P) {
-	return count_last_flip_G1(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when playing on square H8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-#if defined(__LZCNT__) || defined(__AVX2__)
-static int count_last_flip_H8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = count_V_flip_reverse((P & 0x0080808080808080ULL), 8);
-	n_flipped += count_H_flip_left(P, 63, 0x7f);
-	n_flipped += count_V_flip_reverse((P & 0x0040201008040201ULL), 9);
-
-	return n_flipped;
-}
-#else
-static int count_last_flip_H8(const unsigned long long P) {
-	return count_last_flip_H1(vertical_mirror(P));
-}
-#endif
-
-/**
- * Count last flipped discs when plassing.
- *
- * @param P player's disc pattern (unused).
- * @return zero.
- */
-static int count_last_flip_pass(const unsigned long long P)
-{
-	(void) P; // useless code to shut-up compiler warning
-	return 0;
-}
-
-/** Array of functions to count flipped discs of the last move */
-int (*count_last_flip[])(const unsigned long long) = {
-	count_last_flip_A1, count_last_flip_B1, count_last_flip_C1, count_last_flip_D1,
-	count_last_flip_E1, count_last_flip_F1, count_last_flip_G1, count_last_flip_H1,
-	count_last_flip_A2, count_last_flip_B2, count_last_flip_C2, count_last_flip_D2,
-	count_last_flip_E2, count_last_flip_F2, count_last_flip_G2, count_last_flip_H2,
-	count_last_flip_A3, count_last_flip_B3, count_last_flip_C3, count_last_flip_D3,
-	count_last_flip_E3, count_last_flip_F3, count_last_flip_G3, count_last_flip_H3,
-	count_last_flip_A4, count_last_flip_B4, count_last_flip_C4, count_last_flip_D4,
-	count_last_flip_E4, count_last_flip_F4, count_last_flip_G4, count_last_flip_H4,
-	count_last_flip_A5, count_last_flip_B5, count_last_flip_C5, count_last_flip_D5,
-	count_last_flip_E5, count_last_flip_F5, count_last_flip_G5, count_last_flip_H5,
-	count_last_flip_A6, count_last_flip_B6, count_last_flip_C6, count_last_flip_D6,
-	count_last_flip_E6, count_last_flip_F6, count_last_flip_G6, count_last_flip_H6,
-	count_last_flip_A7, count_last_flip_B7, count_last_flip_C7, count_last_flip_D7,
-	count_last_flip_E7, count_last_flip_F7, count_last_flip_G7, count_last_flip_H7,
-	count_last_flip_A8, count_last_flip_B8, count_last_flip_C8, count_last_flip_D8,
-	count_last_flip_E8, count_last_flip_F8, count_last_flip_G8, count_last_flip_H8,
-	count_last_flip_pass,
-};
-
->>>>>>> b3f048d (copyright changes)
-=======
-
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
diff --git a/src/count_last_flip_bmi.c b/src/count_last_flip_bmi.c
index e515227..fc80a5d 100644
--- a/src/count_last_flip_bmi.c
+++ b/src/count_last_flip_bmi.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file count_last_flip_bmi.c
  *
@@ -174,180 +170,3 @@ int last_flip(int pos, unsigned long long P)
 
 	return n_flipped;
 }
-<<<<<<< HEAD
-=======
-/**
- * @file count_last_flip_bmi.c
- *
- *
- * A function is provided to count the number of fipped disc of the last move.
- *
- * This implementation uses BMI1 instructions, lzcnt and tzcnt.
- * For optimization purpose, the value returned is twice the number of flipped
- * disc, to facilitate the computation of disc difference.
- *
- * @date 1998 - 2018
- * @author Toshihiko Okuhara
- * @version 4.4
- * 
- */
-
-#include "bit.h"
-
-/** precomputed count flip array */
-static const unsigned char COUNT_FLIP[8][256] = {
-	{
-		 0,  0,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		10, 10,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		12, 12,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		10, 10,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-	},
-	{
-		 0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 8,  8,  8,  8,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		10, 10, 10, 10,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 8,  8,  8,  8,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-	},
-	{
-		 0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 8, 10,  8,  8,  8, 10,  8,  8,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	},
-	{
-		 0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 6, 10,  8,  8,  6,  6,  6,  6,  6, 10,  8,  8,  6,  6,  6,  6,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	},
-	{
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-		 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-		 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-};
-
-/* bit masks for diagonal lines */
-static const unsigned long long mask_d[2][64] = {
-	{
-		0x0000000000000001ULL, 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL,
-		0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL,
-		0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL,
-		0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL,
-		0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL,
-		0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL,
-		0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL,
-		0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL,
-		0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL,
-		0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL,
-		0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL,
-		0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL,
-		0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL,
-		0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL,
-		0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL,
-		0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL
-	},
-	{
-		0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL,
-		0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, 0x0000000000000080ULL,
-		0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL,
-		0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL,
-		0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL,
-		0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL,
-		0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL,
-		0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL,
-		0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL,
-		0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL,
-		0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL,
-		0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL,
-		0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL,
-		0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL,
-		0x0100000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL,
-		0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL
-	}
-};
-
-/**
- * Count last flipped discs when playing on the last empty.
- *
- * @param pos the last empty square.
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-
-int last_flip(int pos, unsigned long long P)
-{
-	unsigned long long	P8, P7, P9;
-	int	n_flipped;
-	int	x = pos & 7;
-	int	y = pos & 0x38;
-	int	ry = y ^ 0x38;
-
-	n_flipped = COUNT_FLIP[x][(unsigned char) (P >> y)];
-
-	P8 = P & (0x0101010101010101ULL << x);
-	P7 = P & mask_d[0][pos];
-	P9 = P & mask_d[1][pos];
-
-	n_flipped += ((((int) __tzcnt_u64((P8 >> y) >> 8) + (int) __lzcnt64((P8 << ry) << 8)) & 0x38)
-		+ ((int) __tzcnt_u64((P7 >> y) >> 8) & 0x38)
-		+ ((int) __tzcnt_u64((P9 >> y) >> 8) & 0x38)
-		+ ((int) __lzcnt64((P7 << ry) << 8) & 0x38)
-		+ ((int) __lzcnt64((P9 << ry) << 8) & 0x38)) >> 2;
-
-	return n_flipped;
-}
->>>>>>> f24cc06 (avoid BMI2 for AMD; more lzcnt/tzcnt in count_last_flip_bitscan)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/count_last_flip_bmi2.c b/src/count_last_flip_bmi2.c
index 2ff3573..f85c0e4 100644
--- a/src/count_last_flip_bmi2.c
+++ b/src/count_last_flip_bmi2.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file count_last_flip_bmi2.c
  *
@@ -21,45 +17,18 @@
  * For optimization purpose, the value returned is twice the number of flipped
  * disc, to facilitate the computation of disc difference.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
  * @author Richard Delorme
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 1998 - 2018
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
- * @date 1998 - 2023
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.5
->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask)
  * 
  */
 
 #include "bit.h"
-<<<<<<< HEAD
-<<<<<<< HEAD
-#include <stdint.h>
-
-/** precomputed count flip array */
-const uint8_t COUNT_FLIP[8][256] = {
-=======
-
-/** precomputed count flip array */
-const unsigned char COUNT_FLIP[8][256] = {
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 #include <stdint.h>
 
 /** precomputed count flip array */
 const uint8_t COUNT_FLIP[8][256] = {
->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask)
 	{
 		 0,  0,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
 		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
@@ -220,240 +189,16 @@ const unsigned long long mask_x[64][4] = {
 
 inline int last_flip(int pos, unsigned long long P)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	uint_fast8_t	n_flipped;
-=======
-	unsigned char	n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	uint_fast8_t	n_flipped;
->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask)
 	int	x = pos & 7;
 	int	y = pos >> 3;
 
 	P &= mask_x[pos][3];	// mask out unrelated bits to make dummy 0 bits for outside
 	// n_flipped  = COUNT_FLIP[x][_bextr_u64(P, pos & 0x38, 8)];
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flipped  = COUNT_FLIP[x][(P >> (pos & 0x38)) & 0xFF];
-=======
-	n_flipped  = COUNT_FLIP[x][(unsigned char) (P >> (pos & 0x38))];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	n_flipped  = COUNT_FLIP[x][(P >> (pos & 0x38)) & 0xFF];
->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask)
 	n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][0])];
 	n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][1])];
 	n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][2])];
 
 	return n_flipped;
 }
-<<<<<<< HEAD
-=======
-/**
- * @file count_last_flip_bmi2.c
- *
- *
- * A function is provided to count the number of fipped disc of the last move.
- *
- * The basic principle is to read into an array a precomputed result. Doing
- * this is easy for a single line ; as we can use arrays of the form:
- *  - COUNT_FLIP[square where we play][8-bits disc pattern].
- * The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by the 
- * BMI2 PEXT instruction.
- * Once we get our 8-bits disc patterns, we directly get the number of
- * flipped discs from the precomputed array, and add them from each flipping
- * lines.
- * For optimization purpose, the value returned is twice the number of flipped
- * disc, to facilitate the computation of disc difference.
- *
- * @date 1998 - 2018
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- * 
- */
-
-#include "bit.h"
-
-/** precomputed count flip array */
-const unsigned char COUNT_FLIP[8][256] = {
-	{
-		 0,  0,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		10, 10,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		12, 12,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		10, 10,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-	},
-	{
-		 0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 8,  8,  8,  8,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		10, 10, 10, 10,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 8,  8,  8,  8,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-	},
-	{
-		 0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 8, 10,  8,  8,  8, 10,  8,  8,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	},
-	{
-		 0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 6, 10,  8,  8,  6,  6,  6,  6,  6, 10,  8,  8,  6,  6,  6,  6,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	},
-	{
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-		 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-		 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-};
-
-/* bit masks for diagonal/vertical/all lines */
-const unsigned long long mask_x[64][4] = {
-	{ 0x0000000000000001ULL, 0x8040201008040201ULL, 0x0101010101010101ULL, 0x81412111090503ffULL },
-	{ 0x0000000000000102ULL, 0x0080402010080402ULL, 0x0202020202020202ULL, 0x02824222120a07ffULL },
-	{ 0x0000000000010204ULL, 0x0000804020100804ULL, 0x0404040404040404ULL, 0x0404844424150effULL },
-	{ 0x0000000001020408ULL, 0x0000008040201008ULL, 0x0808080808080808ULL, 0x08080888492a1cffULL },
-	{ 0x0000000102040810ULL, 0x0000000080402010ULL, 0x1010101010101010ULL, 0x10101011925438ffULL },
-	{ 0x0000010204081020ULL, 0x0000000000804020ULL, 0x2020202020202020ULL, 0x2020212224a870ffULL },
-	{ 0x0001020408102040ULL, 0x0000000000008040ULL, 0x4040404040404040ULL, 0x404142444850e0ffULL },
-	{ 0x0102040810204080ULL, 0x0000000000000080ULL, 0x8080808080808080ULL, 0x8182848890a0c0ffULL },
-	{ 0x0000000000000102ULL, 0x4020100804020104ULL, 0x0101010101010101ULL, 0x412111090503ff03ULL },
-	{ 0x0000000000010204ULL, 0x8040201008040201ULL, 0x0202020202020202ULL, 0x824222120a07ff07ULL },
-	{ 0x0000000001020408ULL, 0x0080402010080402ULL, 0x0404040404040404ULL, 0x04844424150eff0eULL },
-	{ 0x0000000102040810ULL, 0x0000804020100804ULL, 0x0808080808080808ULL, 0x080888492a1cff1cULL },
-	{ 0x0000010204081020ULL, 0x0000008040201008ULL, 0x1010101010101010ULL, 0x101011925438ff38ULL },
-	{ 0x0001020408102040ULL, 0x0000000080402010ULL, 0x2020202020202020ULL, 0x20212224a870ff70ULL },
-	{ 0x0102040810204080ULL, 0x0000000000804020ULL, 0x4040404040404040ULL, 0x4142444850e0ffe0ULL },
-	{ 0x0204081020408001ULL, 0x0000000000008040ULL, 0x8080808080808080ULL, 0x82848890a0c0ffc0ULL },
-	{ 0x0000000000010204ULL, 0x201008040201000aULL, 0x0101010101010101ULL, 0x2111090503ff0305ULL },
-	{ 0x0000000001020408ULL, 0x4020100804020101ULL, 0x0202020202020202ULL, 0x4222120a07ff070aULL },
-	{ 0x0000000102040810ULL, 0x8040201008040201ULL, 0x0404040404040404ULL, 0x844424150eff0e15ULL },
-	{ 0x0000010204081020ULL, 0x0080402010080402ULL, 0x0808080808080808ULL, 0x0888492a1cff1c2aULL },
-	{ 0x0001020408102040ULL, 0x0000804020100804ULL, 0x1010101010101010ULL, 0x1011925438ff3854ULL },
-	{ 0x0102040810204080ULL, 0x0000008040201008ULL, 0x2020202020202020ULL, 0x212224a870ff70a8ULL },
-	{ 0x0204081020408001ULL, 0x0000000080402010ULL, 0x4040404040404040ULL, 0x42444850e0ffe050ULL },
-	{ 0x0408102040800003ULL, 0x0000000000804020ULL, 0x8080808080808080ULL, 0x848890a0c0ffc0a0ULL },
-	{ 0x0000000001020408ULL, 0x1008040201000016ULL, 0x0101010101010101ULL, 0x11090503ff030509ULL },
-	{ 0x0000000102040810ULL, 0x2010080402010005ULL, 0x0202020202020202ULL, 0x22120a07ff070a12ULL },
-	{ 0x0000010204081020ULL, 0x4020100804020101ULL, 0x0404040404040404ULL, 0x4424150eff0e1524ULL },
-	{ 0x0001020408102040ULL, 0x8040201008040201ULL, 0x0808080808080808ULL, 0x88492a1cff1c2a49ULL },
-	{ 0x0102040810204080ULL, 0x0080402010080402ULL, 0x1010101010101010ULL, 0x11925438ff385492ULL },
-	{ 0x0204081020408001ULL, 0x0000804020100804ULL, 0x2020202020202020ULL, 0x2224a870ff70a824ULL },
-	{ 0x0408102040800003ULL, 0x0000008040201008ULL, 0x4040404040404040ULL, 0x444850e0ffe05048ULL },
-	{ 0x0810204080000007ULL, 0x0000000080402010ULL, 0x8080808080808080ULL, 0x8890a0c0ffc0a090ULL },
-	{ 0x0000000102040810ULL, 0x080402010000002eULL, 0x0101010101010101ULL, 0x090503ff03050911ULL },
-	{ 0x0000010204081020ULL, 0x100804020100000dULL, 0x0202020202020202ULL, 0x120a07ff070a1222ULL },
-	{ 0x0001020408102040ULL, 0x2010080402010003ULL, 0x0404040404040404ULL, 0x24150eff0e152444ULL },
-	{ 0x0102040810204080ULL, 0x4020100804020101ULL, 0x0808080808080808ULL, 0x492a1cff1c2a4988ULL },
-	{ 0x0204081020408002ULL, 0x8040201008040201ULL, 0x1010101010101010ULL, 0x925438ff38549211ULL },
-	{ 0x0408102040800005ULL, 0x0080402010080402ULL, 0x2020202020202020ULL, 0x24a870ff70a82422ULL },
-	{ 0x081020408000000bULL, 0x0000804020100804ULL, 0x4040404040404040ULL, 0x4850e0ffe0504844ULL },
-	{ 0x1020408000000017ULL, 0x0000008040201008ULL, 0x8080808080808080ULL, 0x90a0c0ffc0a09088ULL },
-	{ 0x0000010204081020ULL, 0x040201000000005eULL, 0x0101010101010101ULL, 0x0503ff0305091121ULL },
-	{ 0x0001020408102040ULL, 0x080402010000001dULL, 0x0202020202020202ULL, 0x0a07ff070a122242ULL },
-	{ 0x0102040810204080ULL, 0x100804020100000bULL, 0x0404040404040404ULL, 0x150eff0e15244484ULL },
-	{ 0x0204081020408001ULL, 0x2010080402010003ULL, 0x0808080808080808ULL, 0x2a1cff1c2a498808ULL },
-	{ 0x0408102040800003ULL, 0x4020100804020101ULL, 0x1010101010101010ULL, 0x5438ff3854921110ULL },
-	{ 0x081020408000000eULL, 0x8040201008040201ULL, 0x2020202020202020ULL, 0xa870ff70a8242221ULL },
-	{ 0x102040800000001dULL, 0x0080402010080402ULL, 0x4040404040404040ULL, 0x50e0ffe050484442ULL },
-	{ 0x204080000000003bULL, 0x0000804020100804ULL, 0x8080808080808080ULL, 0xa0c0ffc0a0908884ULL },
-	{ 0x0001020408102040ULL, 0x02010000000000beULL, 0x0101010101010101ULL, 0x03ff030509112141ULL },
-	{ 0x0102040810204080ULL, 0x040201000000003dULL, 0x0202020202020202ULL, 0x07ff070a12224282ULL },
-	{ 0x0204081020408001ULL, 0x080402010000001bULL, 0x0404040404040404ULL, 0x0eff0e1524448404ULL },
-	{ 0x0408102040800003ULL, 0x1008040201000007ULL, 0x0808080808080808ULL, 0x1cff1c2a49880808ULL },
-	{ 0x0810204080000007ULL, 0x2010080402010003ULL, 0x1010101010101010ULL, 0x38ff385492111010ULL },
-	{ 0x102040800000000fULL, 0x4020100804020101ULL, 0x2020202020202020ULL, 0x70ff70a824222120ULL },
-	{ 0x204080000000003eULL, 0x8040201008040201ULL, 0x4040404040404040ULL, 0xe0ffe05048444241ULL },
-	{ 0x408000000000007dULL, 0x0080402010080402ULL, 0x8080808080808080ULL, 0xc0ffc0a090888482ULL },
-	{ 0x0102040810204080ULL, 0x010000000000027eULL, 0x0101010101010101ULL, 0xff03050911214181ULL },
-	{ 0x0204081020408001ULL, 0x020100000000007dULL, 0x0202020202020202ULL, 0xff070a1222428202ULL },
-	{ 0x0408102040800003ULL, 0x040201000000003bULL, 0x0404040404040404ULL, 0xff0e152444840404ULL },
-	{ 0x0810204080000007ULL, 0x0804020100000017ULL, 0x0808080808080808ULL, 0xff1c2a4988080808ULL },
-	{ 0x102040800000000fULL, 0x1008040201000007ULL, 0x1010101010101010ULL, 0xff38549211101010ULL },
-	{ 0x204080000000001fULL, 0x2010080402010003ULL, 0x2020202020202020ULL, 0xff70a82422212020ULL },
-	{ 0x408000000000003fULL, 0x4020100804020101ULL, 0x4040404040404040ULL, 0xffe0504844424140ULL },
-	{ 0x800000000000017eULL, 0x8040201008040201ULL, 0x8080808080808080ULL, 0xffc0a09088848281ULL }
-};
-
-/**
- * Count last flipped discs when playing on the last empty.
- *
- * @param pos the last empty square.
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-
-inline int last_flip(int pos, unsigned long long P)
-{
-	unsigned char	n_flipped;
-	int	x = pos & 7;
-	int	y = pos >> 3;
-
-	P &= mask_x[pos][3];	// mask out unrelated bits to make dummy 0 bits for outside
-	// n_flipped  = COUNT_FLIP[x][_bextr_u64(P, pos & 0x38, 8)];
-	n_flipped  = COUNT_FLIP[x][(unsigned char) (P >> (pos & 0x38))];
-	n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][0])];
-	n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][1])];
-	n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][2])];
-
-	return n_flipped;
-}
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/count_last_flip_carry_64.c b/src/count_last_flip_carry_64.c
index 969bcf8..9d409a2 100644
--- a/src/count_last_flip_carry_64.c
+++ b/src/count_last_flip_carry_64.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file count_last_flip_carry_64.c
  *
@@ -1278,1284 +1274,3 @@ int (*count_last_flip[])(const unsigned long long) = {
 	count_last_flip_pass,
 };
 
-<<<<<<< HEAD
-=======
-/**
- * @file count_last_flip_carry_64.c
- *
- *
- * A function is provided to count the number of fipped disc of the last move
- * for each square of the board. These functions are gathered into an array of
- * functions, so that a fast access to each function is allowed. The generic
- * form of the function take as input the player bitboard and return twice
- * the number of flipped disc of the last move.
- *
- * The basic principle is to read into an array a precomputed result. Doing
- * this is easy for a single line ; as we can use arrays of the form:
- *  - COUNT_FLIP[square where we play][8-bits disc pattern].
- * The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by a simple
- * multiplication and to right-shift the result to scale it into a number
- * between 0 and 255.
- * Once we get our 8-bits disc patterns, we directly get the number of
- * flipped discs from the precomputed array, and add them from each flipping
- * lines.
- * For optimization purpose, the value returned is twice the number of flipped
- * disc, to facilitate the computation of disc difference.
- *
- * With 135 degree merge, instead of Valery ClaudePierre's modification.
- *
- * For top to bottom flip, LS1B isolation (http://chessprogramming.wikispaces.com/
- * General+Setwise+Operations) is used to get the outflank bit.
- *
- * @date 1998 - 2018
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- * 
- */
-
-/** precomputed count flip array */
-static const char COUNT_FLIP_R[128] = {
-	 0,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	 8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	10,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	 8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	12,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	 8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	10,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0,
-	 8,  0,  2,  0,  4,  0,  2,  0,  6,  0,  2,  0,  4,  0,  2,  0
-};
-
-static const char COUNT_FLIP_2[256] = {
-	 0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 8, 10,  8,  8,  8, 10,  8,  8,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0
-};
-
-static const char COUNT_FLIP_3[256] = {
-	 0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 6, 10,  8,  8,  6,  6,  6,  6,  6, 10,  8,  8,  6,  6,  6,  6,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0
-};
-
-static const char COUNT_FLIP_4[256] = {
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0
-};
-
-static const char COUNT_FLIP_5[256] = {
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-	 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
-};
-
-static const char COUNT_FLIP_L[128] = {
-	 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-	 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
-};
-
-/**
- * Count last flipped discs when playing on square A1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d9;
-
-	P_v = P & 0x0101010101010100ULL;
-	n_flipped  = ((P_v & -P_v) * 0x000020406080a0c0ULL) >> 60;
-	n_flipped += COUNT_FLIP_R[(P >> 1) & 0x7f];
-	P_d9 = P & 0x8040201008040200ULL;
-	n_flipped += (((P_d9 & -P_d9) >> 1) * 0x000010100c080503ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d9;
-
-	P_v = P & 0x0202020202020200ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000102030405060ULL) >> 60;
-	n_flipped += COUNT_FLIP_R[(P >> 2) & 0x3f];
-	P_d9 = P & 0x0080402010080400ULL;
-	n_flipped += ((P_d9 & -P_d9) * 0x0000040403020140ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x0404040404040400ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000081018202830ULL) >> 60;
-	n_flipped += COUNT_FLIP_2[P & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000804020110A04ULL) * 0x0101010101010101ULL) >> 56];	// A3C1H6
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x0808080808080800ULL;
-	n_flipped  = ((P_v & -P_v) * 0x000004080c101418ULL) >> 60;
-	n_flipped += COUNT_FLIP_3[P & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000008041221408ULL) * 0x0101010101010101ULL) >> 56];	// A4D1H5
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x1010101010101000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000020406080a0cULL) >> 60;
-	n_flipped += COUNT_FLIP_4[P & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000000182442810ULL) * 0x0101010101010101ULL) >> 56];	// A5E1H4
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x2020202020202000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000010203040506ULL) >> 60;
-	n_flipped += COUNT_FLIP_5[P & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000010204885020ULL) * 0x0101010101010101ULL) >> 56];	// A6F1H3
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d7;
-
-	P_v = P & 0x4040404040404000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000008101820283ULL) >> 60;
-	n_flipped += COUNT_FLIP_L[(P << 1) & 0x7e];
-	P_d7 = P & 0x0001020408102000ULL;
-	n_flipped += ((P_d7 & -P_d7) * 0x000002081840a000ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H1.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H1(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d7;
-
-	P_v = P & 0x8080808080808000ULL;
-	n_flipped  = (((P_v & -P_v) >> 1) * 0x0000008101820283ULL) >> 60;
-	n_flipped += COUNT_FLIP_L[P & 0x7f];
-	P_d7 = P & 0x0102040810204000ULL;
-	n_flipped += ((P_d7 & -P_d7) * 0x000001040c2050c0ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d9;
-
-	P_v = P & 0x0101010101010000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x00000020406080a0ULL) >> 60;
-	n_flipped += COUNT_FLIP_R[(P >> 9) & 0x7f];
-	P_d9 = P & 0x4020100804020000ULL;
-	n_flipped += (((P_d9 & -P_d9) >> 1) * 0x00000010100c0805ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d9;
-
-	P_v = P & 0x0202020202020000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000001020304050ULL) >> 60;
-	n_flipped += COUNT_FLIP_R[(P >> 10) & 0x3f];
-	P_d9 = P & 0x8040201008040000ULL;
-	n_flipped += (((P_d9 & -P_d9) >> 2) * 0x00000010100c0805ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x0404040404040000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000000810182028ULL) >> 60;
-	n_flipped += COUNT_FLIP_2[(P >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x00804020110A0400ULL) * 0x0101010101010101ULL) >> 56];	// A4C2H7
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x0808080808080000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x00000004080c1014ULL) >> 60;
-	n_flipped += COUNT_FLIP_3[(P >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000804122140800ULL) * 0x0101010101010101ULL) >> 56];	// A5D2H6
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x1010101010100000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x000000020406080aULL) >> 60;
-	n_flipped += COUNT_FLIP_4[(P >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000018244281000ULL) * 0x0101010101010101ULL) >> 56];	// A6E2H5
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v;
-
-	P_v = P & 0x2020202020200000ULL;
-	n_flipped  = ((P_v & -P_v) * 0x0000000102030405ULL) >> 60;
-	n_flipped += COUNT_FLIP_5[(P >> 8) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0001020488502000ULL) * 0x0101010101010101ULL) >> 56];	// A7F2H4
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d7;
-
-	P_v = P & 0x4040404040400000ULL;
-	n_flipped  = (((P_v & -P_v) >> 1) * 0x0000000102030405ULL) >> 60;
-	n_flipped += COUNT_FLIP_L[(P >> 7) & 0x7e];
-	P_d7 = P & 0x0102040810200000ULL;
-	n_flipped += ((P_d7 & -P_d7) * 0x00000002081840a0ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H2.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H2(const unsigned long long P)
-{
-	int n_flipped;
-	unsigned long long P_v, P_d7;
-
-	P_v = P & 0x8080808080800000ULL;
-	n_flipped  = (((P_v & -P_v) >> 2) * 0x0000000102030405ULL) >> 60;
-	n_flipped += COUNT_FLIP_L[(P >> 8) & 0x7f];
-	P_d7 = P & 0x0204081020400000ULL;
-	n_flipped += (((P_d7 & -P_d7) >> 2) * 0x0000000410308143ULL) >> 60;
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x2010080402010101ULL) * 0x0102040404040404ULL) >> 56];	// A1A3F8
-	n_flipped += COUNT_FLIP_R[(P >> 17) & 0x7f];
-	n_flipped += COUNT_FLIP_5[((P & 0x0101010101010204ULL) * 0x2020201008040201ULL) >> 56];	// C1A3A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x4020100804020202ULL) * 0x0081020202020202ULL) >> 56];	// B1B3G8
-	n_flipped += COUNT_FLIP_R[(P >> 18) & 0x3f];
-	n_flipped += COUNT_FLIP_5[(((P & 0x0202020202020408ULL) >> 1) * 0x2020201008040201ULL) >> 56];	// D1B3B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x4040404040402010ULL) * 0x0010101020408102ULL) >> 56];	// E1G3G8
-	n_flipped += COUNT_FLIP_L[(P >> 15) & 0x7e];
-	n_flipped += COUNT_FLIP_5[(((P & 0x0204081020404040ULL) >> 1) * 0x0402010101010101ULL) >> 56];	// G1G3B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H3.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H3(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_2[((P & 0x8080808080804020ULL) * 0x0008080810204081ULL) >> 56];	// F1H3H8
-	n_flipped += COUNT_FLIP_L[(P >> 16) & 0x7f];
-	n_flipped += COUNT_FLIP_5[(((P & 0x0408102040808080ULL) >> 2) * 0x0402010101010101ULL) >> 56];	// H1H3C8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x1008040201010101ULL) * 0x0102040808080808ULL) >> 56];	// A1A4E8
-	n_flipped += COUNT_FLIP_R[(P >> 25) & 0x7f];
-	n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408ULL) * 0x1010101008040201ULL) >> 56];	// D1A4A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x2010080402020202ULL) * 0x0081020404040404ULL) >> 56];	// B1B4F8
-	n_flipped += COUNT_FLIP_R[(P >> 26) & 0x3f];
-	n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810ULL) >> 1) * 0x1010101008040201ULL) >> 56];	// E1B4B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x4040404040201008ULL) * 0x0020202020408102ULL) >> 56];	// D1G4G8
-	n_flipped += COUNT_FLIP_L[(P >> 23) & 0x7e];
-	n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040ULL) >> 2) * 0x0804020101010101ULL) >> 56];	// G1G4C8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H4.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H4(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_3[((P & 0x8080808080402010ULL) * 0x0010101010204081ULL) >> 56];	// E1H4H8
-	n_flipped += COUNT_FLIP_L[(P >> 24) & 0x7f];
-	n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080ULL) >> 3) * 0x0804020101010101ULL) >> 56];	// H1H4D8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x0804020101010101ULL) * 0x0102040810101010ULL) >> 56];	// A1A5D8
-	n_flipped += COUNT_FLIP_R[(P >> 33) & 0x7f];
-	n_flipped += COUNT_FLIP_3[((P & 0x0101010102040810ULL) * 0x0808080808040201ULL) >> 56];	// E1A5A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x1008040202020202ULL) * 0x0081020408080808ULL) >> 56];	// B1B5E8
-	n_flipped += COUNT_FLIP_R[(P >> 34) & 0x3f];
-	n_flipped += COUNT_FLIP_3[(((P & 0x0202020204081020ULL) >> 1) * 0x0808080808040201ULL) >> 56];	// F1B5B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 32) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x4040404020100804ULL) * 0x0040404040408102ULL) >> 56];	// C1G5G8
-	n_flipped += COUNT_FLIP_L[(P >> 31) & 0x7e];
-	n_flipped += COUNT_FLIP_3[(((P & 0x0810204040404040ULL) >> 3) * 0x1008040201010101ULL) >> 56];	// G1G5D8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H5.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H5(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_4[((P & 0x8080808040201008ULL) * 0x0020202020204081ULL) >> 56];	// D1H5H8
-	n_flipped += COUNT_FLIP_L[(P >> 32) & 0x7f];
-	n_flipped += COUNT_FLIP_3[(((P & 0x1020408080808080ULL) >> 4) * 0x1008040201010101ULL) >> 56];	// H1H5E8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x0402010101010101ULL) * 0x0102040810202020ULL) >> 56];	// A1A6C8
-	n_flipped += COUNT_FLIP_R[(P >> 41) & 0x7f];
-	n_flipped += COUNT_FLIP_2[((P & 0x0101010204081020ULL) * 0x0404040404040201ULL) >> 56];	// F1A6A8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x0804020202020202ULL) * 0x0081020408101010ULL) >> 56];	// B1B6D8
-	n_flipped += COUNT_FLIP_R[(P >> 42) & 0x3f];
-	n_flipped += COUNT_FLIP_2[(((P & 0x0202020408102040ULL) >> 1) * 0x0404040404040201ULL) >> 56];	// G1B6B8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x1008040201000000ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[(P >> 40) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0810204080000000ULL) * 0x0101010101010101ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x4040402010080402ULL) * 0x0080808080808102ULL) >> 56];	// B1G6G8
-	n_flipped += COUNT_FLIP_L[(P >> 39) & 0x7e];
-	n_flipped += COUNT_FLIP_2[(((P & 0x1020404040404040ULL) >> 4) * 0x2010080402010101ULL) >> 56];	// G1G6E8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H6.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H6(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_5[((P & 0x8080804020100804ULL) * 0x0040404040404081ULL) >> 56];	// C1H6H8
-	n_flipped += COUNT_FLIP_L[(P >> 40) & 0x7f];
-	n_flipped += COUNT_FLIP_2[(((P & 0x2040808080808080ULL) >> 5) * 0x2010080402010101ULL) >> 56];	// H1H6F8
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0000010101010101ULL) * 0x0102040810204080ULL) >> 55];
-	n_flipped += COUNT_FLIP_R[(P >> 49) & 0x7f];
-	n_flipped += COUNT_FLIP_R[((P & 0x0000020408102040ULL) * 0x0101010101010101ULL) >> 57];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0000020202020202ULL) * 0x0081020408102040ULL) >> 55];
-	n_flipped += COUNT_FLIP_R[(P >> 50) & 0x3f];
-	n_flipped += COUNT_FLIP_R[((P & 0x0000040810204080ULL) * 0x0101010101010101ULL) >> 58];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0000040404040404ULL) * 0x0040810204081020ULL) >> 55];
-	n_flipped += COUNT_FLIP_2[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_2[((P & 0x00040A1120408000ULL) * 0x0101010101010101ULL) >> 56];	// A5C7H2
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0000080808080808ULL) * 0x0020408102040810ULL) >> 55];
-	n_flipped += COUNT_FLIP_3[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_3[((P & 0x0008142241800000ULL) * 0x0101010101010101ULL) >> 56];	// A4D7H3
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0000101010101010ULL) * 0x0010204081020408ULL) >> 55];
-	n_flipped += COUNT_FLIP_4[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_4[((P & 0x0010284482010000ULL) * 0x0101010101010101ULL) >> 56];	// A3E7H4
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0000202020202020ULL) * 0x0008102040810204ULL) >> 55];
-	n_flipped += COUNT_FLIP_5[(P >> 48) & 0xff];
-	n_flipped += COUNT_FLIP_5[((P & 0x0020508804020100ULL) * 0x0101010101010101ULL) >> 56];	// A2F7H5
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0000404040404040ULL) * 0x0004081020408102ULL) >> 55];
-	n_flipped += COUNT_FLIP_L[(P >> 47) & 0x7e];
-	n_flipped += COUNT_FLIP_L[((P & 0x0000201008040201ULL) * 0x0101010101010101ULL) >> 55];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H7.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H7(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0000808080808080ULL) * 0x0002040810204081ULL) >> 55];
-	n_flipped += COUNT_FLIP_L[(P >> 48) & 0x7f];
-	n_flipped += COUNT_FLIP_L[((P & 0x0000402010080402ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square A8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_A8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0001010101010101ULL) * 0x0102040810204080ULL) >> 56];
-	n_flipped += COUNT_FLIP_R[P >> 57];
-	n_flipped += COUNT_FLIP_R[((P & 0x0002040810204080ULL) * 0x0101010101010101ULL) >> 57];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square B8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_B8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0002020202020202ULL) * 0x0081020408102040ULL) >> 56];
-	n_flipped += COUNT_FLIP_R[P >> 58];
-	n_flipped += COUNT_FLIP_R[((P & 0x0004081020408000ULL) * 0x0101010101010101ULL) >> 58];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square C8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_C8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0004040404040404ULL) * 0x0040810204081020ULL) >> 56];
-	n_flipped += COUNT_FLIP_2[P >> 56];
-	n_flipped += COUNT_FLIP_2[((P & 0x040A112040800000ULL) * 0x0101010101010101ULL) >> 56];	// A6C8H3
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square D8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_D8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0008080808080808ULL) * 0x0020408102040810ULL) >> 56];
-	n_flipped += COUNT_FLIP_3[P >> 56];
-	n_flipped += COUNT_FLIP_3[((P & 0x0814224180000000ULL) * 0x0101010101010101ULL) >> 56];	// A5D8H4
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square E8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_E8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0010101010101010ULL) * 0x0010204081020408ULL) >> 56];
-	n_flipped += COUNT_FLIP_4[P >> 56];
-	n_flipped += COUNT_FLIP_4[((P & 0x1028448201000000ULL) * 0x0101010101010101ULL) >> 56];	// A4E8H5
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square F8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_F8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0020202020202020ULL) * 0x0008102040810204ULL) >> 56];
-	n_flipped += COUNT_FLIP_5[P >> 56];
-	n_flipped += COUNT_FLIP_5[((P & 0x0050880402010000ULL) * 0x0101010101010101ULL) >> 56];	// A3F8H6
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square G8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_G8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0040404040404040ULL) * 0x0004081020408102ULL) >> 56];
-	n_flipped += COUNT_FLIP_L[(P >> 55) & 0x7e];
-	n_flipped += COUNT_FLIP_L[((P & 0x0020100804020100ULL) * 0x0101010101010101ULL) >> 55];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when playing on square H8.
- *
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-static int count_last_flip_H8(const unsigned long long P)
-{
-	int n_flipped;
-
-	n_flipped  = COUNT_FLIP_L[((P & 0x0080808080808080ULL) * 0x0002040810204081ULL) >> 56];
-	n_flipped += COUNT_FLIP_L[(P >> 56) & 0x7f];
-	n_flipped += COUNT_FLIP_L[((P & 0x0040201008040201ULL) * 0x0101010101010101ULL) >> 56];
-
-	return n_flipped;
-}
-
-/**
- * Count last flipped discs when plassing.
- *
- * @param P player's disc pattern (unused).
- * @return zero.
- */
-static int count_last_flip_pass(const unsigned long long P)
-{
-	(void) P; // useless code to shut-up compiler warning
-	return 0;
-}
-
-/** Array of functions to count flipped discs of the last move */
-int (*count_last_flip[])(const unsigned long long) = {
-	count_last_flip_A1, count_last_flip_B1, count_last_flip_C1, count_last_flip_D1,
-	count_last_flip_E1, count_last_flip_F1, count_last_flip_G1, count_last_flip_H1,
-	count_last_flip_A2, count_last_flip_B2, count_last_flip_C2, count_last_flip_D2,
-	count_last_flip_E2, count_last_flip_F2, count_last_flip_G2, count_last_flip_H2,
-	count_last_flip_A3, count_last_flip_B3, count_last_flip_C3, count_last_flip_D3,
-	count_last_flip_E3, count_last_flip_F3, count_last_flip_G3, count_last_flip_H3,
-	count_last_flip_A4, count_last_flip_B4, count_last_flip_C4, count_last_flip_D4,
-	count_last_flip_E4, count_last_flip_F4, count_last_flip_G4, count_last_flip_H4,
-	count_last_flip_A5, count_last_flip_B5, count_last_flip_C5, count_last_flip_D5,
-	count_last_flip_E5, count_last_flip_F5, count_last_flip_G5, count_last_flip_H5,
-	count_last_flip_A6, count_last_flip_B6, count_last_flip_C6, count_last_flip_D6,
-	count_last_flip_E6, count_last_flip_F6, count_last_flip_G6, count_last_flip_H6,
-	count_last_flip_A7, count_last_flip_B7, count_last_flip_C7, count_last_flip_D7,
-	count_last_flip_E7, count_last_flip_F7, count_last_flip_G7, count_last_flip_H7,
-	count_last_flip_A8, count_last_flip_B8, count_last_flip_C8, count_last_flip_D8,
-	count_last_flip_E8, count_last_flip_F8, count_last_flip_G8, count_last_flip_H8,
-	count_last_flip_pass,
-};
-
->>>>>>> b3f048d (copyright changes)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/count_last_flip_lzcnt.c b/src/count_last_flip_lzcnt.c
index 07b8cda..4094684 100644
--- a/src/count_last_flip_lzcnt.c
+++ b/src/count_last_flip_lzcnt.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file count_last_flip_lzcnt.c
  *
@@ -29,17 +25,7 @@
  * 
  */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #include "bit_intrinsics.h"
-=======
-#ifdef __LZCNT__
-#include <x86intrin.h>
-#endif
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-#include "bit_intrinsics.h"
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 /** precomputed count flip array */
 static const unsigned char COUNT_FLIP[8][256] = {
@@ -125,74 +111,6 @@ static const unsigned char COUNT_FLIP[8][256] = {
 	}
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifdef lzcnt_u64
-
-/* bit masks for vertical and diagonal lines for A8..H8 */
-static const unsigned long long mask_9_7[8][2] = {
-	{ 0x0204081020408000, 0x0000000000000000 },
-	{ 0x0204081020400000, 0x8000000000000000 },
-	{ 0x0204081020000000, 0x8040000000000000 },
-	{ 0x0204081000000000, 0x8040200000000000 },
-	{ 0x0204080000000000, 0x8040201000000000 },
-	{ 0x0204000000000000, 0x8040201008000000 },
-	{ 0x0200000000000000, 0x8040201008040000 },
-	{ 0x0000000000000000, 0x8040201008040200 }
-=======
-/* bit masks for diagonal lines */
-static const unsigned long long mask_d[2][64] = {
-	{
-		0x0000000000000001ULL, 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL,
-		0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL,
-		0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL,
-		0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL,
-		0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL,
-		0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL,
-		0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL,
-		0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL,
-		0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL,
-		0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL,
-		0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL,
-		0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL,
-		0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL,
-		0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL,
-		0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL,
-		0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL
-	},
-	{
-		0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL,
-		0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, 0x0000000000000080ULL,
-		0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL,
-		0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL,
-		0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL,
-		0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL,
-		0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL,
-		0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL,
-		0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL,
-		0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL,
-		0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL,
-		0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL,
-		0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL,
-		0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL,
-		0x0100000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL,
-		0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL
-	}
-};
-
-#ifdef __LZCNT__
-
-/* bit masks for vertical and diagonal lines for A8..H8 */
-static const unsigned long long mask_7[8] = {
-	0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL,
-	0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL, 0x0000000000000000ULL
-};
-
-static const unsigned long long mask_9[8] = {
-	0x0000000000000000ULL, 0x0000000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL,
-	0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 #ifdef lzcnt_u64
 
 /* bit masks for vertical and diagonal lines for A8..H8 */
@@ -205,15 +123,10 @@ static const unsigned long long mask_9_7[8][2] = {
 	{ 0x0204000000000000, 0x8040201008000000 },
 	{ 0x0200000000000000, 0x8040201008040000 },
 	{ 0x0000000000000000, 0x8040201008040200 }
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 };
 
 #else
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 /* bit masks for vertical and diagonal lines for A1..H1 */
 static const unsigned long long mask_9_7[8][2] = {
 	{ 0x0000000000000000, 0x4020100804020100 },
@@ -224,28 +137,10 @@ static const unsigned long long mask_9_7[8][2] = {
 	{ 0x0000000408102040, 0x0000000000020100 },
 	{ 0x0000020408102040, 0x0000000000000100 },
 	{ 0x0001020408102040, 0x0000000000000000 }
-<<<<<<< HEAD
-=======
-/* bit masks for vertical and diagonal lines for A1..H2 */
-static const unsigned long long mask_7[8] = {
-	0x0000000000000000ULL, 0x0000000000000040ULL, 0x0000000000002040ULL, 0x0000000000102040ULL,
-	0x0000000008102040ULL, 0x0000000408102040ULL, 0x0000020408102040ULL, 0x0001020408102040ULL
-};
-
-static const unsigned long long mask_9[16] = {
-	0x4020100804020100ULL, 0x0020100804020100ULL, 0x0000100804020100ULL, 0x0000000804020100ULL,
-	0x0000000004020100ULL, 0x0000000000020100ULL, 0x0000000000000100ULL, 0x0000000000000000ULL
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 };
 
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 /* bit masks for diagonal lines for A5..H6 */
 static const unsigned long long mask_d[16][2] = {
 	{ 0x0000000102040810, 0x0804020100000000 },
@@ -265,55 +160,18 @@ static const unsigned long long mask_d[16][2] = {
 	{ 0x1020408000000000, 0x0080402010080402 },
 	{ 0x2040800000000000, 0x0000804020100804 }
 };
-<<<<<<< HEAD
 
 #ifdef HAS_CPU_64
 
 #define	packV(P, x)	(((((P) >> (x)) & 0x0101010101010101) * 0x0102040810204080) >> 56)
 #define packD(PM)	(((PM) * 0x0101010101010101) >> 56)
-=======
-#ifdef __x86_64__
-
-#define	packV(P, x)	(((((P) >> (x)) & 0x0101010101010101ULL) * 0x0102040810204080ULL) >> 56)
-#define packD(PM)	(((PM) * 0x0101010101010101ULL) >> 56)
-#define	mask_8(x)	(0x0101010101010101ULL << (x))
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-
-#ifdef HAS_CPU_64
-
-#define	packV(P, x)	(((((P) >> (x)) & 0x0101010101010101) * 0x0102040810204080) >> 56)
-#define packD(PM)	(((PM) * 0x0101010101010101) >> 56)
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 #else
 
 #define	packV(P, x)	(((((((unsigned int)(P)) >> (x)) & 0x01010101u) + (((((unsigned int)((P) >> 32)) >> (x)) & 0x01010101u) << 4)) * 0x01020408u) >> 24)
 #define	packD(PM)	(((((unsigned int)(PM)) * 0x01010101u) + (((unsigned int)((PM) >> 32)) * 0x01010101u)) >> 24)
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-#endif // HAS_CPU_64
-=======
-#define	mask_8(x)	(((unsigned long long) (0x01010101u << (x)) << 32) | (0x01010101u << (x)))
-
-static int inline __lzcnt64(unsigned long long x) {
-	int	y;
-	__asm__ (
-		"lzcntl	%1, %0\n\t"
-		"lzcntl	%2, %2\n\t"
-		"leal	(%0, %2), %0\n\t"
-		"cmovnc	%2, %0"
-	: "=&r" (y) : "0" ((unsigned int) x), "r" ((unsigned int) (x >> 32)) );
-	return y;
-}
-
-#endif // __x86_64__
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 
 #endif // HAS_CPU_64
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 /**
  * Count last flipped discs when playing on the last empty.
@@ -326,333 +184,6 @@ static int inline __lzcnt64(unsigned long long x) {
 int last_flip(int pos, unsigned long long P)
 {
 	unsigned long long	P8, P7, P9;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	int	n_flipped;
-	int	x = pos & 7;
-
-	n_flipped = COUNT_FLIP[x][(unsigned char) (P >> (pos & 0x38))];
-
-#ifdef lzcnt_u64
-
-	if (pos < 0x20) {
-		P = vertical_mirror(P);
-		pos ^= 0x38;
-	}
-
-	if (pos >= 0x30) {
-		P <<= (64 - pos);
-		P8 = P & 0x0101010101010101;
-		P7 = P & mask_9_7[x][0];
-		P9 = (P << 8) & mask_9_7[x][1];
-		n_flipped += ((lzcnt_u64(P8) & 0x38) + (lzcnt_u64(P7) & 0x38) + (lzcnt_u64(P9) & 0x38)) >> 2;
-
-		return n_flipped;
-	}
-
-#else // ls1b - slow
-
-	if (pos & 0x10) {	// 0 1 2 3 4 5 6 7 -> 0 1 4 5 4 5 0 1
-		P = vertical_mirror(P);
-		pos ^= 0x38;
-	}
-
-	if (pos < 0x10) {
-		P >>= (pos + 1);
-		P8 = P & 0x0080808080808080;
-		n_flipped += ((P8 & -P8) * 0x00004080c1014180) >> 60;
-		P7 = P & mask_9_7[x][0];
-		n_flipped += ((P7 & -P7) * 0x0001040c2050c000) >> 60;
-		P9 = P & mask_9_7[x][1];
-		n_flipped += ((P9 & -P9) * 0x000010100c080503) >> 60;
-
-		return n_flipped;
-	}
-#endif
-
-	n_flipped += COUNT_FLIP[pos >> 3][packV(P, x)];
-	P7 = P & mask_d[pos - 0x20][0];
-	n_flipped += COUNT_FLIP[x][packD(P7)];
-	P9 = P & mask_d[pos - 0x20][1];
-	n_flipped += COUNT_FLIP[x][packD(P9)];
-
-	return n_flipped;
-}
-=======
-/**
- * @file count_last_flip_lzcnt.c
- *
- *
- * A function is provided to count the number of fipped disc of the last move.
- *
- * The basic principle is to read into an array a precomputed result. Doing
- * this is easy for a single line ; as we can use arrays of the form:
- *  - COUNT_FLIP[square where we play][8-bits disc pattern].
- * The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by a simple
- * multiplication and to right-shift the result to scale it into a number
- * between 0 and 255.
- * Once we get our 8-bits disc patterns, we directly get the number of
- * flipped discs from the precomputed array, and add them from each flipping
- * lines.
- * For optimization purpose, the value returned is twice the number of flipped
- * disc, to facilitate the computation of disc difference.
- *
- * @date 1998 - 2014
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- * 
- */
-
-#ifdef __LZCNT__
-#include <x86intrin.h>
-#endif
-
-/** precomputed count flip array */
-static const unsigned char COUNT_FLIP[8][256] = {
-	{
-		 0,  0,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		10, 10,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		12, 12,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		10, 10,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-	},
-	{
-		 0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 8,  8,  8,  8,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		10, 10, 10, 10,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 8,  8,  8,  8,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-	},
-	{
-		 0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 8, 10,  8,  8,  8, 10,  8,  8,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	},
-	{
-		 0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 6, 10,  8,  8,  6,  6,  6,  6,  6, 10,  8,  8,  6,  6,  6,  6,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	},
-	{
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-		 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-		 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	}
-};
-
-/* bit masks for diagonal lines */
-static const unsigned long long mask_d[2][64] = {
-	{
-		0x0000000000000001ULL, 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL,
-		0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL,
-		0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL,
-		0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL,
-		0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL,
-		0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL,
-		0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL,
-		0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL,
-		0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL,
-		0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL,
-		0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL,
-		0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL,
-		0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL,
-		0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL,
-		0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL,
-		0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL
-	},
-	{
-		0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL,
-		0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, 0x0000000000000080ULL,
-		0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL,
-		0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL,
-		0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL,
-		0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL,
-		0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL,
-		0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL,
-		0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL,
-		0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL,
-		0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL,
-		0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL,
-		0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL,
-		0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL,
-		0x0100000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL,
-		0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL
-	}
-};
-
-#ifdef __LZCNT__
-
-/* bit masks for vertical and diagonal lines for A8..H8 */
-static const unsigned long long mask_7[8] = {
-	0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL,
-	0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL, 0x0000000000000000ULL
-};
-
-static const unsigned long long mask_9[8] = {
-	0x0000000000000000ULL, 0x0000000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL,
-	0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL
-};
-
-#else
-
-/* bit masks for vertical and diagonal lines for A1..H2 */
-static const unsigned long long mask_7[8] = {
-	0x0000000000000000ULL, 0x0000000000000040ULL, 0x0000000000002040ULL, 0x0000000000102040ULL,
-	0x0000000008102040ULL, 0x0000000408102040ULL, 0x0000020408102040ULL, 0x0001020408102040ULL
-};
-
-static const unsigned long long mask_9[16] = {
-	0x4020100804020100ULL, 0x0020100804020100ULL, 0x0000100804020100ULL, 0x0000000804020100ULL,
-	0x0000000004020100ULL, 0x0000000000020100ULL, 0x0000000000000100ULL, 0x0000000000000000ULL
-};
-
-#endif
-
-#ifdef __x86_64__
-
-#define	packV(P, x)	(((((P) >> (x)) & 0x0101010101010101ULL) * 0x0102040810204080ULL) >> 56)
-#define packD(PM)	(((PM) * 0x0101010101010101ULL) >> 56)
-#define	mask_8(x)	(0x0101010101010101ULL << (x))
-
-#else
-
-#define	packV(P, x)	(((((((unsigned int)(P)) >> (x)) & 0x01010101u) + (((((unsigned int)((P) >> 32)) >> (x)) & 0x01010101u) << 4)) * 0x01020408u) >> 24)
-#define	packD(PM)	(((((unsigned int)(PM)) * 0x01010101u) + (((unsigned int)((PM) >> 32)) * 0x01010101u)) >> 24)
-#define	mask_8(x)	(((unsigned long long) (0x01010101u << (x)) << 32) | (0x01010101u << (x)))
-
-static int inline __lzcnt64(unsigned long long x) {
-	int	y;
-	__asm__ (
-		"lzcntl	%1, %0\n\t"
-		"lzcntl	%2, %2\n\t"
-		"leal	(%0, %2), %0\n\t"
-		"cmovnc	%2, %0"
-	: "=&r" (y) : "0" ((unsigned int) x), "r" ((unsigned int) (x >> 32)) );
-	return y;
-}
-
-#endif // __x86_64__
-
-/**
- * Count last flipped discs when playing on the last empty.
- *
- * @param pos the last empty square.
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-
-int last_flip(int pos, unsigned long long P)
-{
-	unsigned long long	P8, P7, P9;
-	unsigned int	t;
-	int	n_flipped;
-	int	x = pos & 7;
-	int	y = pos & 0x38;
-
-	n_flipped = COUNT_FLIP[x][(unsigned char) (P >> y)];
-
-	switch (pos & 0x30) {
-
-#ifdef __LZCNT__
-
-		case 0:
-			P = __builtin_bswap64(P);
-			y ^= 0x38;
-			// not break;
-		case 0x30:
-			P <<= (64 - y);
-			P8 = P & mask_8(x);
-			P7 = P & mask_7[x];
-			P9 = P & mask_9[x];
-			n_flipped += (((int) __lzcnt64(P8) & 0x38) + ((int) __lzcnt64(P7) & 0x38) + ((int) __lzcnt64(P9) & 0x38)) >> 2;
-			break;
-
-#else // ls1b - slow
-		case 0x30:
-			P = __builtin_bswap64(P);
-			pos ^= 0x38;
-			// not break;
-		case 0:
-			P >>= (pos + 1);
-			P8 = P & 0x0080808080808080ULL;
-			n_flipped += ((P8 & -P8) * 0x00004080c1014180ULL) >> 60;
-			P7 = P & mask_7[x];
-			n_flipped += ((P7 & -P7) * 0x0001040c2050c000ULL) >> 60;
-			P9 = P & mask_9[x];
-			n_flipped += ((P9 & -P9) * 0x000010100c080503ULL) >> 60;
-			break;
-#endif
-
-		default:
-			y = pos >> 3;
-			n_flipped += COUNT_FLIP[y][packV(P, x)];
-			P7 = P & mask_d[0][pos];
-			n_flipped += COUNT_FLIP[x][packD(P7)];
-			P9 = P & mask_d[1][pos];
-			n_flipped += COUNT_FLIP[x][packD(P9)];
-			break;
-	}
-	return n_flipped;
-}
->>>>>>> f24cc06 (avoid BMI2 for AMD; more lzcnt/tzcnt in count_last_flip_bitscan)
-=======
-	unsigned int	t;
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	int	n_flipped;
 	int	x = pos & 7;
 
@@ -703,4 +234,3 @@ int last_flip(int pos, unsigned long long P)
 
 	return n_flipped;
 }
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/count_last_flip_neon.c b/src/count_last_flip_neon.c
index 4800bc7..45cb89e 100644
--- a/src/count_last_flip_neon.c
+++ b/src/count_last_flip_neon.c
@@ -17,28 +17,10 @@
  * For optimization purpose, the value returned is twice the number of flipped
  * disc, to facilitate the computation of disc difference.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
  * @author Richard Delorme
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 1998 - 2020
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
- * @date 1998 - 2022
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.5
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
  * 
  */
 
@@ -128,14 +110,7 @@ const unsigned char COUNT_FLIP[8][256] = {
 	},
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifdef HAS_CPU_64
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-#ifdef HAS_CPU_64
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 /* bit masks for diagonal lines (interleaved) */
 const uint64x2_t mask_dvhd[64][2] = {
 	{{ 0x000000000000ff01, 0x0000000000000000 }, { 0x0801040102010101, 0x8001400120011001 }},
@@ -203,10 +178,6 @@ const uint64x2_t mask_dvhd[64][2] = {
 	{{ 0x0000000000000000, 0xff40008000000000 }, { 0x0440024001400040, 0x4040204010400840 }},
 	{{ 0x0000000000000000, 0xff80000000000000 }, { 0x0880048002800180, 0x8080408020801080 }}
 };
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 #else
 /* bit masks for diagonal lines */
 const uint64x2_t mask_dvhd[64][2] = {
@@ -276,11 +247,6 @@ const uint64x2_t mask_dvhd[64][2] = {
 	{{ 0x8000000000000000, 0xff00000000000000 }, { 0x8080808080808080, 0x8040201008040201 }}
 };
 #endif
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 
 /**
  * Count last flipped discs when playing on the last empty.
@@ -290,48 +256,6 @@ const uint64x2_t mask_dvhd[64][2] = {
  * @return flipped disc count.
  */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-int last_flip(int pos, unsigned long long P)
-{
-	unsigned int	n_flips;
-	const unsigned char *COUNT_FLIP_X = COUNT_FLIP[pos & 7];
-	const unsigned char *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
-	uint64x2_t	PP = vdupq_n_u64(P);
-	uint64x2_t	II;
-#ifdef HAS_CPU_64	// vaddvq
-	unsigned int t;
-	const uint64x2_t dmask = { 0x0808040402020101, 0x8080404020201010 };
-
-	PP = vreinterpretq_u64_u8(vzip1q_u8(vreinterpretq_u8_u64(PP), vreinterpretq_u8_u64(PP)));
-	II = vandq_u64(PP, mask_dvhd[pos][0]);	// 2 dirs interleaved
-	t = vaddvq_u16(vreinterpretq_u16_u64(II));
-	n_flips  = COUNT_FLIP_X[t >> 8];
-	n_flips += COUNT_FLIP_X[t & 0xFF];
-	II = vandq_u64(vreinterpretq_u64_u8(vtstq_u8(vreinterpretq_u8_u64(PP), vreinterpretq_u8_u64(mask_dvhd[pos][1]))), dmask);
-	t = vaddvq_u16(vreinterpretq_u16_u64(II));
-	n_flips += COUNT_FLIP_Y[t >> 8];
-	n_flips += COUNT_FLIP_Y[t & 0xFF];
-
-#else // Neon kindergarten
-	const uint64x2_t dmask = { 0x1020408001020408, 0x1020408001020408 };
-
-	II = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_u64(vandq_u64(PP, mask_dvhd[pos][0])))));
-	n_flips  = COUNT_FLIP_X[vgetq_lane_u32(vreinterpretq_u32_u64(II), 2)];
-	n_flips += COUNT_FLIP_X[vgetq_lane_u32(vreinterpretq_u32_u64(II), 0)];
-	II = vreinterpretq_u64_s8(vnegq_s8(vreinterpretq_s8_u8(vtstq_u8(vreinterpretq_u8_u64(PP), vreinterpretq_u8_u64(mask_dvhd[pos][1])))));
-	II = vpaddlq_u32(vmulq_u32(vreinterpretq_u32_u64(dmask), vreinterpretq_u32_u64(II)));
-	n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(II), 11)];
-	n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(II), 3)];
-#endif
-<<<<<<< HEAD
-=======
-#ifndef HAS_CPU_64
-#define vaddvq_u16(x)	vget_lane_u64(vpaddl_u32(vpaddl_u16(vadd_u16(vget_high_u16(x), vget_low_u16(x)))), 0)
-#endif
-
-=======
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 int last_flip(int pos, unsigned long long P)
 {
 	unsigned int	n_flips;
@@ -353,9 +277,6 @@ int last_flip(int pos, unsigned long long P)
 	n_flips += COUNT_FLIP_Y[t >> 8];
 	n_flips += COUNT_FLIP_Y[t & 0xFF];
 
-<<<<<<< HEAD
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
 #else // Neon kindergarten
 	const uint64x2_t dmask = { 0x1020408001020408, 0x1020408001020408 };
 
@@ -367,10 +288,6 @@ int last_flip(int pos, unsigned long long P)
 	n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(II), 11)];
 	n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(II), 3)];
 #endif
-
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-=======
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 	return n_flips;
 }
 
diff --git a/src/count_last_flip_plain.c b/src/count_last_flip_plain.c
index 88fbd9a..e13e3aa 100644
--- a/src/count_last_flip_plain.c
+++ b/src/count_last_flip_plain.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file count_last_flip_plain.c
  *
@@ -163,15 +159,7 @@ const unsigned long long mask_d[2][64] = {
 #define	packV(P, x)	(((((((unsigned int)(P)) >> (x)) & 0x01010101u) + (((((unsigned int)((P) >> 32)) >> (x)) & 0x01010101u) << 4)) * 0x01020408u) >> 24)
 #define	packD(PM)	(((((unsigned int)(PM)) * 0x01010101u) + (((unsigned int)((PM) >> 32)) * 0x01010101u)) >> 24)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #endif // HAS_CPU_64
-=======
-#endif // __x86_64__
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-#endif // HAS_CPU_64
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 /**
  * Count last flipped discs when playing on the last empty.
@@ -180,24 +168,10 @@ const unsigned long long mask_d[2][64] = {
  * @param P player's disc pattern.
  * @return flipped disc count.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-int last_flip(int pos, unsigned long long P)
-{
-	unsigned long long PM;
-	int	n_flipped;
-=======
-inline int last_flip(int pos, unsigned long long P)
-{
-	unsigned long long PM;
-	unsigned char	n_flipped;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 int last_flip(int pos, unsigned long long P)
 {
 	unsigned long long PM;
 	int	n_flipped;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	int	x = pos & 0x07;
 	int	y = pos >> 3;
 
@@ -210,194 +184,3 @@ int last_flip(int pos, unsigned long long P)
 
 	return n_flipped;
 }
-<<<<<<< HEAD
-=======
-/**
- * @file count_last_flip_plain.c
- *
- *
- * A function is provided to count the number of fipped disc of the last move.
- *
- * The basic principle is to read into an array a precomputed result. Doing
- * this is easy for a single line ; as we can use arrays of the form:
- *  - COUNT_FLIP[square where we play][8-bits disc pattern].
- * The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by a simple
- * multiplication and to right-shift the result to scale it into a number
- * between 0 and 255.
- * Once we get our 8-bits disc patterns, we directly get the number of
- * flipped discs from the precomputed array, and add them from each flipping
- * lines.
- * For optimization purpose, the value returned is twice the number of flipped
- * disc, to facilitate the computation of disc difference.
- *
- * @date 1998 - 2017
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- * 
- */
-
-/** precomputed count flip array */
-const unsigned char COUNT_FLIP[8][256] = {
-	{
-		 0,  0,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		10, 10,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		12, 12,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		10, 10,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-	},
-	{
-		 0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 8,  8,  8,  8,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		10, 10, 10, 10,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 8,  8,  8,  8,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-	},
-	{
-		 0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 8, 10,  8,  8,  8, 10,  8,  8,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	},
-	{
-		 0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 6, 10,  8,  8,  6,  6,  6,  6,  6, 10,  8,  8,  6,  6,  6,  6,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	},
-	{
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-		 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-		 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-};
-
-/* bit masks for diagonal lines */
-const unsigned long long mask_d[2][64] = {
-	{
-		0x0000000000000001ULL, 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL,
-		0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL,
-		0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL,
-		0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL,
-		0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL,
-		0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL,
-		0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL,
-		0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL,
-		0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL,
-		0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL,
-		0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL,
-		0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL,
-		0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL,
-		0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL,
-		0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL,
-		0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL
-	},
-	{
-		0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL,
-		0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, 0x0000000000000080ULL,
-		0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL,
-		0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL,
-		0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL,
-		0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL,
-		0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL,
-		0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL,
-		0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL,
-		0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL,
-		0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL,
-		0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL,
-		0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL,
-		0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL,
-		0x0100000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL,
-		0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL
-	}
-};
-
-#ifdef HAS_CPU_64
-
-#define	packV(P, x)	(((((P) >> (x)) & 0x0101010101010101ULL) * 0x0102040810204080ULL) >> 56)
-#define packD(PM)	(((PM) * 0x0101010101010101ULL) >> 56)
-
-#else
-
-#define	packV(P, x)	(((((((unsigned int)(P)) >> (x)) & 0x01010101u) + (((((unsigned int)((P) >> 32)) >> (x)) & 0x01010101u) << 4)) * 0x01020408u) >> 24)
-#define	packD(PM)	(((((unsigned int)(PM)) * 0x01010101u) + (((unsigned int)((PM) >> 32)) * 0x01010101u)) >> 24)
-
-#endif // __x86_64__
-
-/**
- * Count last flipped discs when playing on the last empty.
- *
- * @param pos the last empty square.
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-inline int last_flip(int pos, unsigned long long P)
-{
-	unsigned long long PM;
-	unsigned char	n_flipped;
-	int	x = pos & 0x07;
-	int	y = pos >> 3;
-
-	n_flipped  = COUNT_FLIP[y][packV(P, x)];
-	n_flipped += COUNT_FLIP[x][(unsigned char) (P >> (y * 8))];
-	PM = P & mask_d[0][pos];
-	n_flipped += COUNT_FLIP[x][packD(PM)];
-	PM = P & mask_d[1][pos];
-	n_flipped += COUNT_FLIP[x][packD(PM)];
-
-	return n_flipped;
-}
->>>>>>> b3f048d (copyright changes)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/count_last_flip_sse.c b/src/count_last_flip_sse.c
index 5ebc65c..45fe75d 100644
--- a/src/count_last_flip_sse.c
+++ b/src/count_last_flip_sse.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file count_last_flip_sse.c
  *
@@ -21,51 +17,18 @@
  * For optimization purpose, the value returned is twice the number of flipped
  * disc, to facilitate the computation of disc difference.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
  * @author Richard Delorme
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 1998 - 2020
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
- * @date 1998 - 2023
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.5
->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask)
  * 
  */
 
 #include "bit.h"
-<<<<<<< HEAD
-<<<<<<< HEAD
-#include <stdint.h>
-
-<<<<<<< HEAD
-/** precomputed count flip array */
-const uint8_t COUNT_FLIP[8][256] = {
-=======
-=======
 #include <stdint.h>
->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask)
 
-#define	DUPLO	0x44
-
-=======
->>>>>>> 593fff4 (use appropriate _mm_set1)
 /** precomputed count flip array */
-<<<<<<< HEAD
-const unsigned char COUNT_FLIP[8][256] = {
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 const uint8_t COUNT_FLIP[8][256] = {
->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask)
 	{
 		 0,  0,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
 		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
@@ -226,51 +189,12 @@ const V4DI mask_dvhd[64] = {
 
 int last_flip(int pos, unsigned long long P)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	uint_fast8_t	n_flips;
 	unsigned int	t;
 	const uint8_t *COUNT_FLIP_X = COUNT_FLIP[pos & 7];
 	const uint8_t *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
   #ifdef AVXLASTFLIP	// no gain
 	__m256i PP = _mm256_set1_epi64x(P);
-<<<<<<< HEAD
-
-	n_flips  = COUNT_FLIP_X[(P >> (pos & 0x38)) & 0xFF];
-    #ifdef __AVX512VL__
-    	t = _cvtmask32_u32(_mm256_test_epi8_mask(PP, mask_dvhd[pos].v4));
-    #else
-	t = _mm256_movemask_epi8(_mm256_sub_epi8(_mm256_setzero_si256(), _mm256_and_si256(PP, mask_dvhd[pos].v4)));
-    #endif
-	n_flips += COUNT_FLIP_Y[t & 0xFF];
-	t >>= 16;
-
-  #else
-	__m128i PP = _mm_set1_epi64x(P);
-	__m128i II = _mm_sad_epu8(_mm_and_si128(PP, mask_dvhd[pos].v2[0]), _mm_setzero_si128());
-
-	n_flips  = COUNT_FLIP_X[_mm_extract_epi16(II, 4)];
-	n_flips += COUNT_FLIP_X[_mm_cvtsi128_si32(II)];
-    #ifdef __AVX512VL__
-    	t = _cvtmask16_u32(_mm_test_epi8_mask(PP, mask_dvhd[pos].v2[1]));
-    #else
-	t = _mm_movemask_epi8(_mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(PP, mask_dvhd[pos].v2[1])));
-    #endif
-  #endif
-	n_flips += COUNT_FLIP_Y[t >> 8];
-	n_flips += COUNT_FLIP_Y[t & 0xFF];
-=======
-	unsigned char	n_flips;
-=======
-	uint_fast8_t	n_flips;
->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask)
-	unsigned int	t;
-	const uint8_t *COUNT_FLIP_X = COUNT_FLIP[pos & 7];
-	const uint8_t *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
-  #ifdef AVXLASTFLIP	// no gain
-	__m256i PP = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(P));
-=======
->>>>>>> 593fff4 (use appropriate _mm_set1)
 
 	n_flips  = COUNT_FLIP_X[(P >> (pos & 0x38)) & 0xFF];
     #ifdef __AVX512VL__
@@ -294,242 +218,8 @@ int last_flip(int pos, unsigned long long P)
     #endif
   #endif
 	n_flips += COUNT_FLIP_Y[t >> 8];
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flips += COUNT_FLIP_Y[(unsigned char) t];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	n_flips += COUNT_FLIP_Y[t & 0xff];
->>>>>>> 26dad03 (Use player bits only in board_score_1)
-=======
 	n_flips += COUNT_FLIP_Y[t & 0xFF];
->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask)
 
 	return n_flips;
 }
 
-<<<<<<< HEAD
-=======
-/**
- * @file count_last_flip_sse.c
- *
- *
- * A function is provided to count the number of fipped disc of the last move.
- *
- * The basic principle is to read into an array a precomputed result. Doing
- * this is easy for a single line ; as we can use arrays of the form:
- *  - COUNT_FLIP[square where we play][8-bits disc pattern].
- * The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by the 
- * SSE PMOVMSKB or PSADBW instruction.
- * Once we get our 8-bits disc patterns, we directly get the number of
- * flipped discs from the precomputed array, and add them from each flipping
- * lines.
- * For optimization purpose, the value returned is twice the number of flipped
- * disc, to facilitate the computation of disc difference.
- *
- * @date 1998 - 2020
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- * 
- */
-
-#include "bit.h"
-
-/** precomputed count flip array */
-const unsigned char COUNT_FLIP[8][256] = {
-	{
-		 0,  0,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		10, 10,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		12, 12,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		10, 10,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-		 8,  8,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,  6,  6,  0,  0,  2,  2,  0,  0,  4,  4,  0,  0,  2,  2,  0,  0,
-	},
-	{
-		 0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 8,  8,  8,  8,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		10, 10, 10, 10,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 8,  8,  8,  8,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-		 6,  6,  6,  6,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  2,  2,  2,  2,  0,  0,  0,  0,
-	},
-	{
-		 0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 8, 10,  8,  8,  8, 10,  8,  8,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 6,  8,  6,  6,  6,  8,  6,  6,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-		 4,  6,  4,  4,  4,  6,  4,  4,  0,  2,  0,  0,  0,  2,  0,  0,  2,  4,  2,  2,  2,  4,  2,  2,  0,  2,  0,  0,  0,  2,  0,  0,
-	},
-	{
-		 0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 6, 10,  8,  8,  6,  6,  6,  6,  6, 10,  8,  8,  6,  6,  6,  6,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 4,  8,  6,  6,  4,  4,  4,  4,  4,  8,  6,  6,  4,  4,  4,  4,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-		 2,  6,  4,  4,  2,  2,  2,  2,  2,  6,  4,  4,  2,  2,  2,  2,  0,  4,  2,  2,  0,  0,  0,  0,  0,  4,  2,  2,  0,  0,  0,  0,
-	},
-	{
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  4,  4,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 2, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  8,  6,  6,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 10,  8,  8,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-	{
-		 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-		 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0, 12, 10, 10,  8,  8,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-		 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	},
-};
-
-/* bit masks for diagonal lines */
-const V4DI mask_dvhd[64] = {
-	{{ 0x0000000000000001, 0x00000000000000ff, 0x0101010101010101, 0x8040201008040201 }},
-	{{ 0x0000000000000102, 0x00000000000000ff, 0x0202020202020202, 0x0080402010080402 }},
-	{{ 0x0000000000010204, 0x00000000000000ff, 0x0404040404040404, 0x0000804020100804 }},
-	{{ 0x0000000001020408, 0x00000000000000ff, 0x0808080808080808, 0x0000008040201008 }},
-	{{ 0x0000000102040810, 0x00000000000000ff, 0x1010101010101010, 0x0000000080402010 }},
-	{{ 0x0000010204081020, 0x00000000000000ff, 0x2020202020202020, 0x0000000000804020 }},
-	{{ 0x0001020408102040, 0x00000000000000ff, 0x4040404040404040, 0x0000000000008040 }},
-	{{ 0x0102040810204080, 0x00000000000000ff, 0x8080808080808080, 0x0000000000000080 }},
-	{{ 0x0000000000000102, 0x000000000000ff00, 0x0101010101010101, 0x4020100804020100 }},
-	{{ 0x0000000000010204, 0x000000000000ff00, 0x0202020202020202, 0x8040201008040201 }},
-	{{ 0x0000000001020408, 0x000000000000ff00, 0x0404040404040404, 0x0080402010080402 }},
-	{{ 0x0000000102040810, 0x000000000000ff00, 0x0808080808080808, 0x0000804020100804 }},
-	{{ 0x0000010204081020, 0x000000000000ff00, 0x1010101010101010, 0x0000008040201008 }},
-	{{ 0x0001020408102040, 0x000000000000ff00, 0x2020202020202020, 0x0000000080402010 }},
-	{{ 0x0102040810204080, 0x000000000000ff00, 0x4040404040404040, 0x0000000000804020 }},
-	{{ 0x0204081020408000, 0x000000000000ff00, 0x8080808080808080, 0x0000000000008040 }},
-	{{ 0x0000000000010204, 0x0000000000ff0000, 0x0101010101010101, 0x2010080402010000 }},
-	{{ 0x0000000001020408, 0x0000000000ff0000, 0x0202020202020202, 0x4020100804020100 }},
-	{{ 0x0000000102040810, 0x0000000000ff0000, 0x0404040404040404, 0x8040201008040201 }},
-	{{ 0x0000010204081020, 0x0000000000ff0000, 0x0808080808080808, 0x0080402010080402 }},
-	{{ 0x0001020408102040, 0x0000000000ff0000, 0x1010101010101010, 0x0000804020100804 }},
-	{{ 0x0102040810204080, 0x0000000000ff0000, 0x2020202020202020, 0x0000008040201008 }},
-	{{ 0x0204081020408000, 0x0000000000ff0000, 0x4040404040404040, 0x0000000080402010 }},
-	{{ 0x0408102040800000, 0x0000000000ff0000, 0x8080808080808080, 0x0000000000804020 }},
-	{{ 0x0000000001020408, 0x00000000ff000000, 0x0101010101010101, 0x1008040201000000 }},
-	{{ 0x0000000102040810, 0x00000000ff000000, 0x0202020202020202, 0x2010080402010000 }},
-	{{ 0x0000010204081020, 0x00000000ff000000, 0x0404040404040404, 0x4020100804020100 }},
-	{{ 0x0001020408102040, 0x00000000ff000000, 0x0808080808080808, 0x8040201008040201 }},
-	{{ 0x0102040810204080, 0x00000000ff000000, 0x1010101010101010, 0x0080402010080402 }},
-	{{ 0x0204081020408000, 0x00000000ff000000, 0x2020202020202020, 0x0000804020100804 }},
-	{{ 0x0408102040800000, 0x00000000ff000000, 0x4040404040404040, 0x0000008040201008 }},
-	{{ 0x0810204080000000, 0x00000000ff000000, 0x8080808080808080, 0x0000000080402010 }},
-	{{ 0x0000000102040810, 0x000000ff00000000, 0x0101010101010101, 0x0804020100000000 }},
-	{{ 0x0000010204081020, 0x000000ff00000000, 0x0202020202020202, 0x1008040201000000 }},
-	{{ 0x0001020408102040, 0x000000ff00000000, 0x0404040404040404, 0x2010080402010000 }},
-	{{ 0x0102040810204080, 0x000000ff00000000, 0x0808080808080808, 0x4020100804020100 }},
-	{{ 0x0204081020408000, 0x000000ff00000000, 0x1010101010101010, 0x8040201008040201 }},
-	{{ 0x0408102040800000, 0x000000ff00000000, 0x2020202020202020, 0x0080402010080402 }},
-	{{ 0x0810204080000000, 0x000000ff00000000, 0x4040404040404040, 0x0000804020100804 }},
-	{{ 0x1020408000000000, 0x000000ff00000000, 0x8080808080808080, 0x0000008040201008 }},
-	{{ 0x0000010204081020, 0x0000ff0000000000, 0x0101010101010101, 0x0402010000000000 }},
-	{{ 0x0001020408102040, 0x0000ff0000000000, 0x0202020202020202, 0x0804020100000000 }},
-	{{ 0x0102040810204080, 0x0000ff0000000000, 0x0404040404040404, 0x1008040201000000 }},
-	{{ 0x0204081020408000, 0x0000ff0000000000, 0x0808080808080808, 0x2010080402010000 }},
-	{{ 0x0408102040800000, 0x0000ff0000000000, 0x1010101010101010, 0x4020100804020100 }},
-	{{ 0x0810204080000000, 0x0000ff0000000000, 0x2020202020202020, 0x8040201008040201 }},
-	{{ 0x1020408000000000, 0x0000ff0000000000, 0x4040404040404040, 0x0080402010080402 }},
-	{{ 0x2040800000000000, 0x0000ff0000000000, 0x8080808080808080, 0x0000804020100804 }},
-	{{ 0x0001020408102040, 0x00ff000000000000, 0x0101010101010101, 0x0201000000000000 }},
-	{{ 0x0102040810204080, 0x00ff000000000000, 0x0202020202020202, 0x0402010000000000 }},
-	{{ 0x0204081020408000, 0x00ff000000000000, 0x0404040404040404, 0x0804020100000000 }},
-	{{ 0x0408102040800000, 0x00ff000000000000, 0x0808080808080808, 0x1008040201000000 }},
-	{{ 0x0810204080000000, 0x00ff000000000000, 0x1010101010101010, 0x2010080402010000 }},
-	{{ 0x1020408000000000, 0x00ff000000000000, 0x2020202020202020, 0x4020100804020100 }},
-	{{ 0x2040800000000000, 0x00ff000000000000, 0x4040404040404040, 0x8040201008040201 }},
-	{{ 0x4080000000000000, 0x00ff000000000000, 0x8080808080808080, 0x0080402010080402 }},
-	{{ 0x0102040810204080, 0xff00000000000000, 0x0101010101010101, 0x0100000000000000 }},
-	{{ 0x0204081020408000, 0xff00000000000000, 0x0202020202020202, 0x0201000000000000 }},
-	{{ 0x0408102040800000, 0xff00000000000000, 0x0404040404040404, 0x0402010000000000 }},
-	{{ 0x0810204080000000, 0xff00000000000000, 0x0808080808080808, 0x0804020100000000 }},
-	{{ 0x1020408000000000, 0xff00000000000000, 0x1010101010101010, 0x1008040201000000 }},
-	{{ 0x2040800000000000, 0xff00000000000000, 0x2020202020202020, 0x2010080402010000 }},
-	{{ 0x4080000000000000, 0xff00000000000000, 0x4040404040404040, 0x4020100804020100 }},
-	{{ 0x8000000000000000, 0xff00000000000000, 0x8080808080808080, 0x8040201008040201 }}
-};
-
-/**
- * Count last flipped discs when playing on the last empty.
- *
- * @param pos the last empty square.
- * @param P player's disc pattern.
- * @return flipped disc count.
- */
-
-int last_flip(int pos, unsigned long long P)
-{
-	unsigned char	n_flips;
-	unsigned int	t;
-	const unsigned char *COUNT_FLIP_X = COUNT_FLIP[pos & 7];
-	const unsigned char *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
-#ifdef AVXLASTFLIP
-	__m256i	MP = _mm256_and_si256(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(P)), mask_dvhd[pos].v4);
-
-	n_flips  = COUNT_FLIP_X[(unsigned char) (P >> (pos & 0x38))];
-	t = _mm256_movemask_epi8(_mm256_sub_epi8(_mm256_setzero_si256(), MP));
-	n_flips += COUNT_FLIP_Y[(unsigned char) t];
-	t >>= 16;
-#else
-	__m128i	PP, II;
-
-	PP = _mm_cvtsi64_si128(P);
-	PP = _mm_unpacklo_epi64(PP, PP);
-	II = _mm_sad_epu8(_mm_and_si128(PP, mask_dvhd[pos].v2[0]), _mm_setzero_si128());
-	n_flips  = COUNT_FLIP_X[_mm_cvtsi128_si32(II)];
-	n_flips += COUNT_FLIP_X[_mm_extract_epi16(II, 4)];
-	t = _mm_movemask_epi8(_mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(PP, mask_dvhd[pos].v2[1])));
-#endif
-	n_flips += COUNT_FLIP_Y[t >> 8];
-	n_flips += COUNT_FLIP_Y[(unsigned char) t];
-
-	return n_flips;
-}
-<<<<<<< HEAD
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-
->>>>>>> 6506166 (More SSE optimizations)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/edax.c b/src/edax.c
index 5801328..e25b975 100644
--- a/src/edax.c
+++ b/src/edax.c
@@ -173,15 +173,7 @@ void help_options(void)
 		"  verbose [n]          set Edax verbosity (default 1).\n"
 		"  noise [n]            start displaying Edax search result from this depth\n  (default 5).\n"
 		"  witdh [n]            display edax search results using <width> characters\n  (default 80).\n"
-<<<<<<< HEAD
-<<<<<<< HEAD
 		"  hash-table-size [n]  set hashtable size (default 22 bits).\n"
-=======
-		"  hash-table-size [n]  set hashtable size (default 18 bits).\n"
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-		"  hash-table-size [n]  set hashtable size (default 22 bits).\n"
->>>>>>> 42dc349 (add sfence to be sure; correct comments)
 		"  n-tasks [n]          control the number of parallel threads used in searching\n  (default 1).\n"
 		"  l|level [n]          search using limited depth (default 21).\n"
 		"  t|game-time <time>   search using limited time per game.\n"
@@ -257,10 +249,6 @@ void help_book(void)
  */
 void help_base(void) 
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	printf(	"\nGame DataBase :\n"
 		"  convert [file_in] [file_out]     convert between different format.\n"
 		"  unique [file_in] [file_out]      remove doublons in the base.\n"
@@ -268,18 +256,6 @@ void help_base(void)
 		"  correct [file_in] [n]            correct error in the last <n> moves.\n"
 		"  complete [file_in]               complete a database by playing the last\n  missing moves.\n"
 		"  problem [file_in] [n] [file_out] build a set of <n> problems from a game\n  database.\n");
-<<<<<<< HEAD
-=======
-	printf("\nGame DataBase :\n");
-	printf("  convert [file_in] [file_out]     convert between different format.\n");
-	printf("  unique [file_in] [file_out]      remove doublons in the base.\n");
-	printf("  check [file_in] [n]              check error in the last <n> moves.\n");
-	printf("  correct [file_in] [n]            correct error in the last <n> moves.\n");
-	printf("  complete [file_in]               complete a database by playing the last\n  missing moves.\n");
-	printf("  problem [file_in] [n] [file_out] build a set of <n> problems from a game\n  database.\n");
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 }
 
 /**
diff --git a/src/empty.h b/src/empty.h
index 87c3a39..e8ba902 100644
--- a/src/empty.h
+++ b/src/empty.h
@@ -42,18 +42,8 @@ static inline void empty_restore(SquareList *empty, int index)
 }
 
 /** Loop over all empty squares */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #define foreach_empty(index, empty)\
 	for ((index) = (empty)[NOMOVE].next; index != NOMOVE; (index) = (empty)[index].next)
-=======
-#define foreach_empty(empty, list)\
-	for ((empty) = (list)->next; (empty)->next; (empty) = (empty)->next)
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-#define foreach_empty(index, empty)\
-	for ((index) = (empty)[NOMOVE].next; index != NOMOVE; (index) = (empty)[index].next)
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
 
 #endif
 
diff --git a/src/endgame.c b/src/endgame.c
index 0c1583c..375cb74 100644
--- a/src/endgame.c
+++ b/src/endgame.c
@@ -3,34 +3,9 @@
  *
  * Search near the end of the game.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2024
-=======
- * @date 1998 - 2017
->>>>>>> b3f048d (copyright changes)
-=======
- * @date 1998 - 2020
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
- * @date 1998 - 2023
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
-=======
- * @date 1998 - 2024
->>>>>>> d8589d2 (Init 4.5.3: abandon size_reduced_movelist which confuses gcc warn)
- * @author Richard Delorme
- * @author Toshihiko Okuhara
-=======
- * @date 1998 - 2022
  * @author Richard Delorme
-<<<<<<< HEAD
->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line)
-=======
  * @author Toshihiko Okuhara
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
  * @version 4.5
  */
 
@@ -56,45 +31,18 @@
 	#include "count_last_flip_32.c"
 #elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2
 	#include "count_last_flip_bmi2.c"
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 52949e1 (Add build options and files for new count_last_flips)
 #elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_AVX_PPFILL
 	#include "count_last_flip_avx_ppfill.c"
 #elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_AVX512
 	#include "count_last_flip_avx512cd.c"
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> ddb5d3f (Add SVE SIMULLASTFLIP to endgame_neon (but not enabled))
 #elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_NEON
 	#include "count_last_flip_neon.c"
 #elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SVE
 	#include "count_last_flip_sve_lzcnt.c"
-<<<<<<< HEAD
-=======
->>>>>>> 6506166 (More SSE optimizations)
-=======
->>>>>>> 52949e1 (Add build options and files for new count_last_flips)
-=======
->>>>>>> ddb5d3f (Add SVE SIMULLASTFLIP to endgame_neon (but not enabled))
 #else // LAST_FLIP_COUNTER == COUNT_LAST_FLIP_KINDERGARTEN
 	#include "count_last_flip_kindergarten.c"
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-#if ((MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)) && (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE)
-	#include "endgame_sse.c"	// vectorcall version
-#elif (MOVE_GENERATOR == MOVE_GENERATOR_NEON) && (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE)
-	#include "endgame_neon.c"
-#endif
-
->>>>>>> 6506166 (More SSE optimizations)
-=======
->>>>>>> 26dad03 (Use player bits only in board_score_1)
 /**
  * @brief Get the final score.
  *
@@ -104,43 +52,17 @@
  * @param n_empties Number of empty squares remaining on the board.
  * @return The final score, as a disc difference.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static int board_solve(const unsigned long long player, const int n_empties)
-{
-	int score = bit_count(player) * 2 - SCORE_MAX;	// in case of opponents win
-=======
-static int board_solve(const Board *board, const int n_empties)
-{
-	int score = bit_count(board->player) * 2 - SCORE_MAX;	// in case of opponents win
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 static int board_solve(const unsigned long long player, const int n_empties)
 {
 	int score = bit_count(player) * 2 - SCORE_MAX;	// in case of opponents win
->>>>>>> 26dad03 (Use player bits only in board_score_1)
 	int diff = score + n_empties;		// = n_discs_p - (64 - n_empties - n_discs_p)
 
 	SEARCH_STATS(++statistics.n_search_solve);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if (diff == 0)
-		score = diff;
-	else if (diff > 0)
-		score = diff + n_empties;
-=======
-	if (diff >= 0)
-		score = diff;
-	if (diff > 0)
-		score += n_empties;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	if (diff == 0)
 		score = diff;
 	else if (diff > 0)
 		score = diff + n_empties;
->>>>>>> c0fb778 (small optimizations in endgame)
 	return score;
 }
 
@@ -154,23 +76,7 @@ static int board_solve(const unsigned long long player, const int n_empties)
  */
 int search_solve(const Search *search)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	return board_solve(search->board.player, search->eval.n_empties);
-=======
-	return board_solve(search->board, search->n_empties);
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	return board_solve(&search->board, search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	return board_solve(&search->board, search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-	return board_solve(search->board.player, search->eval.n_empties);
->>>>>>> 26dad03 (Use player bits only in board_score_1)
 }
 
 /**
@@ -185,53 +91,14 @@ int search_solve_0(const Search *search)
 {
 	SEARCH_STATS(++statistics.n_search_solve_0);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	return 2 * bit_count(search->board.player) - SCORE_MAX;
-=======
-	return 2 * bit_count(search->board->player) - SCORE_MAX;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	return 2 * bit_count(search->board.player) - SCORE_MAX;
->>>>>>> 0a166fd (Remove 1 element array coding style)
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if ((MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)) && ((LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE) || (LAST_FLIP_COUNTER >= COUNT_LAST_FLIP_BMI2))
 	#include "endgame_sse.c"	// vectorcall version
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> ddb5d3f (Add SVE SIMULLASTFLIP to endgame_neon (but not enabled))
 #elif ((MOVE_GENERATOR == MOVE_GENERATOR_NEON) || (MOVE_GENERATOR == MOVE_GENERATOR_SVE)) && ((LAST_FLIP_COUNTER == COUNT_LAST_FLIP_NEON) || ((LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SVE) && defined(SIMULLASTFLIP)))
 	#include "endgame_neon.c"
 #else
-=======
-#if ((MOVE_GENERATOR != MOVE_GENERATOR_AVX) && (MOVE_GENERATOR != MOVE_GENERATOR_SSE) && (MOVE_GENERATOR != MOVE_GENERATOR_NEON)) || (LAST_FLIP_COUNTER != COUNT_LAST_FLIP_SSE)
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-=======
-#if ((MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)) && (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE)
-=======
-#if ((MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)) && (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE)
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-=======
-#if ((MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)) && ((LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2))
->>>>>>> 9ea5b5e (BMI2 and mm_LastFlip version of board_score_sse_1 added (but not enabled))
-=======
-#if ((MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE)) && ((LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE) || (LAST_FLIP_COUNTER >= COUNT_LAST_FLIP_BMI2))
->>>>>>> 52949e1 (Add build options and files for new count_last_flips)
-	#include "endgame_sse.c"	// vectorcall version
-#elif (MOVE_GENERATOR == MOVE_GENERATOR_NEON) && (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE)
-=======
-#elif ((MOVE_GENERATOR == MOVE_GENERATOR_NEON) || (MOVE_GENERATOR == MOVE_GENERATOR_SVE)) && (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE)
->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build)
-	#include "endgame_neon.c"
-#else
->>>>>>> 26dad03 (Use player bits only in board_score_1)
 /**
  * @brief Get the final score.
  *
@@ -239,100 +106,26 @@ int search_solve_0(const Search *search)
  * The following code has been adapted from Zebra by Gunnar Anderson.
  *
  * @param player Board.player to evaluate.
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @param alpha  Alpha bound. (beta - 1)
-=======
- * @param beta   Beta bound.
->>>>>>> 26dad03 (Use player bits only in board_score_1)
-=======
- * @param beta   Beta bound - 1.
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-=======
- * @param alpha  Alpha bound. (beta - 1)
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
  * @param x      Last empty square to play.
  * @return       The final score, as a disc difference.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 int board_score_1(const unsigned long long player, const int alpha, const int x)
-=======
-int board_score_1(const Board *board, const int beta, const int x)
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 {
 	int score, score2, n_flips;
 
 	score = 2 * bit_count(player) - SCORE_MAX + 2;	// = (bit_count(P) + 1) - (SCORE_MAX - 1 - bit_count(P))
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	n_flips = last_flip(x, player);
-	score += n_flips;
-
-	if (n_flips == 0) {	// (23%)
-		score2 = score - 2;	// empty for opponent
-		if (score <= 0)
-			score = score2;
-		if (score > alpha) {	// lazy cut-off (40%)
-			if ((n_flips = last_flip(x, ~player)) != 0)	// (98%)
-				score = score2 - n_flips;
-=======
-	if ((n_flips = last_flip(x, board->player)) != 0) {
-		score -= n_flips;
-	} else {
-=======
-	n_flips = last_flip(x, board->player);
-=======
-int board_score_1(const unsigned long long player, const int beta, const int x)
-=======
-int board_score_1(const unsigned long long player, const int alpha, const int x)
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-{
-	int score, score2, n_flips;
-
-	score = 2 * bit_count(player) - SCORE_MAX  + 2;	// = (bit_count(P) + 1) - (SCORE_MAX - 1 - bit_count(P))
-
 	n_flips = last_flip(x, player);
-<<<<<<< HEAD
->>>>>>> 26dad03 (Use player bits only in board_score_1)
-	score -= n_flips;
-=======
 	score += n_flips;
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 
-<<<<<<< HEAD
-	if (n_flips == 0) {
->>>>>>> 9ad160e (4.4.7 AVX/shuffle optimization in endgame_sse.c)
-=======
 	if (n_flips == 0) {	// (23%)
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-		score2 = score + 2;	// empty for player
-=======
-		score2 = score + 2;	// empty for opponent
->>>>>>> 0ba5408 (add vectorcall to inline functions in case not inlined)
-		if (score >= 0)
-=======
 		score2 = score - 2;	// empty for opponent
 		if (score <= 0)
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 			score = score2;
 		if (score > alpha) {	// lazy cut-off (40%)
 			if ((n_flips = last_flip(x, ~player)) != 0)	// (98%)
-<<<<<<< HEAD
-				score = score2 + n_flips;
-<<<<<<< HEAD
-			}
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
->>>>>>> 9ad160e (4.4.7 AVX/shuffle optimization in endgame_sse.c)
-=======
 				score = score2 - n_flips;
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 		}
 	}
 
@@ -342,15 +135,7 @@ int board_score_1(const unsigned long long player, const int alpha, const int x)
 /**
  * @brief Get the final score.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
- * Get the final min score, when 2 empty squares remain.
-=======
- * Get the final max score, when 2 empty squares remain.
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
  * Get the final min score, when 2 empty squares remain.
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
  *
  * @param player Board.player to evaluate.
  * @param opponent Board.opponent to evaluate.
@@ -358,66 +143,9 @@ int board_score_1(const unsigned long long player, const int alpha, const int x)
  * @param x1 First empty square coordinate.
  * @param x2 Second empty square coordinate.
  * @param n_nodes Node counter.
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
- * @return The final min score, as a disc difference.
- */
-<<<<<<< HEAD
-static int board_solve_2(unsigned long long player, unsigned long long opponent, int alpha, int x1, int x2, volatile unsigned long long *n_nodes)
-{
-	unsigned long long flipped;
-	int score, bestscore, nodes;
-	// const int beta = alpha + 1;
-
-	SEARCH_STATS(++statistics.n_board_solve_2);
-
-	if ((NEIGHBOUR[x1] & opponent) && (flipped = Flip(x1, player, opponent))) {	// (84%/84%)
-		bestscore = board_score_1(opponent ^ flipped, alpha, x2);
-
-		if ((bestscore > alpha) && (NEIGHBOUR[x2] & opponent) && (flipped = Flip(x2, player, opponent))) {	// (50%/93%/92%)
-			score = board_score_1(opponent ^ flipped, alpha, x1);
-			if (score < bestscore)
-				bestscore = score;
-			nodes = 3;
-		} else	nodes = 2;
-
-	} else if ((NEIGHBOUR[x2] & opponent) && (flipped = Flip(x2, player, opponent))) {	// (96%/75%)
-		bestscore = board_score_1(opponent ^ flipped, alpha, x1);
-		nodes = 2;
-
-	} else {	// pass (17%) - NEIGHBOUR test is almost 100% true
-		alpha = ~alpha;	// = -alpha - 1
-		if ((flipped = Flip(x1, opponent, player))) {	// (95%)
-			bestscore = board_score_1(player ^ flipped, alpha, x2);
-
-			if ((bestscore > alpha) && (flipped = Flip(x2, opponent, player))) {	// (20%/100%)
-				score = board_score_1(player ^ flipped, alpha, x1);
-				if (score < bestscore)
-					bestscore = score;
-				nodes = 3;
-			} else	nodes = 2;
-
-		} else if ((flipped = Flip(x2, opponent, player))) {	// (97%)
-			bestscore = board_score_1(player ^ flipped, alpha, x1);
-			nodes = 2;
-
-		} else {	// gameover
-			bestscore = board_solve(player, 2);
-			nodes = 1;
-=======
-static int board_solve_2(Board *board, int alpha, const int x1, const int x2, volatile unsigned long long *n_nodes)
-=======
- * @return The final score, as a disc difference.
-=======
- * @return The final max score, as a disc difference.
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
  * @return The final min score, as a disc difference.
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
  */
 static int board_solve_2(unsigned long long player, unsigned long long opponent, int alpha, int x1, int x2, volatile unsigned long long *n_nodes)
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
 {
 	unsigned long long flipped;
 	int score, bestscore, nodes;
@@ -451,153 +179,37 @@ static int board_solve_2(unsigned long long player, unsigned long long opponent,
 				nodes = 3;
 			} else	nodes = 2;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-			if (bestscore > alpha) {
-				if ((NEIGHBOUR[x2] & board->player) && board_pass_next(board, x2, &next)) {
-					SEARCH_UPDATE_INTERNAL_NODES(nodes);
-					score = -board_score_1(&next, -alpha, x1);
-					if (score < bestscore) bestscore = score;
-				}
-				// gameover
-				if (bestscore == SCORE_INF) bestscore = board_solve(board, 2);
-			}
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-		} else if ((NEIGHBOUR[x2] & board->player) && board_pass_next(board, x2, &next)) {
-			bestscore = -board_score_1(&next, -alpha, x1);
-=======
-		} else if ((NEIGHBOUR[x2] & board->player) && (flipped = Flip(x2, board->opponent, board->player))) {
-			bestscore = -board_score_1(board->player ^ flipped, -alpha, x1);
->>>>>>> 26dad03 (Use player bits only in board_score_1)
-=======
-		} else if ((NEIGHBOUR[x2] & player) && (flipped = Flip(x2, opponent, player))) {
-=======
 		} else if ((flipped = Flip(x2, opponent, player))) {	// (97%)
-<<<<<<< HEAD
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-			bestscore = -board_score_1(player ^ flipped, -alpha, x1);
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
-=======
 			bestscore = board_score_1(player ^ flipped, alpha, x1);
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
 			nodes = 2;
 
 		} else {	// gameover
 			bestscore = board_solve(player, 2);
 			nodes = 1;
->>>>>>> 46e4b64 (Optimize endgame (esp. 2 empties) score comparisons)
 		}
 		bestscore = -bestscore;
 	}
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	SEARCH_UPDATE_2EMPTIES_NODES(*n_nodes += nodes;)
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-	assert((bestscore & 1) == 0);
-=======
-	*n_nodes += nodes;
- 	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
- 	assert((bestscore & 1) == 0);
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	SEARCH_UPDATE_2EMPTIES_NODES(*n_nodes += nodes;)
 	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
 	assert((bestscore & 1) == 0);
->>>>>>> 46e4b64 (Optimize endgame (esp. 2 empties) score comparisons)
 	return bestscore;
 }
 
 /**
  * @brief Get the final score.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * Get the final max score, when 3 empty squares remain.
-=======
- * Get the final min score, when 3 empty squares remain.
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
- * Get the final max score, when 3 empty squares remain.
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
- * @param player Board.player to evaluate.
- * @param opponent Board.opponent to evaluate.
-=======
- * @param board The board to evaluate.
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
-=======
- * @param board The board to evaluate. (may be broken)
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
  * @param player Board.player to evaluate.
  * @param opponent Board.opponent to evaluate.
->>>>>>> 92a4ad9 (Expand board to 2 ULLs in non-SSE search_solve_3 and _4)
  * @param alpha Alpha bound.
  * @param sort3 Parity flags.
  * @param x1 First empty square coordinate.
  * @param x2 Second empty square coordinate.
  * @param x3 Third empty square coordinate.
  * @param n_nodes Node counter.
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
- * @return The final max score, as a disc difference.
- */
-<<<<<<< HEAD
-static int search_solve_3(unsigned long long player, unsigned long long opponent, int alpha, int sort3, int x1, int x2, int x3, volatile unsigned long long *n_nodes)
-{
-<<<<<<< HEAD
-	unsigned long long flipped, next_player, next_opponent;
-	int score, bestscore, pol, tmp;
-	// const int beta = alpha + 1;
-
-	SEARCH_STATS(++statistics.n_search_solve_3);
-	SEARCH_UPDATE_INTERNAL_NODES(*n_nodes);
-=======
-	Board *board = search->board;
-=======
-static int search_solve_3(Search *search, const int alpha, Board *board, unsigned int parity)
-{
-<<<<<<< HEAD
->>>>>>> 6506166 (More SSE optimizations)
-	Board next[1];
-=======
-	Board next;
-<<<<<<< HEAD
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	SquareList *empty = search->empties->next;
-	int x1 = empty->x;
-	int x2 = (empty = empty->next)->x;
-	int x3 = empty->next->x;
-=======
-	int x1 = search->empties[NOMOVE].next;
-	int x2 = search->empties[x1].next;
-	int x3 = search->empties[x2].next;
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-	int score, bestscore;
-	// const int beta = alpha + 1;
-
-	SEARCH_STATS(++statistics.n_search_solve_3);
-	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-
-	// parity based move sorting
-<<<<<<< HEAD
-=======
- * @return The final score, as a disc difference.
-=======
- * @return The final min score, as a disc difference.
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
  * @return The final max score, as a disc difference.
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
  */
 static int search_solve_3(unsigned long long player, unsigned long long opponent, int alpha, int sort3, int x1, int x2, int x3, volatile unsigned long long *n_nodes)
 {
@@ -609,7 +221,6 @@ static int search_solve_3(unsigned long long player, unsigned long long opponent
 	SEARCH_UPDATE_INTERNAL_NODES(*n_nodes);
 
 	// parity based move sorting
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
 	switch (sort3 & 0x03) {
 		case 1:
 			tmp = x1; x1 = x2; x2 = tmp;	// case 1(x2) 2(x1 x3)
@@ -620,94 +231,12 @@ static int search_solve_3(unsigned long long player, unsigned long long opponent
 		case 3:
 			tmp = x2; x2 = x3; x3 = tmp;
 			break;
-<<<<<<< HEAD
-=======
-	if (!(parity & QUADRANT_ID[x1])) {
-		if (parity & QUADRANT_ID[x2]) { // case 1(x2) 2(x1 x3)
-			int tmp = x1; x1 = x2; x2 = tmp;
-		} else { // case 1(x3) 2(x1 x2)
-			int tmp = x1; x1 = x3; x3 = x2; x2 = tmp;
-		}
->>>>>>> 6506166 (More SSE optimizations)
-=======
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
-	}
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	bestscore = -SCORE_INF;
-	pol = 1;
-	do {
-		// best move alphabeta search
-		if ((NEIGHBOUR[x1] & opponent) && (flipped = Flip(x1, player, opponent))) {	// (89%/91%)
-			next_player = opponent ^ flipped;
-			next_opponent = player ^ (flipped | x_to_bit(x1));
-			bestscore = board_solve_2(next_player, next_opponent, alpha, x2, x3, n_nodes);
-			if (bestscore > alpha) return bestscore * pol;	// (78%/63%)
-		}
-
-		if (/* (NEIGHBOUR[x2] & opponent) && */ (flipped = Flip(x2, player, opponent))) {	// (97%/78%)
-			next_player = opponent ^ flipped;
-			next_opponent = player ^ (flipped | x_to_bit(x2));
-			score = board_solve_2(next_player, next_opponent, alpha, x1, x3, n_nodes);
-			if (score > alpha) return score * pol;	// (32%/9%)
-			else if (score > bestscore) bestscore = score;
-=======
-	// best move alphabeta search
-	bestscore = -SCORE_INF;
-	if ((NEIGHBOUR[x1] & board->opponent) && (flipped = board_flip(board, x1))) {
-		next_player = board->opponent ^ flipped;
-		next_opponent = board->player ^ (flipped | x_to_bit(x1));
-		bestscore = -board_solve_2(next_player, next_opponent, -(alpha + 1), x2, x3, n_nodes);
-		if (bestscore > alpha) return bestscore;
-	}
-
-	if ((NEIGHBOUR[x2] & board->opponent) && (flipped = board_flip(board, x2))) {
-		next_player = board->opponent ^ flipped;
-		next_opponent = board->player ^ (flipped | x_to_bit(x2));
-		score = -board_solve_2(next_player, next_opponent, -(alpha + 1), x1, x3, n_nodes);
-		if (score > alpha) return score;
-		else if (score > bestscore) bestscore = score;
-	}
-
-	if ((NEIGHBOUR[x3] & board->opponent) && (flipped = board_flip(board, x3))) {
-		next_player = board->opponent ^ flipped;
-		next_opponent = board->player ^ (flipped | x_to_bit(x3));
-		score = -board_solve_2(next_player, next_opponent, -(alpha + 1), x1, x2, n_nodes);
-		if (score > bestscore) bestscore = score;
 	}
 
-	// pass ?
-	else if (bestscore == -SCORE_INF) {
-=======
-	for (pol = 1; pol >= -1; pol -= 2) {
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-	pol = 1;
-	do {
->>>>>>> c0fb778 (small optimizations in endgame)
-		// best move alphabeta search
-		bestscore = -SCORE_INF;
-=======
-	pol = -1;
-	do {
-		// best move alphabeta search
-		alpha = ~alpha;	// = -(alpha + 1)
-		bestscore = SCORE_INF;	// Negative score
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-=======
-	bestscore = SCORE_INF;	// min stage
-=======
 	bestscore = -SCORE_INF;
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 	pol = 1;
 	do {
 		// best move alphabeta search
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
 		if ((NEIGHBOUR[x1] & opponent) && (flipped = Flip(x1, player, opponent))) {	// (89%/91%)
 			next_player = opponent ^ flipped;
 			next_opponent = player ^ (flipped | x_to_bit(x1));
@@ -723,182 +252,36 @@ static int search_solve_3(unsigned long long player, unsigned long long opponent
 			else if (score > bestscore) bestscore = score;
 		}
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-		if ((NEIGHBOUR[x3] & board->player) && (flipped = Flip(x3, board->opponent, board->player))) {
-			next_player = board->player ^ flipped;
-			next_opponent = board->opponent ^ (flipped | x_to_bit(x3));
-			score = board_solve_2(next_player, next_opponent, alpha, x1, x2, n_nodes);
-			if (score < bestscore) bestscore = score;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-		}
-
-<<<<<<< HEAD
 		if (/* (NEIGHBOUR[x3] & opponent) && */ (flipped = Flip(x3, player, opponent))) {	// (100%/89%)
 			next_player = opponent ^ flipped;
 			next_opponent = player ^ (flipped | x_to_bit(x3));
 			score = board_solve_2(next_player, next_opponent, alpha, x1, x2, n_nodes);
-<<<<<<< HEAD
-<<<<<<< HEAD
-			if (score > bestscore) bestscore = score;
-			return bestscore * pol;	// (26%)
-		}
-=======
-		else if (bestscore == SCORE_INF)	// gameover
-			bestscore = board_solve(board->player, 3);
-=======
-		if (/* (NEIGHBOUR[x3] & board->opponent) && */ (flipped = board_flip(board, x3))) {	// (100%/89%)
-			next_player = board->opponent ^ flipped;
-			next_opponent = board->player ^ (flipped | x_to_bit(x3));
-=======
-		if (/* (NEIGHBOUR[x3] & opponent) && */ (flipped = Flip(x3, player, opponent))) {	// (100%/89%)
-			next_player = opponent ^ flipped;
-			next_opponent = player ^ (flipped | x_to_bit(x3));
->>>>>>> 92a4ad9 (Expand board to 2 ULLs in non-SSE search_solve_3 and _4)
-			score = -board_solve_2(next_player, next_opponent, ~alpha, x1, x2, n_nodes);
-			if (score > bestscore) bestscore = score;
-=======
-			if (score < bestscore) bestscore = score;
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-=======
 			if (score > bestscore) bestscore = score;
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 			return bestscore * pol;	// (26%)
 		}
 
 		if (bestscore > -SCORE_INF)	// (76%)
 			return bestscore * pol;	// (9%)
 
-<<<<<<< HEAD
-		flipped = player; player = opponent; opponent = flipped;
-		alpha = ~alpha;	// = -(alpha + 1)
-<<<<<<< HEAD
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-	}
->>>>>>> 46e4b64 (Optimize endgame (esp. 2 empties) score comparisons)
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-		if (bestscore > -SCORE_INF)	// (76%)
-			return bestscore * pol;	// (9%)
-
-		next_opponent = player; player = opponent; opponent = next_opponent;	// pass
-		alpha = ~alpha;	// = -(alpha + 1)
-=======
->>>>>>> c0fb778 (small optimizations in endgame)
-	} while ((pol = -pol) < 0);
-=======
 		next_opponent = player; player = opponent; opponent = next_opponent;	// pass
-<<<<<<< HEAD
-	} while ((pol = -pol) >= 0);
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-
-	return board_solve(player, 3);	// gameover
-=======
-	return board_solve(board->player, 3);	// gameover
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-	return board_solve(player, 3);	// gameover
->>>>>>> 92a4ad9 (Expand board to 2 ULLs in non-SSE search_solve_3 and _4)
-=======
 		alpha = ~alpha;	// = -(alpha + 1)
 	} while ((pol = -pol) < 0);
 
-<<<<<<< HEAD
-	return board_solve(opponent, 3);	// gameover
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
 	return board_solve(player, 3);	// gameover
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 }
 
 /**
  * @brief Get the final score.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
- * Get the final min score, when 4 empty squares remain.
- *
- * @param search Search position.
- * @param alpha Upper score value.
- * @return The final min score, as a disc difference.
-=======
- * Get the final max score, when 4 empty squares remain.
- *
- * @param search Search position.
- * @param alpha Upper score value.
- * @return The final max score, as a disc difference.
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
  * Get the final min score, when 4 empty squares remain.
  *
  * @param search Search position.
  * @param alpha Upper score value.
  * @return The final min score, as a disc difference.
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
  */
 static int search_solve_4(Search *search, int alpha)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	unsigned long long player, opponent, flipped, next_player, next_opponent;
-	int x1, x2, x3, x4, tmp, paritysort, score, bestscore, pol;
-	// const int beta = alpha + 1;
-	static const unsigned char parity_case[64] = {	/* x4x3x2x1 = */
-		/*0000*/  0, /*0001*/  0, /*0010*/  1, /*0011*/  9, /*0100*/  2, /*0101*/ 10, /*0110*/ 11, /*0111*/  3,
-		/*0002*/  0, /*0003*/  0, /*0012*/  0, /*0013*/  0, /*0102*/  4, /*0103*/  4, /*0112*/  5, /*0113*/  5,
-		/*0020*/  1, /*0021*/  0, /*0030*/  1, /*0031*/  0, /*0120*/  6, /*0121*/  7, /*0130*/  6, /*0131*/  7,
-		/*0022*/  9, /*0023*/  0, /*0032*/  0, /*0033*/  9, /*0122*/  8, /*0123*/  0, /*0132*/  0, /*0133*/  8,
-		/*0200*/  2, /*0201*/  4, /*0210*/  6, /*0211*/  8, /*0300*/  2, /*0301*/  4, /*0310*/  6, /*0311*/  8,
-		/*0202*/ 10, /*0203*/  4, /*0212*/  7, /*0213*/  0, /*0302*/  4, /*0303*/ 10, /*0312*/  0, /*0313*/  7,
-		/*0220*/ 11, /*0221*/  5, /*0230*/  6, /*0231*/  0, /*0320*/  6, /*0321*/  0, /*0330*/ 11, /*0331*/  5,
-		/*0222*/  3, /*0223*/  5, /*0232*/  7, /*0233*/  8, /*0322*/  8, /*0323*/  7, /*0332*/  5, /*0333*/  3
-	};
-	int sort3;	// for move sorting on 3 empties
-	static const short sort3_shuf[] = {
-		0x0000,	//  0: 1(x1) 3(x2 x3 x4), 1(x1) 1(x2) 2(x3 x4), 1 1 1 1, 4		x4x1x2x3-x3x1x2x4-x2x1x3x4-x1x2x3x4
-		0x1100,	//  1: 1(x2) 3(x1 x3 x4)	x4x2x1x3-x3x2x1x4-x2x1x3x4-x1x2x3x4
-		0x2011,	//  2: 1(x3) 3(x1 x2 x4)	x4x3x1x2-x3x1x2x4-x2x3x1x4-x1x3x2x4
-		0x0222,	//  3: 1(x4) 3(x1 x2 x3)	x4x1x2x3-x3x4x1x2-x2x4x1x3-x1x4x2x3
-		0x3000,	//  4: 1(x1) 1(x3) 2(x2 x4)	x4x1x2x3-x2x1x3x4-x3x1x2x4-x1x3x2x4 <- x4x1x3x2-x2x1x3x4-x3x1x2x4-x1x3x2x4
-		0x3300,	//  5: 1(x1) 1(x4) 2(x2 x3)	x3x1x2x4-x2x1x3x4-x4x1x2x3-x1x4x2x3 <- x3x1x4x2-x2x1x4x3-x4x1x2x3-x1x4x2x3
-		0x2000,	//  6: 1(x2) 1(x3) 2(x1 x4)	x4x1x2x3-x1x2x3x4-x3x2x1x4-x2x3x1x4 <- x4x2x3x1-x1x2x3x4-x3x2x1x4-x2x3x1x4
-		0x2300,	//  7: 1(x2) 1(x4) 2(x1 x3)	x3x1x2x4-x1x2x3x4-x4x2x1x3-x2x4x1x3 <- x3x2x4x1-x1x2x4x3-x4x2x1x3-x2x4x1x3
-		0x2200,	//  8: 1(x3) 1(x4) 2(x1 x2)	x2x1x3x4-x1x2x3x4-x4x3x1x2-x3x4x1x2 <- x2x3x4x1-x1x3x4x2-x4x3x1x2-x3x4x1x2
-		0x2200,	//  9: 2(x1 x2) 2(x3 x4)	x4x3x1x2-x3x4x1x2-x2x1x3x4-x1x2x3x4
-		0x1021,	// 10: 2(x1 x3) 2(x2 x4)	x4x2x1x3-x3x1x2x4-x2x4x1x3-x1x3x2x4
-		0x0112	// 11: 2(x1 x4) 2(x2 x3)	x4x1x2x3-x3x2x1x4-x2x3x1x4-x1x4x2x3
-	};
-=======
-	Board *board;
-	Board next[1];
-=======
-	Board next;
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	SquareList *empty;
-=======
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-	int x1, x2, x3, x4;
-	int score, bestscore;
-<<<<<<< HEAD
-	const int beta = alpha + 1;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	unsigned int parity;
-	// const int beta = alpha + 1;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-=======
-	Board board0, next;
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-	unsigned long long flipped;
-=======
 	unsigned long long player, opponent, flipped, next_player, next_opponent;
->>>>>>> 92a4ad9 (Expand board to 2 ULLs in non-SSE search_solve_3 and _4)
 	int x1, x2, x3, x4, tmp, paritysort, score, bestscore, pol;
 	// const int beta = alpha + 1;
 	static const unsigned char parity_case[64] = {	/* x4x3x2x1 = */
@@ -926,30 +309,12 @@ static int search_solve_4(Search *search, int alpha)
 		0x1021,	// 10: 2(x1 x3) 2(x2 x4)	x4x2x1x3-x3x1x2x4-x2x4x1x3-x1x3x2x4
 		0x0112	// 11: 2(x1 x4) 2(x2 x3)	x4x1x2x3-x3x2x1x4-x2x3x1x4-x1x4x2x3
 	};
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
 
 	SEARCH_STATS(++statistics.n_search_solve_4);
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
 
 	// stability cutoff (try 12%, cut 7%)
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if (search_SC_NWS_4(search, alpha, &score)) return score;
-
-	x1 = search->empties[NOMOVE].next;
-	x2 = search->empties[x1].next;
-	x3 = search->empties[x2].next;
-	x4 = search->empties[x3].next;
-=======
-	if (search_SC_NWS(search, alpha, &score)) return score;
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-	if (search_SC_NWS(search, alpha, 4, &score)) return score;
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
-=======
 	if (search_SC_NWS_4(search, alpha, &score)) return score;
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 
 	x1 = search->empties[NOMOVE].next;
 	x2 = search->empties[x1].next;
@@ -959,10 +324,6 @@ static int search_solve_4(Search *search, int alpha)
 	// parity based move sorting.
 	// The following hole sizes are possible:
 	//    4 - 1 3 - 2 2 - 1 1 2 - 1 1 1 1
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
 	// Only the 1 1 2 case needs move sorting on this ply.
 	paritysort = parity_case[((x3 ^ x4) & 0x24) + ((((x2 ^ x4) & 0x24) * 2 + ((x1 ^ x4) & 0x24)) >> 2)];
 	switch (paritysort) {
@@ -981,42 +342,11 @@ static int search_solve_4(Search *search, int alpha)
 		case 8:	// case 1(x3) 1(x4) 2(x1 x2)
 			tmp = x1; x1 = x3; x3 = tmp; tmp = x2; x2 = x4; x4 = tmp;
 			break;
-<<<<<<< HEAD
-=======
-	// Only the 1 1 2 case needs move sorting.
-	parity = search->eval.parity;
-	if (!(parity & QUADRANT_ID[x1])) {
-		if (parity & QUADRANT_ID[x2]) {
-			if (parity & QUADRANT_ID[x3]) { // case 1(x2) 1(x3) 2(x1 x4)
-				int tmp = x1; x1 = x2; x2 = x3; x3 = tmp;
-			} else { // case 1(x2) 1(x4) 2(x1 x3)
-				int tmp = x1; x1 = x2; x2 = x4; x4 = x3; x3 = tmp;
-			}
-		} else if (parity & QUADRANT_ID[x3]) { // case 1(x3) 1(x4) 2(x1 x2)
-			int tmp = x1; x1 = x3; x3 = tmp; tmp = x2; x2 = x4; x4 = tmp;
-		}
-	} else {
-		if (!(parity & QUADRANT_ID[x2])) {
-			if (parity & QUADRANT_ID[x3]) { // case 1(x1) 1(x3) 2(x2 x4)
-				int tmp = x2; x2 = x3; x3 = tmp;
-			} else { // case 1(x1) 1(x4) 2(x2 x3)
-				int tmp = x2; x2 = x4; x4 = x3; x3 = tmp;
-			}
-		}
->>>>>>> 6506166 (More SSE optimizations)
-=======
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
 	}
 	sort3 = sort3_shuf[paritysort];
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	player = search->board.player;
 	opponent = search->board.opponent;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	bestscore = SCORE_INF;	// min stage
 	pol = 1;
 	do {
@@ -1036,62 +366,12 @@ static int search_solve_4(Search *search, int alpha)
 			else if (score < bestscore) bestscore = score;
 		}
 
-<<<<<<< HEAD
 		if ((NEIGHBOUR[x3] & opponent) && (flipped = Flip(x3, player, opponent))) {	// (77%/80%)
 			next_player = opponent ^ flipped;
 			next_opponent = player ^ (flipped | x_to_bit(x3));
 			score = search_solve_3(next_player, next_opponent, alpha, sort3 >> 8, x1, x2, x4, &search->n_nodes);
 			if (score <= alpha) return score * pol;	// (14%)
 			else if (score < bestscore) bestscore = score;
-=======
-	// best move alphabeta search
-	bestscore = -SCORE_INF;
-	if ((NEIGHBOUR[x1] & search->board.opponent) && (flipped = board_flip(&search->board, x1))) {
-		board_flip_next(&search->board, x1, flipped, &next);
-		bestscore = -search_solve_3(&next, -(alpha + 1), sort3, x2, x3, x4, &search->n_nodes);
-		if (bestscore > alpha) return bestscore;
-	}
-
-	if ((NEIGHBOUR[x2] & search->board.opponent) && (flipped = board_flip(&search->board, x2))) {
-		board_flip_next(&search->board, x2, flipped, &next);
-		score = -search_solve_3(&next, -(alpha + 1), sort3 >> 4, x1, x3, x4, &search->n_nodes);
-		if (score > alpha) return score;
-		else if (score > bestscore) bestscore = score;
-	}
-
-	if ((NEIGHBOUR[x3] & search->board.opponent) && (flipped = board_flip(&search->board, x3))) {
-		board_flip_next(&search->board, x3, flipped, &next);
-		score = -search_solve_3(&next, -(alpha + 1), sort3 >> 8, x1, x2, x4, &search->n_nodes);
-		if (score > alpha) return score;
-		else if (score > bestscore) bestscore = score;
-	}
-
-	if ((NEIGHBOUR[x4] & search->board.opponent) && (flipped = board_flip(&search->board, x4))) {
-		board_flip_next(&search->board, x4, flipped, &next);
-		score = -search_solve_3(&next, -(alpha + 1), sort3 >> 12, x1, x2, x3, &search->n_nodes);
-		if (score > bestscore) bestscore = score;
-	}
-
-	else if (bestscore == -SCORE_INF) {	// no move
-		if (can_move(search->board.opponent, search->board.player)) { // pass
-			search_pass_endgame(search);
-			bestscore = -search_solve_4(search, -(alpha + 1));
-			search_pass_endgame(search);
-		} else { // gameover
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-			bestscore = search_solve(search);
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-			bestscore = board_solve(board, 4);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-			bestscore = board_solve(&search->board, 4);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			bestscore = board_solve(search->board.player, 4);
->>>>>>> 26dad03 (Use player bits only in board_score_1)
 		}
 
 		if ((NEIGHBOUR[x4] & opponent) && (flipped = Flip(x4, player, opponent))) {	// (79%/88%)
@@ -1110,85 +390,8 @@ static int search_solve_4(Search *search, int alpha)
 	} while ((pol = -pol) < 0);
 
 	return board_solve(opponent, 4);	// gameover
-=======
-	board0 = search->board;
-=======
-	player = search->board.player;
-	opponent = search->board.opponent;
->>>>>>> 92a4ad9 (Expand board to 2 ULLs in non-SSE search_solve_3 and _4)
-	for (pol = 1; pol >= -1; pol -= 2) {
-=======
-	pol = 1;
-	do {
->>>>>>> c0fb778 (small optimizations in endgame)
-		// best move alphabeta search
-		bestscore = -SCORE_INF;
-=======
-	pol = -1;
-	do {
-		// best move alphabeta search
-		alpha = ~alpha;	// = -(alpha + 1)
-		bestscore = SCORE_INF;	// Negative score
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-=======
-	bestscore = -SCORE_INF;
-=======
-	bestscore = SCORE_INF;	// min stage
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
-	pol = 1;
-	do {
-		// best move alphabeta search
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-		if ((NEIGHBOUR[x1] & opponent) && (flipped = Flip(x1, player, opponent))) {	// (76%/77%)
-			next_player = opponent ^ flipped;
-			next_opponent = player ^ (flipped | x_to_bit(x1));
-			bestscore = search_solve_3(next_player, next_opponent, alpha, sort3, x2, x3, x4, &search->n_nodes);
-			if (bestscore <= alpha) return bestscore * pol;	// (68%)
-		}
-
-		if ((NEIGHBOUR[x2] & opponent) && (flipped = Flip(x2, player, opponent))) {	// (87%/84%)
-			next_player = opponent ^ flipped;
-			next_opponent = player ^ (flipped | x_to_bit(x2));
-			score = search_solve_3(next_player, next_opponent, alpha, sort3 >> 4, x1, x3, x4, &search->n_nodes);
-			if (score <= alpha) return score * pol;	// (37%)
-			else if (score < bestscore) bestscore = score;
-		}
-
-		if ((NEIGHBOUR[x3] & opponent) && (flipped = Flip(x3, player, opponent))) {	// (77%/80%)
-			next_player = opponent ^ flipped;
-			next_opponent = player ^ (flipped | x_to_bit(x3));
-			score = search_solve_3(next_player, next_opponent, alpha, sort3 >> 8, x1, x2, x4, &search->n_nodes);
-			if (score <= alpha) return score * pol;	// (14%)
-			else if (score < bestscore) bestscore = score;
-		}
-
-		if ((NEIGHBOUR[x4] & opponent) && (flipped = Flip(x4, player, opponent))) {	// (79%/88%)
-			next_player = opponent ^ flipped;
-			next_opponent = player ^ (flipped | x_to_bit(x4));
-			score = search_solve_3(next_player, next_opponent, alpha, sort3 >> 12, x1, x2, x3, &search->n_nodes);
-			if (score < bestscore) bestscore = score;
-			return bestscore * pol;	// (37%)
-		}
-
-		if (bestscore < SCORE_INF)	// (72%)
-			return bestscore * pol;	// (13%)
-
-		next_opponent = player; player = opponent; opponent = next_opponent;	// pass
-		alpha = ~alpha;	// = -(alpha + 1)
-	} while ((pol = -pol) < 0);
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	return board_solve(search->board.player, 4);	// gameover
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-	return board_solve(player, 4);	// gameover
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-=======
-	return board_solve(opponent, 4);	// gameover
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
-}
-#endif
+}
+#endif
 
 /**
  * @brief  Evaluate a position using a shallow NWS.
@@ -1198,74 +401,17 @@ static int search_solve_4(Search *search, int alpha)
  * Move ordering is constricted to the hole parity and the type of squares.
  * No hashtable are used and anticipated cut-off is limited to stability cut-off.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
- * @param search Search. (breaks board and parity; caller has a copy)
-=======
- * @param search Search. (breaks board and parity)
->>>>>>> 867c81c (Omit restore board/parity in search_shallow; tweak NWS_STABILITY)
-=======
- * @param search Search. (breaks board and parity; caller take a copy)
->>>>>>> ea8595b (Split v3hi_empties from search_solve_3 & moved to solve_4)
-=======
  * @param search Search. (breaks board and parity; caller has a copy)
->>>>>>> 6a63841 (exit search_shallow/search_eval loop when all bits processed)
  * @param alpha Alpha bound.
  * @return The final score, as a disc difference.
  */
 static int search_shallow(Search *search, const int alpha, bool pass1)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	unsigned long long moves, prioritymoves;
-	int x, prev, score, bestscore;
-=======
-	unsigned long long moves;
-	int x, prev, score, bestscore = -SCORE_INF;
->>>>>>> 8ee1734 (Use get_moves in search_shallow)
-=======
 	unsigned long long moves, prioritymoves;
 	int x, prev, score, bestscore;
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
 	// const int beta = alpha + 1;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	V2DI board0;
-=======
-	vBoard board0;
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-=======
-	V2DI board0;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 	unsigned int parity0;
-=======
-	Board *board = search->board;
-=======
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	SquareList *empty;
-=======
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-	Move move;
-=======
-	unsigned long long flipped;
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
-	int x, score, bestscore = -SCORE_INF;
-	// const int beta = alpha + 1;
-<<<<<<< HEAD
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	Board board0;
-<<<<<<< HEAD
-	unsigned int parity0, paritymask;
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-	unsigned int parity0;
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
 
 	assert(SCORE_MIN <= alpha && alpha <= SCORE_MAX);
 	assert(0 <= search->eval.n_empties && search->eval.n_empties <= DEPTH_TO_SHALLOW_SEARCH);
@@ -1273,39 +419,15 @@ static int search_shallow(Search *search, const int alpha, bool pass1)
 	SEARCH_STATS(++statistics.n_NWS_shallow);
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	// stability cutoff (try 8%, cut 7%)
-=======
-	// stability cutoff (try 15%, cut 5%)
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
 	// stability cutoff (try 8%, cut 7%)
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
 	if (search_SC_NWS(search, alpha, &score)) return score;
-=======
-	if (search_SC_NWS(search, alpha, search->eval.n_empties, &score)) return score;
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
-=======
-	if (search_SC_NWS(search, alpha, &score)) return score;
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
+
 	board0.board = search->board;
 	moves = vboard_get_moves(board0);
 	if (moves == 0) {	// pass (2%)
 		if (pass1)	// gameover (1%)
 			return search_solve(search);
 
-<<<<<<< HEAD
 		search_pass(search);
 		bestscore = -search_shallow(search, ~alpha, true);
 		// search_pass(search);
@@ -1319,199 +441,10 @@ static int search_shallow(Search *search, const int alpha, bool pass1)
 		prioritymoves = moves;
 
 	if (search->eval.n_empties == 5)	// transfer to search_solve_n, no longer uses n_empties, parity (53%)
-<<<<<<< HEAD
-		do {
-			moves ^= prioritymoves;
-			x = NOMOVE;
-			do {
-				do {
-					x = search->empties[prev = x].next;
-				} while (!(prioritymoves & x_to_bit(x)));	// (58%)
-
-				prioritymoves &= ~x_to_bit(x);
-				search->empties[prev].next = search->empties[x].next;	// remove - maintain single link only
-				vboard_next(board0, x, &search->board);
-				score = search_solve_4(search, alpha);
-				search->empties[prev].next = x;	// restore
-
-				if (score > alpha)	// (49%)
-					return score;
-				else if (score > bestscore)
-					bestscore = score;
-			} while (prioritymoves);	// (34%)
-		} while ((prioritymoves = moves));	// (38%)
-
-	else {
-		--search->eval.n_empties;	// for next depth
-		do {
-			moves ^= prioritymoves;
-			x = NOMOVE;
-			do {
-				do {
-					x = search->empties[prev = x].next;
-				} while (!(prioritymoves & x_to_bit(x)));	// (57%)
-
-				prioritymoves &= ~x_to_bit(x);
-				search->eval.parity = parity0 ^ QUADRANT_ID[x];
-				search->empties[prev].next = search->empties[x].next;	// remove - maintain single link only
-				vboard_next(board0, x, &search->board);
-				score = -search_shallow(search, ~alpha, false);
-				search->empties[prev].next = x;	// restore
-
-				if (score > alpha) {	// (40%)
-<<<<<<< HEAD
-<<<<<<< HEAD
-					// search->board = board0.board;
-					// search->eval.parity = parity0;
-					++search->eval.n_empties;
-					return score;
-
-				} else if (score > bestscore)
-					bestscore = score;
-			} while (prioritymoves);	// (54%)
-		} while ((prioritymoves = moves));	// (23%)
-		++search->eval.n_empties;
-=======
-		foreach_odd_empty (empty, search->empties, search->parity) {
-=======
-	if (search->eval.parity > 0 && search->eval.parity < 15) {
-
-		foreach_odd_empty (empty, search->empties, search->eval.parity) {
-<<<<<<< HEAD
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-			if ((NEIGHBOUR[empty->x] & board->opponent)
-			&& board_get_move(board, empty->x, &move)) {
-=======
-			if ((NEIGHBOUR[empty->x] & search->board.opponent)
-			&& board_get_move(&search->board, empty->x, &move)) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-				search_update_endgame(search, &move);
-					if (search->n_empties == 4) score = -search_solve_4(search, -(alpha + 1));
-					else score = -search_shallow(search, -(alpha + 1));
-				search_restore_endgame(search, &move);
-				if (score > alpha) return score;
-				else if (score > bestscore) bestscore = score;
-			}
-		}
-=======
-=======
-	moves = get_moves(search->board.player, search->board.opponent);
-=======
-	board0 = load_vboard(search->board);
-	moves = vboard_get_moves(board0, search->board);
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-	if (moves == 0) {	// pass (2%)
-		if (pass1)	// gameover
-			return search_solve(search);
-
-		search_pass(search);
-		bestscore = -search_shallow(search, ~alpha, true);
-		// search_pass(search);
-		return bestscore;
-	}
-
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 8ee1734 (Use get_moves in search_shallow)
-=======
-	bestscore  = -SCORE_INF;
-<<<<<<< HEAD
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-	board0 = search->board;
-=======
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-=======
-	bestscore = -SCORE_INF;
->>>>>>> 9ea5b5e (BMI2 and mm_LastFlip version of board_score_sse_1 added (but not enabled))
-	parity0 = search->eval.parity;
-	prioritymoves = moves & quadrant_mask[parity0];
-	if (prioritymoves == 0)	// all even
-		prioritymoves = moves;
-<<<<<<< HEAD
-	--search->eval.n_empties;	// for next depth
-<<<<<<< HEAD
-	do {	// odd first, even second
-<<<<<<< HEAD
-		if (paritymask) {	// skip all even or all add
-			foreach_empty (x, search->empties) {
-=======
-		if (paritymask) {	// skip no odd or no even
-			for (x = search->empties[prev = NOMOVE].next; x != NOMOVE; x = search->empties[prev = x].next) {	// maintain single link only
-<<<<<<< HEAD
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
-				if (paritymask & QUADRANT_ID[x]) {
-					if ((NEIGHBOUR[x] & board0.opponent) && (flipped = board_flip(&board0, x))) {
-						search->eval.parity = parity0 ^ QUADRANT_ID[x];
-						empty_remove(search->empties, x);
-						search->board.player = board0.opponent ^ flipped;
-						search->board.opponent = board0.player ^ (flipped | x_to_bit(x));
-						board_check(&search->board);
-
-<<<<<<< HEAD
-						if (search->eval.n_empties == 4) score = -search_solve_4(search, -(alpha + 1));
-						else score = -search_shallow(search, -(alpha + 1));
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-						if (search->eval.n_empties == 4)
-							score = -search_solve_4(search, -(alpha + 1));
-						else	score = -search_shallow(search, -(alpha + 1));
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
-
-						empty_restore(search->empties, x);
-=======
-				if ((moves & x_to_bit(x)) && (paritymask & QUADRANT_ID[x])) {
-					search->eval.parity = parity0 ^ QUADRANT_ID[x];
-					search->empties[prev].next = search->empties[x].next;	// remove
-					board_next(&board0, x, &search->board);
-=======
-	do {
-		x = search->empties[prev = NOMOVE].next;	// maintain single link only
-		do {
-			if (prioritymoves & x_to_bit(x)) {	// (37%)
-				search->eval.parity = parity0 ^ QUADRANT_ID[x];
-				search->empties[prev].next = search->empties[x].next;	// remove
-				board_next(&board0, x, &search->board);
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-
-				if (search->eval.n_empties == 4)	// (57%)
-=======
-
-	if (search->eval.n_empties == 5)	// transfer to search_solve_n, no longer uses n_empties, parity
-=======
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
 		do {
 			moves ^= prioritymoves;
 			x = NOMOVE;
 			do {
-<<<<<<< HEAD
-				if (prioritymoves & x_to_bit(x)) {
-					search->empties[prev].next = search->empties[x].next;	// remove
-<<<<<<< HEAD
-					board_next(&board0, x, &search->board);
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
-=======
-					vboard_next(board0, x, &search->board);
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-					score = -search_solve_4(search, ~alpha);
-					search->empties[prev].next = x;	// restore
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-					search->empties[prev].next = x;	// restore
->>>>>>> 8ee1734 (Use get_moves in search_shallow)
-=======
-				search->empties[prev].next = x;	// restore
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-					if (score > alpha)
-						return score;
-					else if (score > bestscore)
-						bestscore = score;
-				}
-			} while ((x = search->empties[prev = x].next) != NOMOVE);
-		} while ((prioritymoves = (moves ^= prioritymoves)));
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
-=======
 				do {
 					x = search->empties[prev = x].next;
 				} while (!(prioritymoves & x_to_bit(x)));	// (58%)
@@ -1526,14 +459,8 @@ static int search_shallow(Search *search, const int alpha, bool pass1)
 					return score;
 				else if (score > bestscore)
 					bestscore = score;
-<<<<<<< HEAD
-			} while (prioritymoves);
-		} while ((prioritymoves = moves));
->>>>>>> 6a63841 (exit search_shallow/search_eval loop when all bits processed)
-=======
 			} while (prioritymoves);	// (34%)
 		} while ((prioritymoves = moves));	// (38%)
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
 
 	else {
 		--search->eval.n_empties;	// for next depth
@@ -1553,13 +480,7 @@ static int search_shallow(Search *search, const int alpha, bool pass1)
 				search->empties[prev].next = x;	// restore
 
 				if (score > alpha) {	// (40%)
-					// store_vboard(search->board, board0);
-=======
-					// search->board = *(Board *) &board0;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
 					// search->board = board0.board;
->>>>>>> fe6dce7 (consistent vboard usage for eval_1 and eval_2)
 					// search->eval.parity = parity0;
 					++search->eval.n_empties;
 					return score;
@@ -1573,23 +494,6 @@ static int search_shallow(Search *search, const int alpha, bool pass1)
 	// search->board = board0.board;
 	// search->eval.parity = parity0;
 
-<<<<<<< HEAD
-	// no move
-	if (bestscore == -SCORE_INF) {
-		if (can_move(search->board.opponent, search->board.player)) { // pass
-			search_pass_endgame(search);
-			bestscore = -search_shallow(search, -(alpha + 1));
-			search_pass_endgame(search);
-		} else { // gameover
-			bestscore = search_solve(search);
-		}
->>>>>>> 6506166 (More SSE optimizations)
-	}
-	// search->board = board0.board;
-	// search->eval.parity = parity0;
-
-=======
->>>>>>> 8ee1734 (Use get_moves in search_shallow)
  	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
 	return bestscore;	// (33%)
 }
@@ -1608,178 +512,26 @@ static int search_shallow(Search *search, const int alpha, bool pass1)
  */
 int NWS_endgame(Search *search, const int alpha)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	int score, ofssolid, bestscore;
-<<<<<<< HEAD
 	unsigned long long hash_code, solid_opp;
 	// const int beta = alpha + 1;
 	HashStoreData hash_data;
 	Move *move;
 	long long nodes_org;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	V2DI board0;
 	Board hashboard;
-=======
-	V2DI board0, hashboard;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-	V2DI board0;
-	Board hashboard;
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 	unsigned int parity0;
 	unsigned long long full[5];
 	MoveList movelist;
-<<<<<<< HEAD
-
-	assert(bit_count(~(search->board.player|search->board.opponent)) < DEPTH_MIDGAME_TO_ENDGAME);
-	assert(SCORE_MIN <= alpha && alpha <= SCORE_MAX);
-=======
-	int score;
-=======
-	int score, ofssolid;
->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line)
-=======
-	int score, ofssolid, bestscore;
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-	HashTable *const hash_table = &search->hash_table;
-=======
->>>>>>> 3a92d84 (minor AVX512/SSE optimizations)
-	unsigned long long hash_code, solid_opp;
-	// const int beta = alpha + 1;
-<<<<<<< HEAD
-	HashData hash_data;
-	HashStoreData hash_store_data;
-	MoveList movelist;
-<<<<<<< HEAD
-	Move *move, *bestmove;
-<<<<<<< HEAD
-	long long cost;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-=======
-=======
-	HashStoreData hash_data;
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-	Move *move;
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-	long long nodes_org;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-	Board board0;
-	unsigned int parity0;
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-	Board board0, hashboard;
-=======
-	vBoard board0;
-=======
-	rBoard board0;
->>>>>>> 78ce5d7 (more precise rboard/vboard opt; reexamine neon vboard_next)
-	Board hashboard;
->>>>>>> 3a92d84 (minor AVX512/SSE optimizations)
-	unsigned int parity0;
-<<<<<<< HEAD
-	V4DI full;
->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line)
-=======
-	unsigned long long full[5];
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 4303b09 (Returns all full lines in full[4])
-=======
-	bool ffull;
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-=======
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
-
-	if (search->stop) return alpha;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	assert(search->n_empties == bit_count(~(search->board.player|search->board.opponent)));
-=======
-	assert(search->eval.n_empties == bit_count(~(search->board.player|search->board.opponent)));
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-	struct size_reduced_MoveList {	// derived from MoveList in move.h
-		int n_moves;
-		Move move[DEPTH_MIDGAME_TO_ENDGAME];
-	} movelist;
-=======
->>>>>>> d8589d2 (Init 4.5.3: abandon size_reduced_movelist which confuses gcc warn)
 
 	assert(bit_count(~(search->board.player|search->board.opponent)) < DEPTH_MIDGAME_TO_ENDGAME);
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
 	assert(SCORE_MIN <= alpha && alpha <= SCORE_MAX);
 
-<<<<<<< HEAD
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	if (search->stop) return alpha;
 
->>>>>>> 19da175 (differed movelist sort in PVS/NWS_shallow)
 	SEARCH_STATS(++statistics.n_NWS_endgame);
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-
-<<<<<<< HEAD
-	if (search->eval.n_empties <= DEPTH_TO_SHALLOW_SEARCH) return search_shallow(search, alpha);
-
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
->>>>>>> 8ee1734 (Use get_moves in search_shallow)
-	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	// stability cutoff
-<<<<<<< HEAD
-	hashboard = board0.board = search->board;
-	ofssolid = 0;
-	if (USE_SC && alpha >= NWS_STABILITY_THRESHOLD[search->eval.n_empties]) {	// (7%)
-		CUTOFF_STATS(++statistics.n_stability_try;)
-		score = SCORE_MAX - 2 * get_stability_fulls(search->board.opponent, search->board.player, full);
-		if (score <= alpha) {	// (3%)
-			CUTOFF_STATS(++statistics.n_stability_low_cutoff;)
-			return score;
-=======
-	if (search_SC_NWS(search, alpha, &score)) return score;
-
-=======
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-	// transposition cutoff
-
-=======
->>>>>>> 9794cc1 (Store solid-normalized hash in PVS_midgame)
-	// Improvement of Serch by Reducing Redundant Information in a Position of Othello
-	// Hidekazu Matsuo, Shuji Narazaki
-	// http://id.nii.ac.jp/1001/00156359/
-	// (1-2% improvement)
-	hashboard = search->board;
-	ofssolid = 0;
-	if (search->eval.n_empties <= MASK_SOLID_DEPTH) {	// (72%)
-		get_all_full_lines(hashboard.player | hashboard.opponent, full);
-		solid_opp = full[4] & hashboard.opponent;	// full[4] = all full
-		hashboard.player ^= solid_opp;	// normalize solid to player
-		hashboard.opponent ^= solid_opp;
-		ofssolid = bit_count(solid_opp) * 2;	// hash score is ofssolid grater than real
-	}
-	hash_code = board_get_hash_code(&hashboard);
-
-=======
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
 
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
 	// stability cutoff
 	hashboard = board0.board = search->board;
 	ofssolid = 0;
@@ -1812,204 +564,6 @@ int NWS_endgame(Search *search, const int alpha)
 
 	search_get_movelist(search, &movelist);
 
-<<<<<<< HEAD
-	nodes_org = search->n_nodes;
-
-	// special cases
-	if (movelist_is_empty(&movelist)) {	// (1%)
-		if (can_move(search->board.opponent, search->board.player)) { // pass
-			board_pass(&search->board);
-			bestscore = -NWS_endgame(search, ~alpha);
-			board_pass(&search->board);
-			hash_store_data.data.move[0] = PASS;
-		} else  { // game over
-<<<<<<< HEAD
-			bestmove->score = search_solve(search);
-			bestmove->x = NOMOVE;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-			bestscore = search_solve(search);
-			hash_store_data.data.move[0] = NOMOVE;
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-		}
-<<<<<<< HEAD
-
-<<<<<<< HEAD
-		// Improvement of Serch by Reducing Redundant Information in a Position of Othello
-		// Hidekazu Matsuo, Shuji Narazaki
-		// http://id.nii.ac.jp/1001/00156359/
-		if (search->eval.n_empties <= MASK_SOLID_DEPTH) {	// (99%)
-			solid_opp = full[4] & hashboard.opponent;	// full[4] = all full
-#ifndef POPCOUNT
-			if (solid_opp)	// (72%)
-#endif
-			{
-				hashboard.player ^= solid_opp;	// normalize solid to player
-				hashboard.opponent ^= solid_opp;
-				ofssolid = bit_count(solid_opp) * 2;	// hash score is ofssolid grater than real
-=======
-		bestmove = movelist->move; bestmove->score = -SCORE_INF;
-=======
-	} else {
-		if (movelist.n_moves > 1)	// (97%)
-			movelist_evaluate(&movelist, search, &hash_data, alpha, 0);
-=======
-	if (movelist.n_moves > 1) {	// (96%)
-<<<<<<< HEAD
-		// Improvement of Serch by Reducing Redundant Information in a Position of Othello
-		// Hidekazu Matsuo, Shuji Narazaki
-		// http://id.nii.ac.jp/1001/00156359/
-		// (1-2% improvement)
-		hashboard = search->board;
-		ofssolid = 0;
-		if (search->eval.n_empties <= MASK_SOLID_DEPTH) {	// (72%)
-			if (!ffull)
-				get_all_full_lines(hashboard.player | hashboard.opponent, full);
-			solid_opp = full[4] & hashboard.opponent;	// full[4] = all full
-			hashboard.player ^= solid_opp;	// normalize solid to player
-			hashboard.opponent ^= solid_opp;
-			ofssolid = bit_count(solid_opp) * 2;	// hash score is ofssolid grater than real
-		}
-<<<<<<< HEAD
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-=======
-		hash_code = board_get_hash_code(&hashboard);
->>>>>>> 44fd278 (Rearrange PVS_shallow loop)
-
-=======
->>>>>>> 30464b5 (add hash_prefetch to NWS_endgame)
-		// transposition cutoff
-		if (hash_get(&search->hash_table, &hashboard, hash_code, &hash_data.data)) {	// (6%)
-			hash_data.data.lower -= ofssolid;
-			hash_data.data.upper -= ofssolid;
-			if (search_TC_NWS(&hash_data.data, search->eval.n_empties, NO_SELECTIVITY, alpha, &score))	// (6%)
-				return score;
-		}
-		// else if (ofssolid)	// slows down
-		//	hash_get_from_board(&search->hash_table, HBOARD_V(board0), &hash_data.data);
-
-		movelist_evaluate_fast(&movelist, search, &hash_data.data);
-
-		nodes_org = search->n_nodes;
-		parity0 = search->eval.parity;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		bestmove = movelist.move; bestmove->score = -SCORE_INF;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-=======
-		--search->eval.n_empties;	// for next move
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
->>>>>>> 8c70641 (refactor NWS_endgame loop)
-		bestscore = -SCORE_INF;
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-		// loop over all moves
-		move = &movelist.move[0];
-		if (--search->eval.n_empties <= DEPTH_TO_SHALLOW_SEARCH)	// for next move (44%)
-			while ((move = move_next_best(move))) {	// (72%)
-				search->eval.parity = parity0 ^ QUADRANT_ID[move->x];
-				search->empties[search->empties[move->x].previous].next = search->empties[move->x].next;	// remove - maintain single link only
-				vboard_update(&search->board, board0, move);
-				score = -search_shallow(search, ~alpha, false);
-				search->empties[search->empties[move->x].previous].next = move->x;	// restore
-				search->board = board0.board;
-
-<<<<<<< HEAD
-			search->eval.parity = parity0;
-			empty_restore(search->empties, move->x);
-			store_rboard(search->board, board0);
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-			if (move->score > bestmove->score) {
-				bestmove = move;
-				if (bestmove->score > alpha) break;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-			if (score > bestscore) {
-				bestscore = score;
-				hash_store_data.data.move[0] = move->x;
-				if (bestscore > alpha) break;
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-=======
-			if (score > bestscore) {	// (66%)
-				bestscore = score;
-				hash_data.data.move[0] = move->x;
-				if (bestscore > alpha) break;	// (57%)
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-				if (score > bestscore) {	// (63%)
-					bestscore = score;
-					hash_data.data.move[0] = move->x;
-					if (bestscore > alpha) break;	// (48%)
-				}
->>>>>>> 8c70641 (refactor NWS_endgame loop)
-			}
-		else
-			while ((move = move_next_best(move))) {	// (76%)
-				search->eval.parity = parity0 ^ QUADRANT_ID[move->x];
-				empty_remove(search->empties, move->x);
-				vboard_update(&search->board, board0, move);
-				score = -NWS_endgame(search, ~alpha);
-				empty_restore(search->empties, move->x);
-				search->board = board0.board;
-
-				if (score > bestscore) {	// (63%)
-					bestscore = score;
-					hash_data.data.move[0] = move->x;
-					if (bestscore > alpha) break;	// (39%)
-				}
-			}
-		++search->eval.n_empties;
-		search->eval.parity = parity0;
-
-		if (search->stop)	// (1%)
-			return alpha;
-
-		hash_data.data.wl.c.depth = search->eval.n_empties;
-		hash_data.data.wl.c.selectivity = NO_SELECTIVITY;
-		hash_data.data.wl.c.cost = last_bit(search->n_nodes - nodes_org);
-		// hash_data.data.move[0] = bestmove;
-		hash_data.alpha = alpha + ofssolid;
-		hash_data.beta = alpha + ofssolid + 1;
-		hash_data.score = bestscore + ofssolid;
-		hash_store(&search->hash_table, &hashboard, hash_code, &hash_data);
-
-	// special cases
-	} else if (movelist.n_moves == 1) {	// (3%)
-		parity0 = search->eval.parity;
-		move = movelist_first(&movelist);
-		search_swap_parity(search, move->x);
-		empty_remove(search->empties, move->x);
-		vboard_update(&search->board, board0, move);
-		if (--search->eval.n_empties <= DEPTH_TO_SHALLOW_SEARCH)	// (56%)
-			bestscore = -search_shallow(search, ~alpha, false);
-		else	bestscore = -NWS_endgame(search, ~alpha);
-		++search->eval.n_empties;
-		empty_restore(search->empties, move->x);
-		search->eval.parity = parity0;
-		search->board = board0.board;
-
-	} else {	// (1%)
-		if (can_move(search->board.opponent, search->board.player)) { // pass
-			search_pass(search);
-			bestscore = -NWS_endgame(search, ~alpha);
-			search_pass(search);
-		} else  { // game over
-			bestscore = search_solve(search);
-		}
-	}
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	hash_code = board_get_hash_code(&hashboard);
-	hash_prefetch(&search->hash_table, hash_code);
-
-	search_get_movelist(search, &movelist);
-
 	if (movelist.n_moves > 1) {	// (96%)
 		// transposition cutoff
 		if (hash_get(&search->hash_table, &hashboard, hash_code, &hash_data.data)) {	// (6%)
@@ -2095,42 +649,7 @@ int NWS_endgame(Search *search, const int alpha)
 			search_pass(search);
 		} else  { // game over
 			bestscore = search_solve(search);
-=======
-	if (!search->stop) {
-		hash_store_data.data.wl.c.depth = search->eval.n_empties;
-		hash_store_data.data.wl.c.selectivity = NO_SELECTIVITY;
-		hash_store_data.data.wl.c.cost = last_bit(search->n_nodes - nodes_org);
-		// hash_store_data.data.move[0] = bestmove;
-		hash_store_data.alpha = alpha + ofssolid;
-		hash_store_data.beta = alpha + ofssolid + 1;
-		hash_store_data.score = bestscore + ofssolid;
-		hash_store(hash_table, &hashboard, hash_code, &hash_store_data);
-
-		if (SQUARE_STATS(1) + 0) {
-			foreach_move(move, movelist)
-<<<<<<< HEAD
-				++statistics.n_played_square[search->n_empties][SQUARE_TYPE[move->x]];
-			if (bestmove->score > alpha) ++statistics.n_good_square[search->n_empties][SQUARE_TYPE[bestmove->score]];
->>>>>>> 6506166 (More SSE optimizations)
-=======
-				++statistics.n_played_square[search->eval.n_empties][SQUARE_TYPE[move->x]];
-<<<<<<< HEAD
-<<<<<<< HEAD
-			if (bestmove->score > alpha) ++statistics.n_good_square[search->eval.n_empties][SQUARE_TYPE[bestmove->score]];
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-			if (bestmove->score > alpha)
-				++statistics.n_good_square[search->eval.n_empties][SQUARE_TYPE[bestmove->score]];
->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line)
-		}
-=======
-			if (bestscore > alpha)
-				++statistics.n_good_square[search->eval.n_empties][SQUARE_TYPE[bestscore]];
 		}
-	 	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-	 	assert((bestscore & 1) == 0);
-		return bestscore;
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
 	}
 
 	if (SQUARE_STATS(1) + 0) {
@@ -2139,28 +658,6 @@ int NWS_endgame(Search *search, const int alpha)
 		if (bestscore > alpha)
 			++statistics.n_good_square[search->eval.n_empties][SQUARE_TYPE[bestscore]];
 	}
-=======
-	if (search->stop)
-		return alpha;
-
-	hash_store_data.data.wl.c.depth = search->eval.n_empties;
-	hash_store_data.data.wl.c.selectivity = NO_SELECTIVITY;
-	hash_store_data.data.wl.c.cost = last_bit(search->n_nodes - nodes_org);
-	// hash_store_data.data.move[0] = bestmove;
-	hash_store_data.alpha = alpha + ofssolid;
-	hash_store_data.beta = alpha + ofssolid + 1;
-	hash_store_data.score = bestscore + ofssolid;
-	hash_store(hash_table, &hashboard, hash_code, &hash_store_data);
-
-=======
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-	if (SQUARE_STATS(1) + 0) {
-		foreach_move(move, movelist)
-			++statistics.n_played_square[search->eval.n_empties][SQUARE_TYPE[move->x]];
-		if (bestscore > alpha)
-			++statistics.n_good_square[search->eval.n_empties][SQUARE_TYPE[bestscore]];
-	}
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
  	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
  	assert((bestscore & 1) == 0);
 	return bestscore;
diff --git a/src/endgame_neon.c b/src/endgame_neon.c
index 5a47941..346063f 100644
--- a/src/endgame_neon.c
+++ b/src/endgame_neon.c
@@ -6,31 +6,10 @@
  *
  * Bitboard and empty list is kept in Neon registers.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> ddb5d3f (Add SVE SIMULLASTFLIP to endgame_neon (but not enabled))
  * @date 1998 - 2024
  * @author Richard Delorme
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 1998 - 2020
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
- * @date 1998 - 2022
-=======
- * @date 1998 - 2023
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.5
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
  * 
  */
 
@@ -42,16 +21,7 @@
 #define TESTZ_FLIP(X)	(!vgetq_lane_u64((X), 0))
 
 #ifndef HAS_CPU_64
-<<<<<<< HEAD
-<<<<<<< HEAD
 	#define vaddv_u8(x)	vget_lane_u32(vreinterpret_u32_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(x)))), 0)
-=======
-#define vaddv_u8(x)	vget_lane_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(x))), 0)
-#define vaddvq_u16(x)	vget_lane_u64(vpaddl_u32(vpaddl_u16(vadd_u16(vget_high_u16(x), vget_low_u16(x)))), 0)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-	#define vaddv_u8(x)	vget_lane_u32(vreinterpret_u32_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(x)))), 0)
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 #endif
 
 // in count_last_flip_neon.c
@@ -63,31 +33,6 @@ extern const uint64x2_t mask_dvhd[64][2];
  *
  * @param OP board to play the move on.
  * @param x move to play.
-<<<<<<< HEAD
-<<<<<<< HEAD
- * @param flipped flipped returned from mm_Flip.
- * @return resulting board.
- */
-static inline uint64x2_t board_flip_next(uint64x2_t OP, int x, uint64x2_t flipped)
-{
-#if !defined(_MSC_VER) && !defined(__clang__)	// MSVC-arm32 does not have vld1q_lane_u64
-	// arm64-gcc-13: 8, armv8a-clang-16: 8, msvc-arm64-19: 8, gcc-arm-13: 16, clang-armv7-11: 18
-	OP = veorq_u64(OP, vorrq_u64(flipped, vld1q_lane_u64((uint64_t *) &X_TO_BIT[x], flipped, 0)));
-	return vextq_u64(OP, OP, 1);
-#else	// arm64-gcc-13: 8, armv8a-clang-16: 7, msvc-arm64-19: 7, gcc-arm-13: 21, clang-armv7-11: 15
-	OP = veorq_u64(OP, flipped);
-	return vcombine_u64(vget_high_u64(OP), vorr_u64(vget_low_u64(OP), vld1_u64((uint64_t *) &X_TO_BIT[x])));
-#endif
-=======
- * @param next resulting board.
- * @return true if no flips.
- */
-static inline uint64x2_t board_next_neon(uint64x2_t OP, int x, uint64x2_t flipped)
-{
-	OP = veorq_u64(OP, flipped);
-	return vcombine_u64(vget_high_u64(OP), vorr_u64(vget_low_u64(OP), vcreate_u64(X_TO_BIT[x])));
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
  * @param flipped flipped returned from mm_Flip.
  * @return resulting board.
  */
@@ -97,14 +42,10 @@ static inline uint64x2_t board_flip_next(uint64x2_t OP, int x, uint64x2_t flippe
 	// arm64-gcc-13: 8, armv8a-clang-16: 8, msvc-arm64-19: 8, gcc-arm-13: 16, clang-armv7-11: 18
 	OP = veorq_u64(OP, vorrq_u64(flipped, vld1q_lane_u64((uint64_t *) &X_TO_BIT[x], flipped, 0)));
 	return vextq_u64(OP, OP, 1);
-<<<<<<< HEAD
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-=======
 #else	// arm64-gcc-13: 8, armv8a-clang-16: 7, msvc-arm64-19: 7, gcc-arm-13: 21, clang-armv7-11: 15
 	OP = veorq_u64(OP, flipped);
 	return vcombine_u64(vget_high_u64(OP), vorr_u64(vget_low_u64(OP), vld1_u64((uint64_t *) &X_TO_BIT[x])));
 #endif
->>>>>>> 78ce5d7 (more precise rboard/vboard opt; reexamine neon vboard_next)
 }
 
 /**
@@ -112,71 +53,34 @@ static inline uint64x2_t board_flip_next(uint64x2_t OP, int x, uint64x2_t flippe
  *
  * Get the final score, when no move can be made.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
- * @param P Board.player
- * @param n_empties Number of empty squares remaining on the board.
- * @return The final score, as a disc difference.
- */
-static int board_solve_neon(uint64x1_t P, int n_empties)
-{
-	int score = vaddv_u8(vcnt_u8(vreinterpret_u8_u64(P))) * 2 - SCORE_MAX;	// in case of opponents win
-=======
- * @param OP Board.
-=======
  * @param P Board.player
->>>>>>> 26dad03 (Use player bits only in board_score_1)
  * @param n_empties Number of empty squares remaining on the board.
  * @return The final score, as a disc difference.
  */
 static int board_solve_neon(uint64x1_t P, int n_empties)
 {
-<<<<<<< HEAD
-	int score = vaddv_u8(vcnt_u8(vreinterpret_u8_u64(vget_low_u64(OP)))) * 2 - SCORE_MAX;	// in case of opponents win
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
 	int score = vaddv_u8(vcnt_u8(vreinterpret_u8_u64(P))) * 2 - SCORE_MAX;	// in case of opponents win
->>>>>>> 26dad03 (Use player bits only in board_score_1)
 	int diff = score + n_empties;		// = n_discs_p - (64 - n_empties - n_discs_p)
 
 	SEARCH_STATS(++statistics.n_search_solve);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	if (diff == 0)
 		score = diff;
 	else if (diff > 0)
 		score = diff + n_empties;
-=======
-	if (diff >= 0)
-		score = diff;
-	if (diff > 0)
-		score += n_empties;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-	if (diff == 0)
-		score = diff;
-	else if (diff > 0)
-		score = diff + n_empties;
->>>>>>> c0fb778 (small optimizations in endgame)
 	return score;
 }
 
 /**
  * @brief Get the final score.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * Get the final score, when 1 empty square remain.
  * The original code has been adapted from Zebra by Gunnar Anderson.
  *
  * @param P      Board.player to evaluate.
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @param alpha  Alpha bound. (beta - 1)
  * @param pos    Last empty square to play.
  * @return       The final score, as a disc difference.
-<<<<<<< HEAD
  */
 #if (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SVE) && defined(SIMULLASTFLIP)
 
@@ -247,47 +151,6 @@ static int board_score_neon_1(uint64x1_t P, int alpha, int pos)
 	const unsigned char *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
 	uint64x2_t	PP = vdupq_lane_u64(P, 0);
 	uint64x2_t	I0, I1;
-=======
- * Get the final score, when 1 empty squares remain.
- * The following code has been adapted from Zebra by Gunnar Anderson.
-=======
- * Get the final score, when 1 empty square remain.
- * The original code has been adapted from Zebra by Gunnar Anderson.
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
- *
- * @param P      Board.player to evaluate.
- * @param beta   Beta bound.
-=======
- * @param beta   Beta bound - 1.
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-=======
- * @param alpha  Alpha bound. (beta - 1)
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
- * @param pos    Last empty square to play.
- * @return       The final opponent score, as a disc difference.
-=======
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
- */
-static int board_score_neon_1(uint64x1_t P, int alpha, int pos)
-{
-	int	score = 2 * vaddv_u8(vcnt_u8(vreinterpret_u8_u64(P))) - SCORE_MAX + 2;	// = (bit_count(P) + 1) - (SCORE_MAX - 1 - bit_count(P))
-	int	score2;
-	unsigned int	n_flips, m;
-	const unsigned char *COUNT_FLIP_X = COUNT_FLIP[pos & 7];
-	const unsigned char *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
-<<<<<<< HEAD
-<<<<<<< HEAD
-	uint8x16_t	PP = vzipq_u8(vreinterpretq_u8_u64(OP), vreinterpretq_u8_u64(OP)).val[0];
-	uint16x8_t	II;	// 2 dirs interleaved
-	const uint8x16_t dmask = { 1, 1, 2, 2, 4, 4, 8, 8, 16, 16, 32, 32, 64, 64, 128, 128 };
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-	uint64x2_t	PP, I0, I1;
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-=======
-	uint64x2_t	PP = vdupq_lane_u64(P, 0);
-	uint64x2_t	I0, I1;
->>>>>>> 26dad03 (Use player bits only in board_score_1)
 	static const unsigned short o_mask[64] = {
 		0xff01, 0x7f03, 0x3f07, 0x1f0f, 0x0f1f, 0x073f, 0x037f, 0x01ff,
 		0xfe03, 0xff07, 0x7f0f, 0x3f1f, 0x1f3f, 0x0f7f, 0x07ff, 0x03fe,
@@ -299,62 +162,6 @@ static int board_score_neon_1(uint64x1_t P, int alpha, int pos)
 		0x80ff, 0xc0fe, 0xe0fc, 0xf0f8, 0xf8f0, 0xfce0, 0xfec0, 0xff80
 	};
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	// n_flips = last_flip(pos, P);
-  #ifdef HAS_CPU_64	// vaddvq
-	unsigned int t0, t1;
-	const uint64x2_t dmask = { 0x0808040402020101, 0x8080404020201010 };
-
-	PP = vreinterpretq_u64_u8(vzip1q_u8(vreinterpretq_u8_u64(PP), vreinterpretq_u8_u64(PP)));
-	I0 = vandq_u64(PP, mask_dvhd[pos][0]);	// 2 dirs interleaved
-	t0 = vaddvq_u16(vreinterpretq_u16_u64(I0));
-	n_flips  = COUNT_FLIP_X[t0 >> 8];
-	n_flips += COUNT_FLIP_X[t0 & 0xFF];
-	I1 = vandq_u64(vreinterpretq_u64_u8(vtstq_u8(vreinterpretq_u8_u64(PP), vreinterpretq_u8_u64(mask_dvhd[pos][1]))), dmask);
-	t1 = vaddvq_u16(vreinterpretq_u16_u64(I1));
-	n_flips += COUNT_FLIP_Y[t1 >> 8];
-	n_flips += COUNT_FLIP_Y[t1 & 0xFF];
-
-  #else // Neon kindergarten
-	const uint64x2_t dmask = { 0x1020408001020408, 0x1020408001020408 };
-
-	I0 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_u64(vandq_u64(PP, mask_dvhd[pos][0])))));
-	n_flips  = COUNT_FLIP_X[vgetq_lane_u32(vreinterpretq_u32_u64(I0), 2)];
-	n_flips += COUNT_FLIP_X[vgetq_lane_u32(vreinterpretq_u32_u64(I0), 0)];
-	I1 = vreinterpretq_u64_s8(vnegq_s8(vreinterpretq_s8_u8(vtstq_u8(vreinterpretq_u8_u64(PP), vreinterpretq_u8_u64(mask_dvhd[pos][1])))));
-	I1 = vpaddlq_u32(vmulq_u32(vreinterpretq_u32_u64(dmask), vreinterpretq_u32_u64(I1)));
-	n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(I1), 11)];
-	n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(I1), 3)];
-  #endif
-	score += n_flips;
-
-	if (n_flips == 0) {
-		score2 = score - 2;	// empty for opponent
-		if (score <= 0)
-			score = score2;
-
-		if (score > alpha) {	// lazy cut-off
-			// n_flips = last_flip(pos, O);
-			m = o_mask[pos];	// valid diagonal bits
-  #ifdef HAS_CPU_64
-			n_flips  = COUNT_FLIP_X[(t0 >> 8) ^ 0xFF];
-			n_flips += COUNT_FLIP_X[(t0 ^ m) & 0xFF];
-			n_flips += COUNT_FLIP_Y[(t1 ^ m) >> 8];
-			n_flips += COUNT_FLIP_Y[(~t1) & 0xFF];
-  #else
-			n_flips  = COUNT_FLIP_X[vgetq_lane_u32(vreinterpretq_u32_u64(I0), 2) ^ 0xFF];
-			n_flips += COUNT_FLIP_X[vgetq_lane_u32(vreinterpretq_u32_u64(I0), 0) ^ (m & 0xFF)];
-			n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(I1), 11) ^ (m >> 8)];
-			n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(I1), 3) ^ 0xFF];
-  #endif
-			if (n_flips != 0)
-				score = score2 - n_flips;
-=======
-	score = SCORE_MAX - 2 - 2 * vaddv_u8(vcnt_u8(vreinterpret_u8_u64(vget_low_u64(OP))));	// 2 * bit_count(O) - SCORE_MAX
-
-=======
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 	// n_flips = last_flip(pos, P);
   #ifdef HAS_CPU_64	// vaddvq
 	unsigned int t0, t1;
@@ -403,32 +210,14 @@ static int board_score_neon_1(uint64x1_t P, int alpha, int pos)
 			n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(I1), 3) ^ 0xFF];
   #endif
 			if (n_flips != 0)
-<<<<<<< HEAD
-				score = score2 + n_flips;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
 				score = score2 - n_flips;
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 		}
 	}
 
 	return score;
 }
-<<<<<<< HEAD
-<<<<<<< HEAD
 #endif
 
-// from bench.c
-int board_score_1(const unsigned long long player, const int beta, const int x)
-{
-	return board_score_neon_1(vcreate_u64(player), beta, x);
-}
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-#endif
->>>>>>> ddb5d3f (Add SVE SIMULLASTFLIP to endgame_neon (but not enabled))
-
 // from bench.c
 int board_score_1(const unsigned long long player, const int beta, const int x)
 {
@@ -438,32 +227,7 @@ int board_score_1(const unsigned long long player, const int beta, const int x)
 /**
  * @brief Get the final score.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
- * Get the final min score, when 2 empty squares remain.
- *
- * @param OP The board to evaluate.
- * @param alpha Alpha bound.
- * @param n_nodes Node counter.
- * @param empties Packed empty square coordinates.
- * @return The final min score, as a disc difference.
- */
-static int board_solve_2(uint64x2_t OP, int alpha, volatile unsigned long long *n_nodes, uint8x8_t empties)
-{
-	uint64x2_t flipped;
-	int score, bestscore, nodes;
-	int x1 = vget_lane_u8(empties, 1);
-	int x2 = vget_lane_u8(empties, 0);
-	unsigned long long opponent;
-=======
- * Get the final score, when 2 empty squares remain.
-=======
- * Get the final max score, when 2 empty squares remain.
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
  * Get the final min score, when 2 empty squares remain.
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
  *
  * @param OP The board to evaluate.
  * @param alpha Alpha bound.
@@ -477,69 +241,16 @@ static int board_solve_2(uint64x2_t OP, int alpha, volatile unsigned long long *
 	int score, bestscore, nodes;
 	int x1 = vget_lane_u8(empties, 1);
 	int x2 = vget_lane_u8(empties, 0);
-<<<<<<< HEAD
-	unsigned long long bb;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
 	unsigned long long opponent;
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
 	// const int beta = alpha + 1;
 
 	SEARCH_STATS(++statistics.n_board_solve_2);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	opponent = vgetq_lane_u64(OP, 1);
 	if ((NEIGHBOUR[x1] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
 		bestscore = board_score_neon_1(vget_high_u64(veorq_u64(OP, flipped)), alpha, x2);
 
 		if ((bestscore > alpha) && (NEIGHBOUR[x2] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-<<<<<<< HEAD
-			score = board_score_neon_1(vget_high_u64(veorq_u64(OP, flipped)), alpha, x1);
-			if (score < bestscore)
-				bestscore = score;
-			nodes = 3;
-		} else	nodes = 2;
-
-	} else if ((NEIGHBOUR[x2] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-		bestscore = board_score_neon_1(vget_high_u64(veorq_u64(OP, flipped)), alpha, x1);
-		nodes = 2;
-
-	} else {	// pass - NEIGHBOUR test is almost 100% true
-		alpha = ~alpha;	// = -alpha - 1
-		OP = vextq_u64(OP, OP, 1);
-		if (!TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-			bestscore = board_score_neon_1(vget_high_u64(veorq_u64(OP, flipped)), alpha, x2);
-
-			if ((bestscore > alpha) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-				score = board_score_neon_1(vget_high_u64(veorq_u64(OP, flipped)), alpha, x1);
-				if (score < bestscore)
-					bestscore = score;
-				nodes = 3;
-			} else	nodes = 2;
-
-		} else if (!TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-			bestscore = board_score_neon_1(vget_high_u64(veorq_u64(OP, flipped)), alpha, x1);
-			nodes = 2;
-
-		} else {	// gameover
-			bestscore = board_solve_neon(vget_high_u64(OP), 2);
-			nodes = 1;
-		}
-		bestscore = -bestscore;
-=======
-	bb = vgetq_lane_u64(OP, 1);	// opponent
-	if ((NEIGHBOUR[x1] & bb) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-		bestscore = board_score_neon_1(vget_high_u64(veorq_u64(OP, flipped)), alpha + 1, x2);
-=======
-	opponent = vgetq_lane_u64(OP, 1);
-	if ((NEIGHBOUR[x1] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-		bestscore = board_score_neon_1(vget_high_u64(veorq_u64(OP, flipped)), alpha, x2);
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-
-		if ((bestscore <= alpha) && (NEIGHBOUR[x2] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-=======
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 			score = board_score_neon_1(vget_high_u64(veorq_u64(OP, flipped)), alpha, x1);
 			if (score < bestscore)
 				bestscore = score;
@@ -571,11 +282,7 @@ static int board_solve_2(uint64x2_t OP, int alpha, volatile unsigned long long *
 			bestscore = board_solve_neon(vget_high_u64(OP), 2);
 			nodes = 1;
 		}
-<<<<<<< HEAD
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
 		bestscore = -bestscore;
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
 	}
 
 	SEARCH_UPDATE_2EMPTIES_NODES(*n_nodes += nodes;)
@@ -587,30 +294,7 @@ static int board_solve_2(uint64x2_t OP, int alpha, volatile unsigned long long *
 /**
  * @brief Get the final score.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
- * Get the final max score, when 3 empty squares remain.
- *
- * @param OP The board to evaluate.
- * @param alpha Alpha bound.
- * @param n_nodes Node counter.
- * @param empties Packed empty square coordinates.
- * @return The final max score, as a disc difference.
- */
-static int search_solve_3(uint64x2_t OP, int alpha, volatile unsigned long long *n_nodes, uint8x8_t empties)
-{
-	uint64x2_t flipped;
-	int score, bestscore, x, pol;
-	unsigned long long opponent;
-=======
- * Get the final score, when 3 empty squares remain.
-=======
- * Get the final min score, when 3 empty squares remain.
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
  * Get the final max score, when 3 empty squares remain.
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
  *
  * @param OP The board to evaluate.
  * @param alpha Alpha bound.
@@ -623,23 +307,10 @@ static int search_solve_3(uint64x2_t OP, int alpha, volatile unsigned long long
 	uint64x2_t flipped;
 	int score, bestscore, x, pol;
 	unsigned long long opponent;
-<<<<<<< HEAD
-	// const int beta = alpha + 1;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
 
 	SEARCH_STATS(++statistics.n_search_solve_3);
 	SEARCH_UPDATE_INTERNAL_NODES(*n_nodes);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 	bestscore = -SCORE_INF;
 	pol = 1;
 	do {
@@ -649,81 +320,6 @@ static int search_solve_3(uint64x2_t OP, int alpha, volatile unsigned long long
 		if ((NEIGHBOUR[x] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
 			bestscore = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, empties);
 			if (bestscore > alpha) return bestscore * pol;
-<<<<<<< HEAD
-		}
-
-		x = vget_lane_u8(empties, 1);
-		if (/* (NEIGHBOUR[x] & opponent) && */ !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-			score = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, vuzp_u8(empties, empties).val[0]);	// d2d0
-			if (score > alpha) return score * pol;
-			else if (score > bestscore) bestscore = score;
-		}
-
-		x = vget_lane_u8(empties, 0);
-		if (/* (NEIGHBOUR[x] & opponent) && */ !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-			score = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, vext_u8(empties, empties, 1));	// d2d1
-			if (score > bestscore) bestscore = score;
-			return bestscore * pol;
-		}
-
-		if (bestscore > -SCORE_INF)
-			return bestscore * pol;
-
-		OP = vextq_u64(OP, OP, 1);	// pass
-		alpha = ~alpha;	// = -(alpha + 1)
-	} while ((pol = -pol) < 0);
-
-	return board_solve_neon(vget_low_u64(OP), 3);	// gameover
-=======
-	// best move alphabeta search
-	bestscore = -SCORE_INF;
-	bb = vgetq_lane_u64(OP, 1);	// opponent
-	x = vget_lane_u8(empties, 2);
-	if ((NEIGHBOUR[x] & bb) && !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-		bestscore = -board_solve_neon_2(board_next_neon(OP, x, flipped), -(alpha + 1), n_nodes, empties);
-		if (bestscore > alpha) return bestscore;
-	}
-
-	x = vget_lane_u8(empties, 1);
-	if ((NEIGHBOUR[x] & bb) && !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-		score = -board_solve_neon_2(board_next_neon(OP, x, flipped), -(alpha + 1), n_nodes, vuzp_u8(empties, empties).val[0]);
-		if (score > alpha) return score;
-		else if (score > bestscore) bestscore = score;
-	}
-
-	x = vget_lane_u8(empties, 0);
-	if ((NEIGHBOUR[x] & bb) && !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-		score = -board_solve_neon_2(board_next_neon(OP, x, flipped), -(alpha + 1), n_nodes, vext_u8(empties, empties, 1));
-		if (score > bestscore) bestscore = score;
-	}
-
-	else if (bestscore == -SCORE_INF) {	// pass ?
-=======
-	for (pol = 1; pol >= -1; pol -= 2) {
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-	pol = 1;
-	do {
->>>>>>> c0fb778 (small optimizations in endgame)
-		// best move alphabeta search
-		bestscore = -SCORE_INF;
-		bb = vgetq_lane_u64(OP, 1);	// opponent
-=======
-	pol = -1;
-=======
-	bestscore = SCORE_INF;	// min stage
-	pol = 1;
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-	do {
-		// best move alphabeta search
-		opponent = vgetq_lane_u64(OP, 1);
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-		x = vget_lane_u8(empties, 2);
-		if ((NEIGHBOUR[x] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-			bestscore = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, empties);
-			if (bestscore <= alpha) return bestscore * pol;
-=======
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 		}
 
 		x = vget_lane_u8(empties, 1);
@@ -747,51 +343,13 @@ static int search_solve_3(uint64x2_t OP, int alpha, volatile unsigned long long
 		alpha = ~alpha;	// = -(alpha + 1)
 	} while ((pol = -pol) < 0);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-	return bestscore;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
 	return board_solve_neon(vget_low_u64(OP), 3);	// gameover
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-	return board_solve_neon(vget_high_u64(OP), 3);	// gameover
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
-	return board_solve_neon(vget_low_u64(OP), 3);	// gameover
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 }
 
 /**
  * @brief Get the final score.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
- * Get the final min score, when 4 empty squares remain.
- *
- * @param search Search position.
- * @param alpha Upper score value.
- * @return The final min score, as a disc difference.
- */
-
-static int search_solve_4(Search *search, int alpha)
-{
-	uint64x2_t	OP, flipped;
-	uint8x16_t	empties_series;	// B15:4th, B11:3rd, B7:2nd, B3:1st, lower 3 bytes for 3 empties
-	uint8x16_t	shuf;
-	int x1, x2, x3, x4, paritysort, score, bestscore, pol;
-	unsigned long long opponent;
-=======
- * Get the final score, when 4 empty squares remain.
-=======
- * Get the final max score, when 4 empty squares remain.
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
  * Get the final min score, when 4 empty squares remain.
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
  *
  * @param search Search position.
  * @param alpha Upper score value.
@@ -804,12 +362,7 @@ static int search_solve_4(Search *search, int alpha)
 	uint8x16_t	empties_series;	// B15:4th, B11:3rd, B7:2nd, B3:1st, lower 3 bytes for 3 empties
 	uint8x16_t	shuf;
 	int x1, x2, x3, x4, paritysort, score, bestscore, pol;
-<<<<<<< HEAD
-	unsigned long long opp;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
 	unsigned long long opponent;
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
 	// const int beta = alpha + 1;
 	static const unsigned char parity_case[64] = {	/* x4x3x2x1 = */
 		/*0000*/  0, /*0001*/  0, /*0010*/  1, /*0011*/  9, /*0100*/  2, /*0101*/ 10, /*0110*/ 11, /*0111*/  3,
@@ -821,10 +374,6 @@ static int search_solve_4(Search *search, int alpha)
 		/*0220*/ 11, /*0221*/  5, /*0230*/  6, /*0231*/  0, /*0320*/  6, /*0321*/  0, /*0330*/ 11, /*0331*/  5,
 		/*0222*/  3, /*0223*/  5, /*0232*/  7, /*0233*/  8, /*0322*/  8, /*0323*/  7, /*0332*/  5, /*0333*/  3
 	};
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 	static const uint64x2_t shuf_mask[] = {
 		{ 0x0203010003020100, 0x0003020101030200 },	//  0: 1(x1) 3(x2 x3 x4), 1(x1) 1(x2) 2(x3 x4), 1 1 1 1, 4
 		{ 0x0203010003020100, 0x0002030101020300 },	//  1: 1(x2) 3(x1 x3 x4)
@@ -835,60 +384,16 @@ static int search_solve_4(Search *search, int alpha)
 		{ 0x0102030002010300, 0x0003020103020100 },	//  6: 1(x2) 1(x3) 2(x1 x4)	x4x1x2x3-x1x2x3x4-x3x2x1x4-x2x3x1x4
 		{ 0x0002030102000301, 0x0103020003020100 },	//  7: 1(x2) 1(x4) 2(x1 x3)	x3x1x2x4-x1x2x3x4-x4x2x1x3-x2x4x1x3
 		{ 0x0001030201000302, 0x0203010003020100 },	//  8: 1(x3) 1(x4) 2(x1 x2)	x2x1x3x4-x1x2x3x4-x4x3x1x2-x3x4x1x2
-<<<<<<< HEAD
-<<<<<<< HEAD
-		{ 0x0203010003020100, 0x0001030201000302 },	//  9: 2(x1 x2) 2(x3 x4)	x4x3x1x2-x3x4x1x2-x2x1x3x4-x1x2x3x4
-		{ 0x0200030103010200, 0x0002030101030200 },	// 10: 2(x1 x3) 2(x2 x4)	x4x2x1x3-x3x1x2x4-x2x4x1x3-x1x3x2x4
-		{ 0x0201030003000201, 0x0003020101020300 }	// 11: 2(x1 x4) 2(x2 x3)	x4x1x2x3-x3x2x1x4-x2x3x1x4-x1x4x2x3
-=======
-	static const uint32x4_t shuf_mask[] = {
-		{ 0x03020100, 0x02030100, 0x01030200, 0x00030201 },	//  0: 1(x1) 3(x2 x3 x4), 1(x1) 1(x2) 2(x3 x4), 1 1 1 1, 4
-		{ 0x03020100, 0x02030100, 0x01020300, 0x00020301 },	//  1: 1(x2) 3(x1 x3 x4)
-		{ 0x03010200, 0x02010300, 0x01030200, 0x00010302 },	//  2: 1(x3) 3(x1 x2 x4)
-		{ 0x03000201, 0x02000301, 0x01000302, 0x00030201 },	//  3: 1(x4) 3(x1 x2 x3)
-		{ 0x03010200, 0x01030200, 0x02030100, 0x00030201 },	//  4: 1(x1) 1(x3) 2(x2 x4)	x4x1x2x3-x2x1x3x4-x3x1x2x4-x1x3x2x4
-		{ 0x03000201, 0x00030201, 0x02030100, 0x01030200 },	//  5: 1(x1) 1(x4) 2(x2 x3)	x3x1x2x4-x2x1x3x4-x4x1x2x3-x1x4x2x3
-		{ 0x02010300, 0x01020300, 0x03020100, 0x00030201 },	//  6: 1(x2) 1(x3) 2(x1 x4)	x4x1x2x3-x1x2x3x4-x3x2x1x4-x2x3x1x4
-		{ 0x02000301, 0x00020301, 0x03020100, 0x01030200 },	//  7: 1(x2) 1(x4) 2(x1 x3)	x3x1x2x4-x1x2x3x4-x4x2x1x3-x2x4x1x3
-		{ 0x01000302, 0x00010302, 0x03020100, 0x02030100 },	//  8: 1(x3) 1(x4) 2(x1 x2)	x2x1x3x4-x1x2x3x4-x4x3x1x2-x3x4x1x2
-		{ 0x03020100, 0x02030100, 0x01000302, 0x00010302 },	//  9: 2(x1 x2) 2(x3 x4)		x4x3x1x2-x3x4x1x2-x2x1x3x4-x1x2x3x4
-		{ 0x03010200, 0x02000301, 0x01030200, 0x00020301 },	// 10: 2(x1 x3) 2(x2 x4)		x4x2x1x3-x3x1x2x4-x2x4x1x3-x1x3x2x4
-		{ 0x03000201, 0x02010300, 0x01020300, 0x00030201 }	// 11: 2(x1 x4) 2(x2 x3)		x4x1x2x3-x3x2x1x4-x2x3x1x4-x1x4x2x3
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-		{ 0x0203010003020100, 0x0001030201000302 },	//  9: 2(x1 x2) 2(x3 x4)		x4x3x1x2-x3x4x1x2-x2x1x3x4-x1x2x3x4
-		{ 0x0200030103010200, 0x0002030101030200 },	// 10: 2(x1 x3) 2(x2 x4)		x4x2x1x3-x3x1x2x4-x2x4x1x3-x1x3x2x4
-		{ 0x0201030003000201, 0x0003020101020300 }	// 11: 2(x1 x4) 2(x2 x3)		x4x1x2x3-x3x2x1x4-x2x3x1x4-x1x4x2x3
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-=======
 		{ 0x0203010003020100, 0x0001030201000302 },	//  9: 2(x1 x2) 2(x3 x4)	x4x3x1x2-x3x4x1x2-x2x1x3x4-x1x2x3x4
 		{ 0x0200030103010200, 0x0002030101030200 },	// 10: 2(x1 x3) 2(x2 x4)	x4x2x1x3-x3x1x2x4-x2x4x1x3-x1x3x2x4
 		{ 0x0201030003000201, 0x0003020101020300 }	// 11: 2(x1 x4) 2(x2 x3)	x4x1x2x3-x3x2x1x4-x2x3x1x4-x1x4x2x3
->>>>>>> 26dad03 (Use player bits only in board_score_1)
 	};
 
 	SEARCH_STATS(++statistics.n_search_solve_4);
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	// stability cutoff (try 12%, cut 7%)
-<<<<<<< HEAD
-<<<<<<< HEAD
 	if (search_SC_NWS_4(search, alpha, &score)) return score;
-=======
-	// stability cutoff
-=======
-	// stability cutoff (try 12%, cut 7%)
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-	if (search_SC_NWS(search, alpha, &score)) return score;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-	if (search_SC_NWS(search, alpha, 4, &score)) return score;
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
-=======
-	if (search_SC_NWS_4(search, alpha, &score)) return score;
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 
 	OP = vld1q_u64((uint64_t *) &search->board);
 	x1 = search->empties[NOMOVE].next;
@@ -902,8 +407,6 @@ static int search_solve_4(Search *search, int alpha)
 	// Only the 1 1 2 case needs move sorting on this ply.
 	empties_series = vreinterpretq_u8_u32(vdupq_n_u32((x1 << 24) | (x2 << 16) | (x3 << 8) | x4));
 	paritysort = parity_case[((x3 ^ x4) & 0x24) + (((x2 ^ x4) & 0x24) >> 1) + (((x1 ^ x4) & 0x24) >> 2)];
-<<<<<<< HEAD
-<<<<<<< HEAD
 	shuf = vreinterpretq_u8_u64(shuf_mask[paritysort]);
 #ifdef HAS_CPU_64
 	empties_series = vqtbl1q_u8(empties_series, shuf);
@@ -912,18 +415,7 @@ static int search_solve_4(Search *search, int alpha)
 		vtbl1_u8(vget_low_u8(empties_series), vget_high_u8(shuf)));
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	bestscore = SCORE_INF;	// min stage
-=======
-	bestscore = -SCORE_INF;
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
-	bestscore = SCORE_INF;	// min stage
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 	pol = 1;
 	do {
 		// best move alphabeta search
@@ -965,130 +457,6 @@ static int search_solve_4(Search *search, int alpha)
 		OP = vextq_u64(OP, OP, 1);	// pass
 		alpha = ~alpha;	// = -(alpha + 1)
 	} while ((pol = -pol) < 0);
-<<<<<<< HEAD
 
 	return board_solve_neon(vget_high_u64(OP), 4);	// gameover
-<<<<<<< HEAD
-=======
-	shuf = shuf_mask[paritysort];
-=======
-	shuf = vreinterpretq_u8_u64(shuf_mask[paritysort]);
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-#ifdef HAS_CPU_64
-	empties_series = vqtbl1q_u8(empties_series, shuf);
-#else
-	empties_series = vcombine_u8(vtbl1_u8(vget_low_u8(empties_series), vget_low_u8(shuf)),
-		vtbl1_u8(vget_low_u8(empties_series), vget_high_u8(shuf)));
-#endif
-
-	// best move alphabeta search
-	bestscore = -SCORE_INF;
-	opp = vgetq_lane_u64(OP, 1);
-	x1 = vgetq_lane_u8(empties_series, 3);
-	if ((NEIGHBOUR[x1] & opp) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-		bestscore = -search_solve_sse_3(board_next_neon(OP, x1, flipped), -(alpha + 1), &search->n_nodes, vget_low_u8(empties_series));
-		if (bestscore > alpha) return bestscore;
-	}
-
-	empties_series = vextq_u8(empties_series, empties_series, 4);
-	x2 = vgetq_lane_u8(empties_series, 3);
-	if ((NEIGHBOUR[x2] & opp) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-		score = -search_solve_sse_3(board_next_neon(OP, x2, flipped), -(alpha + 1), &search->n_nodes, vget_low_u8(empties_series));
-		if (score > alpha) return score;
-		else if (score > bestscore) bestscore = score;
-	}
-
-	empties_series = vextq_u8(empties_series, empties_series, 4);
-	x3 = vgetq_lane_u8(empties_series, 3);
-	if ((NEIGHBOUR[x3] & opp) && !TESTZ_FLIP(flipped = mm_Flip(OP, x3))) {
-		score = -search_solve_sse_3(board_next_neon(OP, x3, flipped), -(alpha + 1), &search->n_nodes, vget_low_u8(empties_series));
-		if (score > alpha) return score;
-		else if (score > bestscore) bestscore = score;
-	}
-
-	empties_series = vextq_u8(empties_series, empties_series, 4);
-	x4 = vgetq_lane_u8(empties_series, 3);
-	if ((NEIGHBOUR[x4] & opp) && !TESTZ_FLIP(flipped = mm_Flip(OP, x4))) {
-		score = -search_solve_sse_3(board_next_neon(OP, x4, flipped), -(alpha + 1), &search->n_nodes, vget_low_u8(empties_series));
-		if (score > bestscore) bestscore = score;
-	}
-
-	else if (bestscore == -SCORE_INF) {	// no move
-		if (can_move(opp, vgetq_lane_u64(OP, 0))) { // pass
-			search_pass_endgame(search);
-			bestscore = -search_solve_4(search, -(alpha + 1));
-			search_pass_endgame(search);
-
-		} else { // gameover
-			bestscore = board_solve_neon(vget_low_u64(OP), 4);
-=======
-	for (pol = 1; pol >= -1; pol -= 2) {
-=======
-	pol = 1;
-	do {
->>>>>>> c0fb778 (small optimizations in endgame)
-		// best move alphabeta search
-		bestscore = -SCORE_INF;
-		opp = vgetq_lane_u64(OP, 1);
-		x1 = vgetq_lane_u8(empties_series, 3);
-		if ((NEIGHBOUR[x1] & opp) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-			bestscore = -search_solve_3(board_flip_next(OP, x1, flipped), ~alpha, &search->n_nodes, vget_low_u8(empties_series));
-			if (bestscore > alpha) return bestscore * pol;
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-	pol = -1;
-	do {
-		// best move alphabeta search
-		alpha = ~alpha;	// = -(alpha + 1)
-		bestscore = SCORE_INF;	// Negative score
-		opponent = vgetq_lane_u64(OP, 1);
-		x1 = vgetq_lane_u8(empties_series, 3);
-		if ((NEIGHBOUR[x1] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-			bestscore = search_solve_3(board_flip_next(OP, x1, flipped), alpha, &search->n_nodes,
-				vget_low_u8(empties_series));
-			if (bestscore <= alpha) return bestscore * pol;
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-		}
-
-		x2 = vgetq_lane_u8(empties_series, 7);
-		if ((NEIGHBOUR[x2] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-			score = search_solve_3(board_flip_next(OP, x2, flipped), alpha, &search->n_nodes,
-				vget_low_u8(vextq_u8(empties_series, empties_series, 4)));
-			if (score <= alpha) return score * pol;
-			else if (score < bestscore) bestscore = score;
-		}
-
-		x3 = vgetq_lane_u8(empties_series, 11);
-		if ((NEIGHBOUR[x3] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x3))) {
-			score = search_solve_3(board_flip_next(OP, x3, flipped), alpha, &search->n_nodes,
-				vget_high_u8(empties_series));
-			if (score <= alpha) return score * pol;
-			else if (score < bestscore) bestscore = score;
-		}
-
-		x4 = vgetq_lane_u8(empties_series, 15);
-		if ((NEIGHBOUR[x4] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x4))) {
-			score = search_solve_3(board_flip_next(OP, x4, flipped), alpha, &search->n_nodes,
-				vget_low_u8(vextq_u8(empties_series, empties_series, 12)));
-			if (score < bestscore) bestscore = score;
-			return bestscore * pol;
-		}
-
-		if (bestscore < SCORE_INF)
-			return bestscore * pol;
-
-		OP = vextq_u64(OP, OP, 1);	// pass
-	} while ((pol = -pol) >= 0);
-=======
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-
-<<<<<<< HEAD
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-	return bestscore;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-	return board_solve_neon(vget_low_u64(OP), 4);	// gameover
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 }
diff --git a/src/endgame_sse.c b/src/endgame_sse.c
index 826489c..f007739 100644
--- a/src/endgame_sse.c
+++ b/src/endgame_sse.c
@@ -1,64 +1,22 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
 /**
  * @file endgame_sse.c
-=======
-/**
-<<<<<<< HEAD
- * @file endgame_avx.c
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
- * @file endgame_sse.c
->>>>>>> 72924b1 (Fix macro expansion; correct comments)
  *
  *
  * SSE / AVX optimized version of endgame.c for the last four empties.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * Bitboard and empty list are kept in SSE registers.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> a09308f (Renew version string and copyright year)
  * @date 1998 - 2024
  * @author Richard Delorme
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * Bitboard and empty list is kept in SSE registers, but performance gain
- * is limited for GCC minGW build since vectorcall is not supported.
-=======
- * Bitboard and empty list are kept in SSE registers.
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
- *
- * @date 1998 - 2022
-=======
- * @date 1998 - 2023
->>>>>>> ea8595b (Split v3hi_empties from search_solve_3 & moved to solve_4)
- * @author Richard Delorme
- * @author Toshihiko Okuhara
-<<<<<<< HEAD
- * @version 4.4
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
- * @version 4.5
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
  * 
  */
 
 #include "bit.h"
 #include "settings.h"
 #include "search.h"
-<<<<<<< HEAD
-<<<<<<< HEAD
-#include <stdint.h>
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 #include <stdint.h>
->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask)
 #include <assert.h>
 
 #define	SWAP64	0x4e	// for _mm_shuffle_epi32
@@ -66,15 +24,11 @@
 #define	DUPHI	0xee
 
 #if defined(__AVX__) && (defined(__x86_64__) || defined(_M_X64))
-<<<<<<< HEAD
-<<<<<<< HEAD
 	#define	EXTRACT_O(OP)	_mm_extract_epi64(OP, 1)
 #else
 	#define	EXTRACT_O(OP)	_mm_cvtsi128_si64(_mm_shuffle_epi32(OP, DUPHI))
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(__AVX__) || defined(__SSE4_1__)
 	static inline int vectorcall TESTZ_FLIP(__m128i X) { return _mm_testz_si128(X, X); }
 #elif defined(__x86_64__) || defined(_M_X64)
@@ -86,48 +40,6 @@
 #if defined(__AVX512VL__) || defined(__AVX10_1__)
 	#define	TEST_EPI8_MASK32(X,Y)	_cvtmask32_u32(_mm256_test_epi8_mask((X), (Y)))
 	#define	TEST1_EPI8_MASK32(X)	_cvtmask32_u32(_mm256_test_epi8_mask((X), (X)))
-<<<<<<< HEAD
-	#define	TEST_EPI8_MASK16(X,Y)	_cvtmask16_u32(_mm_test_epi8_mask((X), (Y)))
-#else	// AVX2
-	#define	TEST_EPI8_MASK32(X,Y)	_mm256_movemask_epi8(_mm256_sub_epi8(_mm256_setzero_si256(), _mm256_and_si256((X),(Y))))
-	#define	TEST1_EPI8_MASK32(X)	_mm256_movemask_epi8(_mm256_sub_epi8(_mm256_setzero_si256(), (X)))
-	#define	TEST_EPI8_MASK16(X,Y)	_mm_movemask_epi8(_mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128((X),(Y))))
-#endif
-
-// in count_last_flip_sse.c
-extern const uint8_t COUNT_FLIP[8][256];
-=======
-#define	EXTRACT_O(OP)	_mm_extract_epi64(OP, 1)
-=======
-	#define	EXTRACT_O(OP)	_mm_extract_epi64(OP, 1)
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-#else
-	#define	EXTRACT_O(OP)	_mm_cvtsi128_si64(_mm_shuffle_epi32(OP, DUPHI))
-#endif
-
-#ifdef __AVX__
-=======
-#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512)
->>>>>>> 0b8fa13 (More HBOARD hash functions)
-	#define	vflip	__m256i
-	static inline int vectorcall TESTZ_FLIP(__m256i X) { return _mm256_testz_si256(X, X); }
-#else
-	#define	vflip	__m128i
-  #if defined(__x86_64__) || defined(_M_X64)
-=======
-#if defined(__AVX__) || defined(__SSE4_1__)
-	static inline int vectorcall TESTZ_FLIP(__m128i X) { return _mm_testz_si128(X, X); }
-#elif defined(__x86_64__) || defined(_M_X64)
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
-	#define TESTZ_FLIP(X)	(!_mm_cvtsi128_si64(X))
-#else
-	static inline int vectorcall TESTZ_FLIP(__m128i X) { return !_mm_cvtsi128_si32(_mm_packs_epi16(X, X)); }
-#endif
-
-#if defined(__AVX512VL__) || defined(__AVX10_1__)
-	#define	TEST_EPI8_MASK32(X,Y)	_cvtmask32_u32(_mm256_test_epi8_mask((X), (Y)))
-=======
->>>>>>> c0e1bc1 (Add TEST1_EPI8_MASK32 and remove TESTNOT_EPI8_MASK32 in board_score_sse_1)
 	#define	TEST_EPI8_MASK16(X,Y)	_cvtmask16_u32(_mm_test_epi8_mask((X), (Y)))
 #else	// AVX2
 	#define	TEST_EPI8_MASK32(X,Y)	_mm256_movemask_epi8(_mm256_sub_epi8(_mm256_setzero_si256(), _mm256_and_si256((X),(Y))))
@@ -136,12 +48,7 @@ extern const uint8_t COUNT_FLIP[8][256];
 #endif
 
 // in count_last_flip_sse.c
-<<<<<<< HEAD
-extern const unsigned char COUNT_FLIP[8][256];
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 extern const uint8_t COUNT_FLIP[8][256];
->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask)
 extern const V4DI mask_dvhd[64];
 
 /**
@@ -149,63 +56,25 @@ extern const V4DI mask_dvhd[64];
  *
  * @param OP board to play the move on.
  * @param x move to play.
-<<<<<<< HEAD
-<<<<<<< HEAD
- * @param flipped flipped returned from mm_Flip.
- * @return resulting board.
- */
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-static inline __m128i vectorcall board_flip_next(__m128i OP, int x, __m128i flipped)
-{
-	OP = _mm_xor_si128(OP, _mm_or_si128(reduce_vflip(flipped), _mm_loadl_epi64((__m128i *) &X_TO_BIT[x])));
-=======
- * @param next resulting board.
- * @return true if no flips.
-=======
  * @param flipped flipped returned from mm_Flip.
  * @return resulting board.
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
  */
-static inline __m128i board_next_sse(__m128i OP, int x, __m128i flipped)
-=======
-static inline __m128i board_flip_next(__m128i OP, int x, __m128i flipped)
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
-=======
 static inline __m128i vectorcall board_flip_next(__m128i OP, int x, __m128i flipped)
->>>>>>> 0ba5408 (add vectorcall to inline functions in case not inlined)
-{
-	OP = _mm_xor_si128(OP, _mm_or_si128(flipped, _mm_loadl_epi64((__m128i *) &X_TO_BIT[x])));
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-static inline __m128i vectorcall board_flip_next(__m128i OP, int x, vflip flipped)
-=======
-static inline __m128i vectorcall board_flip_next(__m128i OP, int x, __m128i flipped)
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
 {
 	OP = _mm_xor_si128(OP, _mm_or_si128(reduce_vflip(flipped), _mm_loadl_epi64((__m128i *) &X_TO_BIT[x])));
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
 	return _mm_shuffle_epi32(OP, SWAP64);
 }
 
 /**
  * @brief Get the final score.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * Get the final score, when 1 empty square remain.
  * The original code has been adapted from Zebra by Gunnar Anderson.
  *
  * @param PO     Board to evaluate. (O ignored)
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @param alpha  Alpha bound. (beta - 1)
  * @param pos    Last empty square to play.
  * @return       The final score, as a disc difference.
-<<<<<<< HEAD
  */
 #if LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2
 // PEXT count last flip (2.60s on skylake, 2.35 on icelake, 2.16s on Zen4), very slow on Zen1/2
@@ -683,978 +552,146 @@ static int vectorcall board_solve_2(__m128i OP, int alpha, volatile unsigned lon
 	int x1 = _mm_extract_epi16(empties, 1);
 	int x2 = _mm_extract_epi16(empties, 0);
 	unsigned long long opponent;
-=======
- * Get the final score, when no move can be made.
- *
- * @param OP Board.
- * @param n_empties Number of empty squares remaining on the board.
- * @return The final score, as a disc difference.
- */
-static int vectorcall board_solve_sse(__m128i OP, int n_empties)
-{
-	int score = bit_count(_mm_cvtsi128_si64(OP)) * 2 - SCORE_MAX;	// in case of opponents win
-	int diff = score + n_empties;		// = n_discs_p - (64 - n_empties - n_discs_p)
-
-	SEARCH_STATS(++statistics.n_search_solve);
+	// const int beta = alpha + 1;
 
-	if (diff >= 0)
-		score = diff;
-	if (diff > 0)
-		score += n_empties;
-	return score;
-}
+	SEARCH_STATS(++statistics.n_board_solve_2);
 
-/**
- * @brief Get the final score.
- *
-=======
->>>>>>> 26dad03 (Use player bits only in board_score_1)
- * Get the final score, when 1 empty squares remain.
- * The following code has been adapted from Zebra by Gunnar Anderson.
-=======
- * Get the final score, when 1 empty square remain.
- * The original code has been adapted from Zebra by Gunnar Anderson.
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
- *
- * @param PO     Board to evaluate. (O ignored)
- * @param beta   Beta bound.
-=======
- * @param beta   Beta bound - 1.
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-=======
- * @param alpha  Alpha bound. (beta - 1)
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
- * @param pos    Last empty square to play.
- * @return       The final opponent score, as a disc difference.
-=======
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
- */
-#if LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2
-// PEXT count last flip (2.60s on skylake, 2.35 on icelake, 2.16s on Zen4), very slow on Zen1/2
-extern const unsigned long long mask_x[64][4];
+	opponent = EXTRACT_O(OP);
+	if ((NEIGHBOUR[x1] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
+		bestscore = board_score_sse_1(_mm_xor_si128(_mm_shuffle_epi32(OP, SWAP64), reduce_vflip(flipped)), alpha, x2);
 
-static inline int vectorcall board_score_sse_1(__m128i OP, const int alpha, const int pos)
-{
-	uint_fast8_t	n_flips;
-	unsigned int	th, tv;
-	unsigned long long P = _mm_cvtsi128_si64(OP);
-	unsigned long long mP;
-	int	score, score2;
-	const uint8_t *COUNT_FLIP_X = COUNT_FLIP[pos & 7];
-	const uint8_t *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
+		if ((bestscore > alpha) && (NEIGHBOUR[x2] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
+			score = board_score_sse_1(_mm_xor_si128(_mm_shuffle_epi32(OP, SWAP64), reduce_vflip(flipped)), alpha, x1);
+			if (score < bestscore)
+				bestscore = score;
+			nodes = 3;
+		} else	nodes = 2;
 
-	mP = P & mask_x[pos][3];	// mask out unrelated bits to make dummy 0 bits for outside
-	// n_flips  = COUNT_FLIP_X[th = _bextr_u64(mP, pos & 0x38, 8)];
-	n_flips  = COUNT_FLIP_X[th = (mP >> (pos & 0x38)) & 0xFF];
-	n_flips += COUNT_FLIP_Y[_pext_u64(mP, mask_x[pos][0])];
-	n_flips += COUNT_FLIP_Y[_pext_u64(mP, mask_x[pos][1])];
-	n_flips += COUNT_FLIP_Y[tv = _pext_u64(mP, mask_x[pos][2])];
+	} else if ((NEIGHBOUR[x2] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
+		bestscore = board_score_sse_1(_mm_xor_si128(_mm_shuffle_epi32(OP, SWAP64), reduce_vflip(flipped)), alpha, x1);
+		nodes = 2;
 
-	score = 2 * bit_count(P) - SCORE_MAX + 2;	// = (bit_count(P) + 1) - (SCORE_MAX - 1 - bit_count(P))
-	score += n_flips;
+	} else {	// pass - NEIGHBOUR test is almost 100% true
+		alpha = ~alpha;	// = -alpha - 1
+		if (!TESTZ_FLIP(flipped = mm_Flip(_mm_shuffle_epi32(OP, SWAP64), x1))) {
+			bestscore = board_score_sse_1(_mm_xor_si128(OP, reduce_vflip(flipped)), alpha, x2);
 
-	if (n_flips == 0) {
-		score2 = score - 2;	// empty for opponent
-		if (score <= 0)
-			score = score2;
+			if ((bestscore > alpha) && !TESTZ_FLIP(flipped = mm_Flip(_mm_shuffle_epi32(OP, SWAP64), x2))) {
+				score = board_score_sse_1(_mm_xor_si128(OP, reduce_vflip(flipped)), alpha, x1);
+				if (score < bestscore)
+					bestscore = score;
+				nodes = 3;
+			} else	nodes = 2;
 
-		if (score > alpha) {	// lazy cut-off
-			mP = ~P & mask_x[pos][3];
-			n_flips  = COUNT_FLIP_X[th ^ 0xFF];
-			n_flips += COUNT_FLIP_Y[_pext_u64(mP, mask_x[pos][0])];
-			n_flips += COUNT_FLIP_Y[_pext_u64(mP, mask_x[pos][1])];
-			n_flips += COUNT_FLIP_Y[tv ^ 0xFF];
+		} else if (!TESTZ_FLIP(flipped = mm_Flip(_mm_shuffle_epi32(OP, SWAP64), x2))) {
+			bestscore = board_score_sse_1(_mm_xor_si128(OP, reduce_vflip(flipped)), alpha, x1);
+			nodes = 2;
 
-			if (n_flips != 0)
-				score = score2 - n_flips;
+		} else {	// gameover
+			bestscore = board_solve(_mm_cvtsi128_si64(OP), 2);
+			nodes = 1;
 		}
+		bestscore = -bestscore;
 	}
 
-	return score;
+	SEARCH_UPDATE_2EMPTIES_NODES(*n_nodes += nodes;)
+	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
+	assert((bestscore & 1) == 0);
+	return bestscore;
 }
 
-#elif (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_AVX512) && defined(SIMULLASTFLIP512)
-// branchless AVX512(512) lastflip (2.71s on skylake, 2.48 on icelake, 2.15s on Zen4)
+/**
+ * @brief Get the final score.
+ *
+ * Get the final max score, when 3 empty squares remain.
+ *
+ * @param OP The board to evaluate.
+ * @param alpha Alpha bound.
+ * @param n_nodes Node counter.
+ * @param empties Packed empty square coordinates.
+ * @return The final max score, as a disc difference.
+ */
+static int vectorcall search_solve_3(__m128i OP, int alpha, volatile unsigned long long *n_nodes, __m128i empties)
+{
+	__m128i flipped;
+	int score, bestscore, x, pol;
+	unsigned long long opponent;
 
-extern	const V8DI lrmask[66];	// in flip_avx512cd.c
+	SEARCH_STATS(++statistics.n_search_solve_3);
+	SEARCH_UPDATE_INTERNAL_NODES(*n_nodes);
 
-static inline int vectorcall board_score_sse_1(__m128i OP, const int alpha, const int pos)
-{
-	int	score;
-	__m512i	op_outflank, op_flip, op_eraser, mask;
-	__m256i	o_flip, opop_flip;
-	__mmask8 op_pass;
-	__m512i	O4P4 = _mm512_xor_si512(_mm512_broadcastq_epi64(OP),
-		 _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0));
+#ifdef __AVX__
+	empties = _mm_cvtepu8_epi16(empties);
+#elif defined(__SSSE3__)
+	empties = _mm_unpacklo_epi8((empties), _mm_setzero_si128());
+#endif
+	bestscore = -SCORE_INF;
+	pol = 1;
+	do {
+		// best move alphabeta search
+		opponent = EXTRACT_O(OP);
+		x = _mm_extract_epi16(empties, 2);
+		if ((NEIGHBOUR[x] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
+			bestscore = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, empties);
+			if (bestscore > alpha) return bestscore * pol;
+		}
 
-		// left: look for player LS1B
-	mask = _mm512_broadcast_i64x4(lrmask[pos].v4[0]);
-	op_outflank = _mm512_and_si512(O4P4, mask);
-		// set below LS1B if P is in lmask
-	op_flip = _mm512_maskz_add_epi64(_mm512_test_epi64_mask(op_outflank, op_outflank),
-		op_outflank, _mm512_set1_epi64(-1));
-	// op_flip = _mm512_and_si512(_mm512_andnot_si512(op_outflank, op_flip), mask);
-	op_flip = _mm512_ternarylogic_epi64(op_outflank, op_flip, mask, 0x08);
+		x = _mm_extract_epi16(empties, 1);
+		if (/* (NEIGHBOUR[x] & opponent) && */ !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
+			score = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, _mm_shufflelo_epi16(empties, 0xd8));	// (d3d1)d2d0
+			if (score > alpha) return score * pol;
+			else if (score > bestscore) bestscore = score;
+		}
 
-		// right: clear all bits lower than outflank
-	mask = _mm512_broadcast_i64x4(lrmask[pos].v4[1]);
-	op_outflank = _mm512_and_si512(O4P4, mask);
-	op_eraser = _mm512_srlv_epi64(_mm512_set1_epi64(-1),
-		_mm512_maskz_lzcnt_epi64(_mm512_test_epi64_mask(op_outflank, op_outflank), op_outflank));
-	// op_flip = _mm512_or_si512(op_flip, _mm512_andnot_si512(op_eraser, mask));
-	op_flip = _mm512_ternarylogic_epi64(op_flip, op_eraser, mask, 0xf2);
+		x = _mm_extract_epi16(empties, 0);
+		if (/* (NEIGHBOUR[x] & opponent) && */ !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
+			score = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, _mm_shufflelo_epi16(empties, 0xc9));	// (d3d0)d2d1
+			if (score > bestscore) bestscore = score;
+			return bestscore * pol;
+		}
 
-	o_flip = _mm512_extracti64x4_epi64(op_flip, 1);
-	opop_flip = _mm256_or_si256(_mm256_unpacklo_epi64(_mm512_castsi512_si256(op_flip), o_flip),
-		_mm256_unpackhi_epi64(_mm512_castsi512_si256(op_flip), o_flip));
-	OP = _mm_xor_si128(_mm512_castsi512_si128(O4P4),
-		_mm_or_si128(_mm256_castsi256_si128(opop_flip), _mm256_extracti128_si256(opop_flip, 1)));
-	op_pass = _mm_cmpeq_epi64_mask(OP, _mm512_castsi512_si128(O4P4));
-	OP = _mm_mask_unpackhi_epi64(OP, op_pass, OP, OP);	// use O if p_pass
-	score = bit_count(_mm_cvtsi128_si64(OP));
-		// last square for P if not P pass or (O pass and score >= 32)
-	// score += ((~op_pass & 1) | ((op_pass >> 1) & (score >= 32)));
-	score += (~op_pass | ((op_pass >> 1) & (score >> 5))) & 1;
-	(void) alpha;	// no lazy cut-off
-	return score * 2 - SCORE_MAX;	// = bit_count(P) - (SCORE_MAX - bit_count(P))
+		if (bestscore > -SCORE_INF)
+			return bestscore * pol;
+
+		OP = _mm_shuffle_epi32(OP, SWAP64);	// pass
+		alpha = ~alpha;	// = -(alpha + 1)
+	} while ((pol = -pol) < 0);
+
+	return board_solve(_mm_cvtsi128_si64(OP), 3);	// gameover
 }
 
-#elif (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_AVX512) && defined(SIMULLASTFLIP)
-// branchless AVX512(256) lastflip (2.61s on skylake, 2.38 on icelake, 2.13s on Zen4)
+/**
+ * @brief Get the final score.
+ *
+ * Get the final min score, when 4 empty squares remain.
+ *
+ * @param search Search position.
+ * @param alpha Upper score value.
+ * @return The final min score, as a disc difference.
+ */
 
-extern	const V8DI lrmask[66];	// in flip_avx512cd.c
+// pick the move for this ply and pass the rest as packed 3 x 8 bit (AVX/SSSE3) or 3 x 16 bit (SSE), in search order.
+#if defined(__SSSE3__) || defined(__AVX__)
+  #ifdef __AVX__
+	#define	EXTRACT_MOVE(X,i)	_mm_extract_epi8((X), (i) * 4 + 3)
+  #else
+	#define	EXTRACT_MOVE(X,i)	(_mm_extract_epi16((X), (i) * 2 + 1) >> 8)
+  #endif
+	#define	v3_empties_0(empties,sort3)	(empties)
+	#define	v3_empties(empties,i,shuf,sort3)	_mm_srli_si128((empties), (i) * 4)
+#else
+	#define	EXTRACT_MOVE(X,i)	_mm_extract_epi16((X), 3 - (i))
+	static inline __m128i vectorcall v3_empties_0(__m128i empties, int sort3) {
+		// parity based move sorting
+		// if (sort3 == 3) empties = _mm_shufflelo_epi16(empties, 0xe1); // swap x2, x3
+		if (sort3 & 2)	empties = _mm_shufflelo_epi16(empties, 0xc9); // case 1(x3) 2(x1 x2): x3->x1->x2->x3
+		if (sort3 & 1)	empties = _mm_shufflelo_epi16(empties, 0xd8); // case 1(x2) 2(x1 x3): swap x1, x2
+		return empties;
+	}
+	#define	v3_empties(empties,i,shuf,sort3)	v3_empties_0(_mm_shufflelo_epi16((empties), (shuf)), (sort3))
+#endif
 
-static inline int vectorcall board_score_sse_1(__m128i OP, const int alpha, const int pos)
-{
-	int	score;
-	__m256i	p_flip, o_flip, p_outflank, o_outflank, p_eraser, o_eraser, mask, opop_flip;
-	__mmask8 op_pass;
-	__m256i	P4 = _mm256_broadcastq_epi64(OP);
-
-		// left: look for player LS1B
-	mask = lrmask[pos].v4[0];
-	p_outflank = _mm256_and_si256(P4, mask);	o_outflank = _mm256_andnot_si256(P4, mask);
-		// set below LS1B if P is in lmask
-	p_flip = _mm256_maskz_add_epi64(_mm256_test_epi64_mask(P4, mask), p_outflank, _mm256_set1_epi64x(-1));
-		// set below LS1B if O is in lmask
-	o_flip = _mm256_maskz_add_epi64(_mm256_test_epi64_mask(o_outflank, o_outflank), o_outflank, _mm256_set1_epi64x(-1));
-	// p_flip = _mm256_and_si256(_mm256_andnot_si256(p_outflank, p_flip), mask);
-	p_flip = _mm256_ternarylogic_epi64(p_outflank, p_flip, mask, 0x08);
-	// o_flip = _mm256_and_si256(_mm256_andnot_si256(o_outflank, o_flip), mask);
-	o_flip = _mm256_ternarylogic_epi64(o_outflank, o_flip, mask, 0x08);
-
-		// right: clear all bits lower than outflank
-	mask = lrmask[pos].v4[1];
-	p_outflank = _mm256_and_si256(P4, mask);	o_outflank = _mm256_andnot_si256(P4, mask);
-	p_eraser = _mm256_srlv_epi64(_mm256_set1_epi64x(-1),
-		_mm256_maskz_lzcnt_epi64(_mm256_test_epi64_mask(P4, mask), p_outflank));
-	o_eraser = _mm256_srlv_epi64(_mm256_set1_epi64x(-1),
-		_mm256_maskz_lzcnt_epi64(_mm256_test_epi64_mask(o_outflank, o_outflank), o_outflank));
-	// p_flip = _mm256_or_si256(p_flip, _mm256_andnot_si256(p_eraser, mask));
-	p_flip = _mm256_ternarylogic_epi64(p_flip, p_eraser, mask, 0xf2);
-	// o_flip = _mm256_or_si256(o_flip, _mm256_andnot_si256(o_eraser, mask));
-	o_flip = _mm256_ternarylogic_epi64(o_flip, o_eraser, mask, 0xf2);
-
-	opop_flip = _mm256_or_si256(_mm256_unpacklo_epi64(p_flip, o_flip), _mm256_unpackhi_epi64(p_flip, o_flip));
-	OP = _mm_xor_si128(_mm256_castsi256_si128(P4),
-		_mm_or_si128(_mm256_castsi256_si128(opop_flip), _mm256_extracti128_si256(opop_flip, 1)));
-	op_pass = _mm_cmpeq_epi64_mask(OP, _mm256_castsi256_si128(P4));
-	OP = _mm_mask_unpackhi_epi64(OP, op_pass, OP, OP);	// use O if p_pass
-	score = bit_count(_mm_cvtsi128_si64(OP));
-		// last square for P if not P pass or (O pass and score >= 32)
-	// score += ((~op_pass & 1) | ((op_pass >> 1) & (score >= 32)));
-	score += (~op_pass | ((op_pass >> 1) & (score >> 5))) & 1;
-	(void) alpha;	// no lazy cut-off
-	return score * 2 - SCORE_MAX;	// = bit_count(P) - (SCORE_MAX - bit_count(P))
-}
-
-#elif (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_AVX512) && defined(LASTFLIP_HIGHCUT)
-// AVX512(256) NWS lazy high cut-off version (2.63s on skylake, 2.33 on icelake, 2.14s on Zen4)
-
-extern	const V8DI lrmask[66];	// in flip_avx_ppfill.c
-
-static inline int vectorcall board_score_sse_1(__m128i OP, const int alpha, const int pos)
-{
-	int score = 2 * bit_count(_mm_cvtsi128_si64(OP)) - SCORE_MAX + 2;	// = (bit_count(P) + 1) - (SCORE_MAX - 1 - bit_count(P))
-		// if player can move, final score > this score.
-		// if player pass then opponent play, final score < score - 1 (cancel P) - 1 (last O).
-		// if both pass, score - 1 (cancel P) - 1 (empty for O) <= final score <= score (empty for P).
-	__m256i P4 = _mm256_broadcastq_epi64(OP);
-	__m256i lmask = lrmask[pos].v4[0];
-	__m256i rmask = lrmask[pos].v4[1];
-	__mmask16 lp = _mm256_test_epi64_mask(P4, lmask);	// P exists on mask
-	__mmask16 rp = _mm256_test_epi64_mask(P4, rmask);
-	__m256i F4, outflank, eraser, lmO, rmO;
-	__m128i F2;
-	int nflip;
-
-	if (score > alpha) {	// if player can move, high cut-off will occur regardress of n_flips.
-		lmO = _mm256_maskz_andnot_epi64(lp, P4, lmask);	// masked O, clear if all O
-		rmO = _mm256_maskz_andnot_epi64(rp, P4, rmask);	// (all O = all P = 0 flip)
-		if (_mm256_testz_si256(_mm256_or_si256(lmO, rmO), _mm256_set1_epi64x(NEIGHBOUR[pos]))) {
-			// nflip = last_flip(pos, ~P);
-				// left: set below LS1B if O is in lmask
-			F4 = _mm256_maskz_add_epi64(_mm256_test_epi64_mask(lmO, lmO), lmO, _mm256_set1_epi64x(-1));
-			// F4 = _mm256_and_si256(_mm256_andnot_si256(lmO, F4), lmask);
-			F4 = _mm256_ternarylogic_epi64(lmO, F4, lmask, 0x08);
-
-				// right: clear all bits lower than outflank
-			eraser = _mm256_srlv_epi64(_mm256_set1_epi64x(-1),
-				_mm256_maskz_lzcnt_epi64(_mm256_test_epi64_mask(rmO, rmO), rmO));
-			// F4 = _mm256_or_si256(F4, _mm256_andnot_si256(eraser, rmask));
-			F4 = _mm256_ternarylogic_epi64(F4, eraser, rmask, 0xf2);
-
-			F2 = _mm_or_si128(_mm256_castsi256_si128(F4), _mm256_extracti128_si256(F4, 1));
-			nflip = -bit_count(_mm_cvtsi128_si64(_mm_or_si128(F2, _mm_unpackhi_epi64(F2, F2))));
-				// last square for O if O can move or score <= 0
-			score += (nflip - (int)((nflip | (score - 1)) < 0)) * 2;
-
-		} else	score += 2;	// lazy high cut-off, return min flip
-
-	} else {	// if player cannot move, low cut-off will occur whether opponent can move.
-			// left: set below LS1B if P is in lmask
-		outflank = _mm256_and_si256(P4, lmask);
-		F4 = _mm256_maskz_add_epi64(lp, outflank, _mm256_set1_epi64x(-1));
-		// F4 = _mm256_and_si256(_mm256_andnot_si256(outflank, F4), lmask);
-		F4 = _mm256_ternarylogic_epi64(outflank, F4, lmask, 0x08);
-
-			// right: clear all bits lower than outflank
-		outflank = _mm256_and_si256(P4, rmask);
-		eraser = _mm256_srlv_epi64(_mm256_set1_epi64x(-1), _mm256_maskz_lzcnt_epi64(rp, outflank));
-		// F4 = _mm256_or_si256(F4, _mm256_andnot_si256(eraser, rmask));
-		F4 = _mm256_ternarylogic_epi64(F4, eraser, rmask, 0xf2);
-
-		F2 = _mm_or_si128(_mm256_castsi256_si128(F4), _mm256_extracti128_si256(F4, 1));
-		nflip = bit_count(_mm_cvtsi128_si64(_mm_or_si128(F2, _mm_unpackhi_epi64(F2, F2))));
-		score += nflip * 2;
-
-		// if nflip == 0, score <= alpha so lazy low cut-off
-	}
-
-	return score;
-}
-
-#elif (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_AVX_PPFILL) && defined(LASTFLIP_HIGHCUT)
-// experimental AVX2 lastflip with lazy high cut-off version (a little slower)
-extern	const V8DI lrmask[66];	// in flip_avx_ppfill.c
-
-static inline int vectorcall board_score_sse_1(__m128i OP, const int alpha, const int pos)
-{
-	int score = 2 * bit_count(_mm_cvtsi128_si64(OP)) - SCORE_MAX + 2;	// = (bit_count(P) + 1) - (SCORE_MAX - 1 - bit_count(P))
-		// if player can move, final score > this score.
-		// if player pass then opponent play, final score < score - 1 (cancel P) - 1 (last O).
-		// if both pass, score - 1 (cancel P) - 1 (empty for O) <= final score <= score (empty for P).
-	__m256i P4 = _mm256_broadcastq_epi64(OP);
-	__m256i F4, lmask, rmask, outflank, eraser, lmO, rmO, lp, rp;
-	__m128i F2;
-	int nflip;
-
-	lmask = lrmask[pos].v4[0];			rmask = lrmask[pos].v4[1];
-	lmO = _mm256_andnot_si256(P4, lmask);		rmO = _mm256_andnot_si256(P4, rmask);
-	lp = _mm256_cmpeq_epi64(lmO, lmask);		rp = _mm256_cmpeq_epi64(rmO, rmask);	// 0 if P exists on mask
-
-	if (score > alpha) {	// if player can move, high cut-off will occur regardress of n_flips.
-		F4 = _mm256_or_si256(_mm256_andnot_si256(lp, lmO), _mm256_andnot_si256(rp, rmO));	// clear if all O
-		if (_mm256_testz_si256(F4, _mm256_set1_epi64x(NEIGHBOUR[pos]))) {	// pass (16%)
-			// n_flips = last_flip(pos, ~P);
-				// right: isolate opponent MS1B by clearing lower shadow bits
-			eraser = _mm256_srlv_epi64(rmO, _mm256_set_epi64x(7, 9, 8, 1));
-				// eraser = opponent's shadow
-			eraser = _mm256_or_si256(eraser, rmO);
-			eraser = _mm256_or_si256(eraser, _mm256_srlv_epi64(eraser, _mm256_set_epi64x(14, 18, 16, 2)));
-			F4 = _mm256_andnot_si256(eraser, rmask);
-			F4 = _mm256_andnot_si256(_mm256_srlv_epi64(eraser, _mm256_set_epi64x(28, 36, 32, 4)), F4);
-				// clear if no opponent bit, i.e. all player
-			F4 = _mm256_andnot_si256(_mm256_cmpeq_epi64(F4, rmask), F4);
-
-				// left: look for opponent LS1B
-			outflank = _mm256_and_si256(lmO, _mm256_sub_epi64(_mm256_setzero_si256(), lmO));	// LS1B
-				// set all bits if outflank = 0, otherwise higher bits than outflank
-			eraser = _mm256_sub_epi64(_mm256_cmpeq_epi64(outflank, _mm256_setzero_si256()), outflank);
-			F4 = _mm256_or_si256(F4, _mm256_andnot_si256(eraser, lmask));
-
-			F2 = _mm_or_si128(_mm256_castsi256_si128(F4), _mm256_extracti128_si256(F4, 1));
-			nflip = -bit_count(_mm_cvtsi128_si64(_mm_or_si128(F2, _mm_unpackhi_epi64(F2, F2))));
-				// last square for O if O can move or score <= 0
-			score += (nflip - (int)((nflip | (score - 1)) < 0)) * 2;
-
-		} else	score += 2;	// lazy high cut-off, return min flip
-
-	} else {	// if player cannot move, low cut-off will occur whether opponent can move.
-			// right: isolate player MS1B by clearing lower shadow bits
-		outflank = _mm256_and_si256(P4, rmask);
-		eraser = _mm256_srlv_epi64(outflank, _mm256_set_epi64x(7, 9, 8, 1));
-			// eraser = player's shadow
-		eraser = _mm256_or_si256(eraser, outflank);
-		eraser = _mm256_or_si256(eraser, _mm256_srlv_epi64(eraser, _mm256_set_epi64x(14, 18, 16, 2)));
-		F4 = _mm256_andnot_si256(eraser, rmask);
-		F4 = _mm256_andnot_si256(_mm256_srlv_epi64(eraser, _mm256_set_epi64x(28, 36, 32, 4)), F4);
-			// clear if no player bit, i.e. all opponent
-		F4 = _mm256_andnot_si256(rp, F4);
-
-			// left: set below LS1B if P is in lmask
-		outflank = _mm256_and_si256(P4, lmask);
-		outflank = _mm256_andnot_si256(outflank, _mm256_add_epi64(outflank, _mm256_set1_epi64x(-1)));
-		F4 = _mm256_or_si256(F4, _mm256_andnot_si256(lp, _mm256_and_si256(outflank, lmask)));
-
-		F2 = _mm_or_si128(_mm256_castsi256_si128(F4), _mm256_extracti128_si256(F4, 1));
-		nflip = bit_count(_mm_cvtsi128_si64(_mm_or_si128(F2, _mm_unpackhi_epi64(F2, F2))));
-		score += nflip * 2;
-
-		// if nflip == 0, score <= alpha so lazy low cut-off
-	}
-
-	return score;
-}
-
-#elif defined(__AVX2__) && defined(SIMULLASTFLIP)
-// experimental branchless AVX2 MOVMSK version (slower on icc, par on msvc)
-// https://eukaryote.hateblo.jp/entry/2020/05/10/033228
-static inline int vectorcall board_score_sse_1(__m128i OP, const int alpha, const int pos)
-{
-	int_fast8_t	p_flip, o_flip;
-	unsigned int	tP, tO, h;
-	unsigned long long P = _mm_cvtsi128_si64(OP);
-	int	score, score2;
-	const uint8_t *COUNT_FLIP_X = COUNT_FLIP[pos & 7];
-	const uint8_t *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
-
-	__m256i M = mask_dvhd[pos].v4;
-	__m256i PP = _mm256_broadcastq_epi64(OP);
-
-	(void) alpha;	// no lazy cut-off
-	h = (P >> (pos & 0x38)) & 0xFF;
-	tP = TEST_EPI8_MASK32(PP, M);			tO = tP ^ TEST1_EPI8_MASK32(M);
-	p_flip  = COUNT_FLIP_X[h];			o_flip = -COUNT_FLIP_X[h ^ 0xFF];
-	p_flip += COUNT_FLIP_Y[tP & 0xFF];		o_flip -= COUNT_FLIP_Y[tO & 0xFF];
-	p_flip += COUNT_FLIP_Y[(tP >> 16) & 0xFF];	o_flip -= COUNT_FLIP_Y[(tO >> 16) & 0xFF];
-	p_flip += COUNT_FLIP_Y[tP >> 24];		o_flip -= COUNT_FLIP_Y[tO >> 24];
-
-	score = 2 * bit_count(P) - SCORE_MAX + 2;	// = (bit_count(P) + 1) - (SCORE_MAX - 1 - bit_count(P))
-	score2 = score + o_flip - (int)((o_flip | (score - 1)) < 0) * 2;	// last square for O if O can move or score <= 0
-	score += p_flip;
-	return p_flip ? score : score2;	// gcc/icc inserts branch here, since score2 may be wholly skipped.
-}
-
-#elif defined(__AVX2__) && defined(LASTFLIP_HIGHCUT)
-// AVX2 NWS lazy high cut-off version
-// http://www.amy.hi-ho.ne.jp/okuhara/edaxopt.htm#lazycutoff
-// lazy high cut-off idea was in Zebra by Gunnar Anderson (http://radagast.se/othello/zebra.html),
-// but commented out because mobility check was no faster than counting flips.
-// Now using AVX2, mobility check can be faster than counting flips.
-
-extern	const V8DI lrmask[66];	// in flip_avx_ppfill.c
-
-static inline int vectorcall board_score_sse_1(__m128i OP, const int alpha, const int pos)
-{
-	int_fast8_t n_flips;
-	uint32_t t;
-	unsigned long long P = _mm_cvtsi128_si64(OP);
-	int score = 2 * bit_count(P) - SCORE_MAX + 2;	// = (bit_count(P) + 1) - (SCORE_MAX - 1 - bit_count(P))
-		// if player can move, final score > this score.
-		// if player pass then opponent play, final score < score - 1 (cancel P) - 1 (last O).
-		// if both pass, score - 1 (cancel P) - 1 (empty for O) <= final score <= score (empty for P).
-
-	if (score > alpha) {	// if player can move, high cut-off will occur regardress of n_flips.
-  #if 0 // def __AVX512VL__	// may trigger license base downclocking, wrong fingerprint on MSVC
-		__m512i P8 = _mm512_broadcastq_epi64(OP);
-		__m512i M = lrmask[pos].v8;
-		__m512i mO = _mm512_andnot_epi64(P8, M);
-		if (!_mm512_mask_test_epi64_mask(_mm512_test_epi64_mask(P8, M), mO, _mm512_set1_epi64(NEIGHBOUR[pos]))) {	// pass (16%)
-			// n_flips = last_flip(pos, ~P);
-			t = _cvtmask32_u32(_mm256_cmpneq_epi8_mask(_mm512_castsi512_si256(mO), _mm512_extracti64x4_epi64(mO, 1)));	// eq only if l = r = 0
-
-  #elif defined(__AVX512VL__) || defined(__AVX10_1__)	// 256bit AVX512 (2.61s on skylake, 2.37 on icelake, 2.16s on Zen4)
-		__m256i P4 = _mm256_broadcastq_epi64(OP);
-		__m256i M = lrmask[pos].v4[0];
-		__m256i F = _mm256_maskz_andnot_epi64(_mm256_test_epi64_mask(P4, M), P4, M);	// clear if all O
-		M = lrmask[pos].v4[1];
-		// F = _mm256_mask_or_epi64(F, _mm256_test_epi64_mask(P4, M), F, _mm256_andnot_si256(P4, M));
-		F = _mm256_mask_ternarylogic_epi64(F, _mm256_test_epi64_mask(P4, M), P4, M, 0xF2);
-		if (_mm256_testz_si256(F, _mm256_set1_epi64x(NEIGHBOUR[pos]))) {	// pass (16%)
-			// n_flips = last_flip(pos, ~P);
-			// t = _cvtmask32_u32(_mm256_cmpneq_epi8_mask(_mm256_andnot_si256(P4, lM), _mm256_andnot_si256(P4, rM)));
-			t = _cvtmask32_u32(_mm256_test_epi8_mask(F, F));	// all O = all P = 0 flip
-
-  #else	// AVX2
-		__m256i P4 = _mm256_broadcastq_epi64(OP);
-		__m256i M = lrmask[pos].v4[0];
-		__m256i lmO = _mm256_andnot_si256(P4, M);
-		__m256i F = _mm256_andnot_si256(_mm256_cmpeq_epi64(lmO, M), lmO);	// clear if all O
-		M = lrmask[pos].v4[1];
-		__m256i rmO = _mm256_andnot_si256(P4, M);
-		F = _mm256_or_si256(F, _mm256_andnot_si256(_mm256_cmpeq_epi64(rmO, M), rmO));
-		if (_mm256_testz_si256(F, _mm256_set1_epi64x(NEIGHBOUR[pos]))) {	// pass (16%)
-			// n_flips = last_flip(pos, ~P);
-			t = ~_mm256_movemask_epi8(_mm256_cmpeq_epi8(lmO, rmO));	// eq only if l = r = 0
-  #endif
-			const uint8_t *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
-
-			n_flips = -COUNT_FLIP[pos & 7][(~P >> (pos & 0x38)) & 0xFF];	// h
-			n_flips -= COUNT_FLIP_Y[(t >> 8) & 0xFF];	// v
-			n_flips -= COUNT_FLIP_Y[(t >> 16) & 0xFF];	// d
-			n_flips -= COUNT_FLIP_Y[t >> 24];	// d
-				// last square for O if O can move or score <= 0
-			score += n_flips - (int)((n_flips | (score - 1)) < 0) * 2;
-		} else	score += 2;	// min flip
-
-	} else {	// if player cannot move, low cut-off will occur whether opponent can move.
-		const uint8_t *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
-
-		// n_flips = last_flip(pos, P);
-		t = TEST_EPI8_MASK32(_mm256_broadcastq_epi64(OP), mask_dvhd[pos].v4);
-		n_flips  = COUNT_FLIP[pos & 7][(P >> (pos & 0x38)) & 0xFF];	// h
-		n_flips += COUNT_FLIP_Y[t & 0xFF];	// d
-		n_flips += COUNT_FLIP_Y[(t >> 16) & 0xFF];	// v
-		n_flips += COUNT_FLIP_Y[t >> 24];	// d
-		score += n_flips;
-
-		// if n_flips == 0, score <= alpha so lazy low cut-off
-	}
-
-	return score;
-}
-
-#else	// COUNT_LAST_FLIP_SSE - reasonably fast on all platforms (2.61s on skylake, 2.16s on Zen4)
-static inline int vectorcall board_score_sse_1(__m128i OP, const int alpha, const int pos)
-{
-	uint_fast8_t	n_flips;
-	unsigned int	t;
-	int	score, score2;
-	const uint8_t *COUNT_FLIP_X = COUNT_FLIP[pos & 7];
-	const uint8_t *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
-
-	// n_flips = last_flip(pos, P);
-  #ifdef AVXLASTFLIP	// no gain
-	__m256i M = mask_dvhd[pos].v4;
-	__m256i P4 = _mm256_broadcastq_epi64(OP);
-	unsigned int h = (_mm_cvtsi128_si64(OP) >> (pos & 0x38)) & 0xFF;
-
-	t = TEST_EPI8_MASK32(P4, M);
-	n_flips  = COUNT_FLIP_X[h];
-	n_flips += COUNT_FLIP_Y[t & 0xFF];
-	t >>= 16;
-
-  #else
-	__m128i M0 = mask_dvhd[pos].v2[0];
-	__m128i M1 = mask_dvhd[pos].v2[1];
-	__m128i P2 = _mm_unpacklo_epi64(OP, OP);
-	__m128i II = _mm_sad_epu8(_mm_and_si128(P2, M0), _mm_setzero_si128());
-
-	n_flips  = COUNT_FLIP_X[_mm_extract_epi16(II, 4)];
-	n_flips += COUNT_FLIP_X[_mm_cvtsi128_si32(II)];
-	t = TEST_EPI8_MASK16(P2, M1);
-  #endif
-	n_flips += COUNT_FLIP_Y[t >> 8];
-	n_flips += COUNT_FLIP_Y[t & 0xFF];
-
-	score = 2 * bit_count_si64(OP) - SCORE_MAX + 2;	// = (bit_count(P) + 1) - (SCORE_MAX - 1 - bit_count(P))
-	score += n_flips;
-
-	if (n_flips == 0) {
-		score2 = score - 2;	// empty for player
-		if (score <= 0)
-			score = score2;
-
-		if (score > alpha) {	// lazy cut-off
-			// n_flips = last_flip(pos, ~P);
-  #ifdef AVXLASTFLIP
-			t = TEST1_EPI8_MASK32(_mm256_andnot_si256(P4, M));
-			n_flips  = COUNT_FLIP_X[h ^ 0xFF];
-			n_flips += COUNT_FLIP_Y[t & 0xFF];
-			t >>= 16;
-  #else
-			II = _mm_sad_epu8(_mm_andnot_si128(P2, M0), _mm_setzero_si128());
-			n_flips  = COUNT_FLIP_X[_mm_extract_epi16(II, 4)];
-			n_flips += COUNT_FLIP_X[_mm_cvtsi128_si32(II)];
-			t = _mm_movemask_epi8(_mm_sub_epi8(_mm_setzero_si128(), _mm_andnot_si128(P2, M1)));
-  #endif
-			n_flips += COUNT_FLIP_Y[t >> 8];
-			n_flips += COUNT_FLIP_Y[t & 0xFF];
-
-			if (n_flips != 0)
-				score = score2 - n_flips;
-		}
-	}
-
-	return score;
-}
-#endif
-
-// from bench.c
-int board_score_1(const unsigned long long player, const int alpha, const int x)
-{
-	return board_score_sse_1(_mm_cvtsi64_si128(player), alpha, x);
-}
-
-/**
- * @brief Get the final score.
- *
- * Get the final min score, when 2 empty squares remain.
- *
- * @param OP The board to evaluate.
- * @param alpha Alpha bound.
- * @param n_nodes Node counter.
- * @param empties Packed empty square coordinates.
- * @return The final min score, as a disc difference.
- */
-static int vectorcall board_solve_2(__m128i OP, int alpha, volatile unsigned long long *n_nodes, __m128i empties)
-{
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	unsigned long long bb;
-<<<<<<< HEAD
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	__m128i flipped;
-	int score, bestscore, nodes;
-	int x1 = empties >> 16;
-	int x2 = (short) empties;
->>>>>>> 26dad03 (Use player bits only in board_score_1)
-=======
-	__m128i flipped, PO;
-=======
-	__m128i PO;
-	vflip	flipped;
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
-=======
-	__m128i PO, flipped;
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
-=======
-	__m128i flipped;
->>>>>>> 6c10ed6 (Replace board_score_sse_1 param PO with OP)
-	int score, bestscore, nodes;
-	int x1 = _mm_extract_epi16(empties, 1);
-	int x2 = _mm_extract_epi16(empties, 0);
-<<<<<<< HEAD
-	unsigned long long bb;
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
-=======
-	unsigned long long opponent;
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-	// const int beta = alpha + 1;
-
-	SEARCH_STATS(++statistics.n_board_solve_2);
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	opponent = EXTRACT_O(OP);
-	if ((NEIGHBOUR[x1] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-		bestscore = board_score_sse_1(_mm_xor_si128(_mm_shuffle_epi32(OP, SWAP64), reduce_vflip(flipped)), alpha, x2);
-<<<<<<< HEAD
-
-		if ((bestscore > alpha) && (NEIGHBOUR[x2] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-			score = board_score_sse_1(_mm_xor_si128(_mm_shuffle_epi32(OP, SWAP64), reduce_vflip(flipped)), alpha, x1);
-			if (score < bestscore)
-				bestscore = score;
-			nodes = 3;
-		} else	nodes = 2;
-
-	} else if ((NEIGHBOUR[x2] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-		bestscore = board_score_sse_1(_mm_xor_si128(_mm_shuffle_epi32(OP, SWAP64), reduce_vflip(flipped)), alpha, x1);
-		nodes = 2;
-
-	} else {	// pass - NEIGHBOUR test is almost 100% true
-		alpha = ~alpha;	// = -alpha - 1
-		if (!TESTZ_FLIP(flipped = mm_Flip(_mm_shuffle_epi32(OP, SWAP64), x1))) {
-			bestscore = board_score_sse_1(_mm_xor_si128(OP, reduce_vflip(flipped)), alpha, x2);
-
-			if ((bestscore > alpha) && !TESTZ_FLIP(flipped = mm_Flip(_mm_shuffle_epi32(OP, SWAP64), x2))) {
-				score = board_score_sse_1(_mm_xor_si128(OP, reduce_vflip(flipped)), alpha, x1);
-				if (score < bestscore)
-					bestscore = score;
-				nodes = 3;
-			} else	nodes = 2;
-
-		} else if (!TESTZ_FLIP(flipped = mm_Flip(_mm_shuffle_epi32(OP, SWAP64), x2))) {
-			bestscore = board_score_sse_1(_mm_xor_si128(OP, reduce_vflip(flipped)), alpha, x1);
-			nodes = 2;
-
-		} else {	// gameover
-			bestscore = board_solve(_mm_cvtsi128_si64(OP), 2);
-			nodes = 1;
-		}
-		bestscore = -bestscore;
-=======
-	bb = EXTRACT_O(OP);	// opponent
-	if ((NEIGHBOUR[x1] & bb) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-		bestscore = board_score_sse_1(_mm_xor_si128(OP, flipped), alpha + 1, x2);
-=======
-	opponent = EXTRACT_O(OP);
-	if ((NEIGHBOUR[x1] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-<<<<<<< HEAD
-		bestscore = board_score_sse_1(_mm_xor_si128(OP, flipped), alpha, x2);
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-=======
-		bestscore = board_score_sse_1(_mm_xor_si128(OP, reduce_vflip(flipped)), alpha, x2);
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
-=======
->>>>>>> 6c10ed6 (Replace board_score_sse_1 param PO with OP)
-
-		if ((bestscore > alpha) && (NEIGHBOUR[x2] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-			score = board_score_sse_1(_mm_xor_si128(_mm_shuffle_epi32(OP, SWAP64), reduce_vflip(flipped)), alpha, x1);
-			if (score < bestscore)
-				bestscore = score;
-			nodes = 3;
-		} else	nodes = 2;
-
-	} else if ((NEIGHBOUR[x2] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-		bestscore = board_score_sse_1(_mm_xor_si128(_mm_shuffle_epi32(OP, SWAP64), reduce_vflip(flipped)), alpha, x1);
-		nodes = 2;
-
-	} else {	// pass - NEIGHBOUR test is almost 100% true
-		alpha = ~alpha;	// = -alpha - 1
-		if (!TESTZ_FLIP(flipped = mm_Flip(_mm_shuffle_epi32(OP, SWAP64), x1))) {
-			bestscore = board_score_sse_1(_mm_xor_si128(OP, reduce_vflip(flipped)), alpha, x2);
-
-			if ((bestscore > alpha) && !TESTZ_FLIP(flipped = mm_Flip(_mm_shuffle_epi32(OP, SWAP64), x2))) {
-				score = board_score_sse_1(_mm_xor_si128(OP, reduce_vflip(flipped)), alpha, x1);
-				if (score < bestscore)
-					bestscore = score;
-				nodes = 3;
-			} else	nodes = 2;
-
-		} else if (!TESTZ_FLIP(flipped = mm_Flip(_mm_shuffle_epi32(OP, SWAP64), x2))) {
-			bestscore = board_score_sse_1(_mm_xor_si128(OP, reduce_vflip(flipped)), alpha, x1);
-			nodes = 2;
-
-		} else {	// gameover
-			bestscore = board_solve(_mm_cvtsi128_si64(OP), 2);
-			nodes = 1;
-		}
-<<<<<<< HEAD
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-		bestscore = -bestscore;
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-	}
-
-	SEARCH_UPDATE_2EMPTIES_NODES(*n_nodes += nodes;)
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-	assert((bestscore & 1) == 0);
-	return bestscore;
-}
-
-/**
- * @brief Get the final score.
- *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
- * Get the final max score, when 3 empty squares remain.
- *
- * @param OP The board to evaluate.
- * @param alpha Alpha bound.
- * @param n_nodes Node counter.
- * @param empties Packed empty square coordinates.
- * @return The final max score, as a disc difference.
- */
-static int vectorcall search_solve_3(__m128i OP, int alpha, volatile unsigned long long *n_nodes, __m128i empties)
-{
-	__m128i flipped;
-	int score, bestscore, x, pol;
-	unsigned long long opponent;
-=======
- * Get the final score, when 3 empty squares remain.
-=======
- * Get the final min score, when 3 empty squares remain.
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
- * Get the final max score, when 3 empty squares remain.
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
- *
- * @param OP The board to evaluate.
- * @param alpha Alpha bound.
- * @param n_nodes Node counter.
- * @param empties Packed empty square coordinates.
- * @return The final max score, as a disc difference.
- */
-static int vectorcall search_solve_3(__m128i OP, int alpha, volatile unsigned long long *n_nodes, __m128i empties)
-{
-	__m128i flipped;
-	int score, bestscore, x, pol;
-	unsigned long long opponent;
-<<<<<<< HEAD
-	// const int beta = alpha + 1;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-
-	SEARCH_STATS(++statistics.n_search_solve_3);
-	SEARCH_UPDATE_INTERNAL_NODES(*n_nodes);
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> beb2e1d (Refactor endgame_sse/neon solve 4 to 3 interface)
-#ifdef __AVX__
-	empties = _mm_cvtepu8_epi16(empties);
-#elif defined(__SSSE3__)
-	empties = _mm_unpacklo_epi8((empties), _mm_setzero_si128());
-#endif
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	bestscore = -SCORE_INF;
-	pol = 1;
-	do {
-		// best move alphabeta search
-		opponent = EXTRACT_O(OP);
-		x = _mm_extract_epi16(empties, 2);
-		if ((NEIGHBOUR[x] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-			bestscore = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, empties);
-			if (bestscore > alpha) return bestscore * pol;
-		}
-
-		x = _mm_extract_epi16(empties, 1);
-		if (/* (NEIGHBOUR[x] & opponent) && */ !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-			score = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, _mm_shufflelo_epi16(empties, 0xd8));	// (d3d1)d2d0
-			if (score > alpha) return score * pol;
-			else if (score > bestscore) bestscore = score;
-		}
-
-		x = _mm_extract_epi16(empties, 0);
-		if (/* (NEIGHBOUR[x] & opponent) && */ !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-			score = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, _mm_shufflelo_epi16(empties, 0xc9));	// (d3d0)d2d1
-			if (score > bestscore) bestscore = score;
-			return bestscore * pol;
-		}
-
-		if (bestscore > -SCORE_INF)
-			return bestscore * pol;
-
-		OP = _mm_shuffle_epi32(OP, SWAP64);	// pass
-		alpha = ~alpha;	// = -(alpha + 1)
-	} while ((pol = -pol) < 0);
-
-	return board_solve(_mm_cvtsi128_si64(OP), 3);	// gameover
-=======
-=======
-#ifdef __AVX__
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
-	empties = _mm_cvtepu8_epi16(empties);	// to ease shuffle
-	(void) sort3;
-#elif defined(__SSSE3__)
-	empties = _mm_unpacklo_epi8(empties, _mm_setzero_si128());
-	(void) sort3;
-#else
-	// parity based move sorting
-	switch (sort3 & 0x03) {
-		case 1:
-			empties = _mm_shufflelo_epi16(empties, 0xd8); // case 1(x2) 2(x1 x3)
-			break;
-		case 2:
-			empties = _mm_shufflelo_epi16(empties, 0xc9); // case 1(x3) 2(x1 x2)
-			break;
-		case 3:
-			empties = _mm_shufflelo_epi16(empties, 0xe1); // swap x2, x3
-			break;
-	}
-#endif
-
-=======
->>>>>>> ea8595b (Split v3hi_empties from search_solve_3 & moved to solve_4)
-	pol = 1;
-=======
-=======
->>>>>>> beb2e1d (Refactor endgame_sse/neon solve 4 to 3 interface)
-	pol = -1;
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-=======
-	bestscore = SCORE_INF;	// min stage
-=======
-	bestscore = -SCORE_INF;
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
-	pol = 1;
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-	do {
-		// best move alphabeta search
-		opponent = EXTRACT_O(OP);
-		x = _mm_extract_epi16(empties, 2);
-		if ((NEIGHBOUR[x] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-			bestscore = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, empties);
-			if (bestscore > alpha) return bestscore * pol;
-		}
-
-		x = _mm_extract_epi16(empties, 1);
-		if (/* (NEIGHBOUR[x] & opponent) && */ !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-			score = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, _mm_shufflelo_epi16(empties, 0xd8));	// (d3d1)d2d0
-			if (score > alpha) return score * pol;
-			else if (score > bestscore) bestscore = score;
-		}
-
-		x = _mm_extract_epi16(empties, 0);
-		if (/* (NEIGHBOUR[x] & opponent) && */ !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-			score = board_solve_2(board_flip_next(OP, x, flipped), alpha, n_nodes, _mm_shufflelo_epi16(empties, 0xc9));	// (d3d0)d2d1
-			if (score > bestscore) bestscore = score;
-			return bestscore * pol;
-		}
-
-		if (bestscore > -SCORE_INF)
-			return bestscore * pol;
-
-		OP = _mm_shuffle_epi32(OP, SWAP64);	// pass
-		alpha = ~alpha;	// = -(alpha + 1)
-	} while ((pol = -pol) < 0);
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-	return bestscore;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	return board_solve(_mm_cvtsi128_si64(OP), 3);	// gameover
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-	return board_solve(_mm_cvtsi128_si64(OP), 3);	// gameover	// = board_solve(opponent, 3)
->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1)
-=======
-	return board_solve(EXTRACT_O(OP), 3);	// gameover
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
-	return board_solve(_mm_cvtsi128_si64(OP), 3);	// gameover
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
-}
-
-/**
- * @brief Get the final score.
- *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
- * Get the final min score, when 4 empty squares remain.
- *
- * @param search Search position.
- * @param alpha Upper score value.
- * @return The final min score, as a disc difference.
-=======
- * Get the final max score, when 4 empty squares remain.
- *
- * @param search Search position.
- * @param alpha Upper score value.
- * @return The final max score, as a disc difference.
->>>>>>> c8118a8 (Use minimax instead of nagamax for solve 4 or less)
-=======
- * Get the final min score, when 4 empty squares remain.
- *
- * @param search Search position.
- * @param alpha Upper score value.
- * @return The final min score, as a disc difference.
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
- */
-
-// pick the move for this ply and pass the rest as packed 3 x 8 bit (AVX/SSSE3) or 3 x 16 bit (SSE), in search order.
-#if defined(__SSSE3__) || defined(__AVX__)
-  #ifdef __AVX__
-	#define	EXTRACT_MOVE(X,i)	_mm_extract_epi8((X), (i) * 4 + 3)
-  #else
-	#define	EXTRACT_MOVE(X,i)	(_mm_extract_epi16((X), (i) * 2 + 1) >> 8)
-  #endif
-	#define	v3_empties_0(empties,sort3)	(empties)
-	#define	v3_empties(empties,i,shuf,sort3)	_mm_srli_si128((empties), (i) * 4)
-#else
-	#define	EXTRACT_MOVE(X,i)	_mm_extract_epi16((X), 3 - (i))
-	static inline __m128i vectorcall v3_empties_0(__m128i empties, int sort3) {
-		// parity based move sorting
-		// if (sort3 == 3) empties = _mm_shufflelo_epi16(empties, 0xe1); // swap x2, x3
-		if (sort3 & 2)	empties = _mm_shufflelo_epi16(empties, 0xc9); // case 1(x3) 2(x1 x2): x3->x1->x2->x3
-		if (sort3 & 1)	empties = _mm_shufflelo_epi16(empties, 0xd8); // case 1(x2) 2(x1 x3): swap x1, x2
-		return empties;
-	}
-	#define	v3_empties(empties,i,shuf,sort3)	v3_empties_0(_mm_shufflelo_epi16((empties), (shuf)), (sort3))
-#endif
-
-static int search_solve_4(Search *search, int alpha)
-{
-	__m128i	OP, flipped;
-	__m128i	empties_series;	// (AVX) B15:4th, B11:3rd, B7:2nd, B3:1st, lower 3 bytes for 3 empties
-				// (SSE) W3:1st, W2:2nd, W1:3rd, W0:4th
-	int x1, x2, x3, x4, paritysort, score, bestscore, pol;
-	unsigned long long opponent;
-	// const int beta = alpha + 1;
-	static const unsigned char parity_case[64] = {	/* x4x3x2x1 = */
-		/*0000*/  0, /*0001*/  0, /*0010*/  1, /*0011*/  9, /*0100*/  2, /*0101*/ 10, /*0110*/ 11, /*0111*/  3,
-		/*0002*/  0, /*0003*/  0, /*0012*/  0, /*0013*/  0, /*0102*/  4, /*0103*/  4, /*0112*/  5, /*0113*/  5,
-		/*0020*/  1, /*0021*/  0, /*0030*/  1, /*0031*/  0, /*0120*/  6, /*0121*/  7, /*0130*/  6, /*0131*/  7,
-		/*0022*/  9, /*0023*/  0, /*0032*/  0, /*0033*/  9, /*0122*/  8, /*0123*/  0, /*0132*/  0, /*0133*/  8,
-		/*0200*/  2, /*0201*/  4, /*0210*/  6, /*0211*/  8, /*0300*/  2, /*0301*/  4, /*0310*/  6, /*0311*/  8,
-		/*0202*/ 10, /*0203*/  4, /*0212*/  7, /*0213*/  0, /*0302*/  4, /*0303*/ 10, /*0312*/  0, /*0313*/  7,
-		/*0220*/ 11, /*0221*/  5, /*0230*/  6, /*0231*/  0, /*0320*/  6, /*0321*/  0, /*0330*/ 11, /*0331*/  5,
-		/*0222*/  3, /*0223*/  5, /*0232*/  7, /*0233*/  8, /*0322*/  8, /*0323*/  7, /*0332*/  5, /*0333*/  3
-	};
-#if defined(__SSSE3__) || defined(__AVX__)
-	union V4SI {
-		unsigned int	ui[4];
-		__m128i	v4;
-	};
-	static const union V4SI shuf_mask[] = {	// make search order identical to 4.4.0
-		{{ 0x03020100, 0x02030100, 0x01030200, 0x00030201 }},	//  0: 1(x1) 3(x2 x3 x4), 1(x1) 1(x2) 2(x3 x4), 1 1 1 1, 4
-		{{ 0x03020100, 0x02030100, 0x01020300, 0x00020301 }},	//  1: 1(x2) 3(x1 x3 x4)
-		{{ 0x03010200, 0x02010300, 0x01030200, 0x00010302 }},	//  2: 1(x3) 3(x1 x2 x4)
-		{{ 0x03000201, 0x02000301, 0x01000302, 0x00030201 }},	//  3: 1(x4) 3(x1 x2 x3)
-		{{ 0x03010200, 0x01030200, 0x02030100, 0x00030201 }},	//  4: 1(x1) 1(x3) 2(x2 x4)
-		{{ 0x03000201, 0x00030201, 0x02030100, 0x01030200 }},	//  5: 1(x1) 1(x4) 2(x2 x3)
-		{{ 0x02010300, 0x01020300, 0x03020100, 0x00030201 }},	//  6: 1(x2) 1(x3) 2(x1 x4)
-		{{ 0x02000301, 0x00020301, 0x03020100, 0x01030200 }},	//  7: 1(x2) 1(x4) 2(x1 x3)
-		{{ 0x01000302, 0x00010302, 0x03020100, 0x02030100 }},	//  8: 1(x3) 1(x4) 2(x1 x2)
-		{{ 0x03020100, 0x02030100, 0x01000302, 0x00010302 }},	//  9: 2(x1 x2) 2(x3 x4)
-		{{ 0x03010200, 0x02000301, 0x01030200, 0x00020301 }},	// 10: 2(x1 x3) 2(x2 x4)
-		{{ 0x03000201, 0x02010300, 0x01020300, 0x00030201 }}	// 11: 2(x1 x4) 2(x2 x3)
-	};
-	enum { sort3 = 0 };	// sort is done on 4 empties
-#else
-	int sort3;	// for move sorting on 3 empties
-	static const short sort3_shuf[] = {
-		0x0000,	//  0: 1(x1) 3(x2 x3 x4), 1(x1) 1(x2) 2(x3 x4), 1 1 1 1, 4		x4x1x2x3-x3x1x2x4-x2x1x3x4-x1x2x3x4
-		0x1100,	//  1: 1(x2) 3(x1 x3 x4)	x4x2x1x3-x3x2x1x4-x2x1x3x4-x1x2x3x4
-		0x2011,	//  2: 1(x3) 3(x1 x2 x4)	x4x3x1x2-x3x1x2x4-x2x3x1x4-x1x3x2x4
-		0x0222,	//  3: 1(x4) 3(x1 x2 x3)	x4x1x2x3-x3x4x1x2-x2x4x1x3-x1x4x2x3
-		0x3000,	//  4: 1(x1) 1(x3) 2(x2 x4)	x4x1x2x3-x2x1x3x4-x3x1x2x4-x1x3x2x4 <- x4x1x3x2-x2x1x3x4-x3x1x2x4-x1x3x2x4
-		0x3300,	//  5: 1(x1) 1(x4) 2(x2 x3)	x3x1x2x4-x2x1x3x4-x4x1x2x3-x1x4x2x3 <- x3x1x4x2-x2x1x4x3-x4x1x2x3-x1x4x2x3
-		0x2000,	//  6: 1(x2) 1(x3) 2(x1 x4)	x4x1x2x3-x1x2x3x4-x3x2x1x4-x2x3x1x4 <- x4x2x3x1-x1x2x3x4-x3x2x1x4-x2x3x1x4
-		0x2300,	//  7: 1(x2) 1(x4) 2(x1 x3)	x3x1x2x4-x1x2x3x4-x4x2x1x3-x2x4x1x3 <- x3x2x4x1-x1x2x4x3-x4x2x1x3-x2x4x1x3
-		0x2200,	//  8: 1(x3) 1(x4) 2(x1 x2)	x2x1x3x4-x1x2x3x4-x4x3x1x2-x3x4x1x2 <- x2x3x4x1-x1x3x4x2-x4x3x1x2-x3x4x1x2
-		0x2200,	//  9: 2(x1 x2) 2(x3 x4)	x4x3x1x2-x3x4x1x2-x2x1x3x4-x1x2x3x4
-		0x1021,	// 10: 2(x1 x3) 2(x2 x4)	x4x2x1x3-x3x1x2x4-x2x4x1x3-x1x3x2x4
-		0x0112	// 11: 2(x1 x4) 2(x2 x3)	x4x1x2x3-x3x2x1x4-x2x3x1x4-x1x4x2x3
-	};
-=======
- * Get the final score, when 4 empty squares remain.
- *
- * @param search Search position.
- * @param alpha Upper score value.
- * @return The final score, as a disc difference.
- */
-
-// pick the move for this ply and pass the rest as packed 3 x 8 bit (AVX/SSSE3) or 3 x 16 bit (SSE), in search order.
-#if defined(__SSSE3__) || defined(__AVX__)
-  #ifdef __AVX__
-	#define	EXTRACT_MOVE(X,i)	_mm_extract_epi8((X), (i) * 4 + 3)
-  #else
-	#define	EXTRACT_MOVE(X,i)	(_mm_extract_epi16((X), (i) * 2 + 1) >> 8)
-  #endif
-	#define	v3_empties_0(empties,sort3)	(empties)
-	#define	v3_empties(empties,i,shuf,sort3)	_mm_srli_si128((empties), (i) * 4)
-#else
-	#define	EXTRACT_MOVE(X,i)	_mm_extract_epi16((X), 3 - (i))
-	static inline __m128i vectorcall v3_empties_0(__m128i empties, int sort3) {
-		// parity based move sorting
-		// if (sort3 == 3) empties = _mm_shufflelo_epi16(empties, 0xe1); // swap x2, x3
-		if (sort3 & 2)	empties = _mm_shufflelo_epi16(empties, 0xc9); // case 1(x3) 2(x1 x2): x3->x1->x2->x3
-		if (sort3 & 1)	empties = _mm_shufflelo_epi16(empties, 0xd8); // case 1(x2) 2(x1 x3): swap x1, x2
-		return empties;
-	}
-	#define	v3_empties(empties,i,shuf,sort3)	v3_empties_0(_mm_shufflelo_epi16((empties), (shuf)), (sort3))
-#endif
-
-static int search_solve_4(Search *search, int alpha)
+static int search_solve_4(Search *search, int alpha)
 {
 	__m128i	OP, flipped;
 	__m128i	empties_series;	// (AVX) B15:4th, B11:3rd, B7:2nd, B3:1st, lower 3 bytes for 3 empties
@@ -1693,20 +730,6 @@ static int search_solve_4(Search *search, int alpha)
 	};
 	enum { sort3 = 0 };	// sort is done on 4 empties
 #else
-<<<<<<< HEAD
-	unsigned int sort3;	// for move sorting on 3 empties
-	static const short sort3_1_3[2][2] =
-		{{ 0x0000,	// case 1(x1) 3(x2 x3 x4)	// x4x1x2x3-x3x1x2x4-x2x1x3x4-x1x2x3x4
-		   0x1100 },	// case 1(x2) 3(x1 x3 x4)	// x4x2x1x3-x3x2x1x4-x2x1x3x4-x1x2x3x4
-		 { 0x2011,	// case 1(x3) 3(x1 x2 x4)	// x4x3x1x2-x3x1x2x4-x2x3x1x4-x1x3x2x4
-		   0x0222 }};	// case 1(x4) 3(x1 x2 x3)	// x4x1x2x3-x3x4x1x2-x2x4x1x3-x1x4x2x3
-	static const short sort3_2_2[2][2] =
-		{{ 0x0112,	// case 2(x1 x4) 2(x2 x3)	// x4x1x2x3-x3x2x1x4-x2x3x1x4-x1x4x2x3
-		   0x1021 },	// case 2(x1 x3) 2(x2 x4)	// x4x2x1x3-x3x1x2x4-x2x4x1x3-x1x3x2x4
-		 { 0x2200,	// case 2(x1 x2) 2(x3 x4)	// x4x3x1x2-x3x4x1x2-x2x1x3x4-x1x2x3x4
-		   0x0000 }};	// case 4			// x4x1x2x3-x3x1x2x4-x2x1x3x4-x1x2x3x4
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	int sort3;	// for move sorting on 3 empties
 	static const short sort3_shuf[] = {
 		0x0000,	//  0: 1(x1) 3(x2 x3 x4), 1(x1) 1(x2) 2(x3 x4), 1 1 1 1, 4		x4x1x2x3-x3x1x2x4-x2x1x3x4-x1x2x3x4
@@ -1722,42 +745,13 @@ static int search_solve_4(Search *search, int alpha)
 		0x1021,	// 10: 2(x1 x3) 2(x2 x4)	x4x2x1x3-x3x1x2x4-x2x4x1x3-x1x3x2x4
 		0x0112	// 11: 2(x1 x4) 2(x2 x3)	x4x1x2x3-x3x2x1x4-x2x3x1x4-x1x4x2x3
 	};
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-	#define	SHUFFLE_EMPTIES(empties,mask)	_mm_shufflelo_epi16((empties), (mask))
-<<<<<<< HEAD
-	#define	EXTRACT_MOVE(X)	_mm_extract_epi16((X), 3)
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
-=======
->>>>>>> ea8595b (Split v3hi_empties from search_solve_3 & moved to solve_4)
-=======
->>>>>>> beb2e1d (Refactor endgame_sse/neon solve 4 to 3 interface)
 #endif
 
 	SEARCH_STATS(++statistics.n_search_solve_4);
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	// stability cutoff (try 12%, cut 7%)
-<<<<<<< HEAD
-<<<<<<< HEAD
 	if (search_SC_NWS_4(search, alpha, &score)) return score;
-=======
-	// stability cutoff
-=======
-	// stability cutoff (try 12%, cut 7%)
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-	if (search_SC_NWS(search, alpha, &score)) return score;
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	if (search_SC_NWS(search, alpha, 4, &score)) return score;
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
-=======
-	if (search_SC_NWS_4(search, alpha, &score)) return score;
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 
 	OP = _mm_loadu_si128((__m128i *) &search->board);
 	x1 = search->empties[NOMOVE].next;
@@ -1769,632 +763,15 @@ static int search_solve_4(Search *search, int alpha)
 	// The following hole sizes are possible:
 	//    4 - 1 3 - 2 2 - 1 1 2 - 1 1 1 1
 	// Only the 1 1 2 case needs move sorting on this ply.
-<<<<<<< HEAD
-<<<<<<< HEAD
-	paritysort = parity_case[((x3 ^ x4) & 0x24) + ((((x2 ^ x4) & 0x24) * 2 + ((x1 ^ x4) & 0x24)) >> 2)];
-#if defined(__SSSE3__) || defined(__AVX__)
-	empties_series = _mm_cvtsi32_si128((x1 << 24) | (x2 << 16) | (x3 << 8) | x4);
-	empties_series = _mm_shuffle_epi8(empties_series, shuf_mask[paritysort].v4);
-=======
-	parity = search->eval.parity;
-	q1 = QUADRANT_ID[x1];
-	q2 = QUADRANT_ID[x2];
-	q3 = QUADRANT_ID[x3];
-#ifdef __AVX__
-	empties_series = _mm_cvtsi32_si128((x1 << 24) | (x2 << 16) | (x3 << 8) | x4);
-	if (parity & q1) {
-		if (parity & q2) {
-			if (parity & q3) { // case 1 3, case 1 1 1 1
-				empties_series = _mm_shuffle_epi8(empties_series, shuf_1_3[q1 == q2][q1 == q3].v4);
-			} else {	// case 1(x1) 1(x2) 2(x3 x4)	// x4x1x2x3-x3x1x2x4-x2x1x3x4-x1x2x3x4
-				empties_series = _mm_shuffle_epi8(empties_series,
-					_mm_set_epi32(0x00030201, 0x01030200, 0x02030100, 0x03020100));
-			}
-		} else { // case 1(x1) 1(x3) 2(x2 x4), case 1(x1) 1(x4) 2(x2 x3)
-			empties_series = _mm_shuffle_epi8(empties_series, shuf_x1_1_2[(parity & q3) != 0].v4);
-		}
-	} else {
-		if (parity & q2) { // case 1(x2) 1(x3) 2(x1 x4), case 1(x2) 1(x4) 2(x1 x3)
-			empties_series = _mm_shuffle_epi8(empties_series, shuf_x2_1_2[(parity & q3) != 0].v4);
-		} else {
-			if (parity & q3) { // case 1(x3) 1(x4) 2(x1 x2)	// x2x1x3x4-x1x2x3x4-x4x3x1x2-x3x4x1x2
-				empties_series = _mm_shuffle_epi8(empties_series,
-					_mm_set_epi32(0x02030100, 0x03020100, 0x00010302, 0x01000302));
-			} else {	// case 2 2, case 4
-				empties_series = _mm_shuffle_epi8(empties_series, shuf_2_2[q1 == q2][q1 == q3].v4);
-			}
-		}
-	}
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 	paritysort = parity_case[((x3 ^ x4) & 0x24) + ((((x2 ^ x4) & 0x24) * 2 + ((x1 ^ x4) & 0x24)) >> 2)];
 #if defined(__SSSE3__) || defined(__AVX__)
 	empties_series = _mm_cvtsi32_si128((x1 << 24) | (x2 << 16) | (x3 << 8) | x4);
 	empties_series = _mm_shuffle_epi8(empties_series, shuf_mask[paritysort].v4);
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
 #else // SSE
 	empties_series = _mm_cvtsi32_si128((x3 << 16) | x4);
 	empties_series = _mm_insert_epi16(empties_series, x2, 2);
 	empties_series = _mm_insert_epi16(empties_series, x1, 3);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	switch (paritysort) {
-		case 4: // case 1(x1) 1(x3) 2(x2 x4)
-			empties_series = _mm_shufflelo_epi16(empties_series, 0xd8);	// x1x3x2x4
-			break;
-		case 5: // case 1(x1) 1(x4) 2(x2 x3)
-			empties_series = _mm_shufflelo_epi16(empties_series, 0xc9);	// x1x4x2x3
-			break;
-		case 6:	// case 1(x2) 1(x3) 2(x1 x4)
-			empties_series = _mm_shufflelo_epi16(empties_series, 0x9c);	// x2x3x1x4
-			break;
-		case 7: // case 1(x2) 1(x4) 2(x1 x3)
-			empties_series = _mm_shufflelo_epi16(empties_series, 0x8d);	// x2x4x1x3
-			break;
-		case 8:	// case 1(x3) 1(x4) 2(x1 x2)
-			empties_series = _mm_shufflelo_epi16(empties_series, 0x4e);	// x3x4x1x2
-			break;
-	}
-	sort3 = sort3_shuf[paritysort];
-#endif
-
-	bestscore = SCORE_INF;	// min stage
-	pol = 1;
-	do {
-		// best move alphabeta search
-		opponent = EXTRACT_O(OP);
-		x1 = EXTRACT_MOVE(empties_series, 0);
-		if ((NEIGHBOUR[x1] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-			bestscore = search_solve_3(board_flip_next(OP, x1, flipped), alpha, &search->n_nodes,
-				v3_empties_0(empties_series, sort3));
-			if (bestscore <= alpha) return bestscore * pol;
-		}
-
-		x2 = EXTRACT_MOVE(empties_series, 1);
-		if ((NEIGHBOUR[x2] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-			score = search_solve_3(board_flip_next(OP, x2, flipped), alpha, &search->n_nodes,
-				v3_empties(empties_series, 1, 0xb4, sort3 >> 4));	// (SSE) x2x1x3x4
-			if (score <= alpha) return score * pol;
-			else if (score < bestscore) bestscore = score;
-		}
-
-		x3 = EXTRACT_MOVE(empties_series, 2);
-		if ((NEIGHBOUR[x3] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x3))) {
-			score = search_solve_3(board_flip_next(OP, x3, flipped), alpha, &search->n_nodes,
-				v3_empties(empties_series, 2, 0x78, sort3 >> 8));	// (SSE) x3x1x2x4
-			if (score <= alpha) return score * pol;
-			else if (score < bestscore) bestscore = score;
-		}
-
-		x4 = EXTRACT_MOVE(empties_series, 3);
-		if ((NEIGHBOUR[x4] & opponent) && !TESTZ_FLIP(flipped = mm_Flip(OP, x4))) {
-			score = search_solve_3(board_flip_next(OP, x4, flipped), alpha, &search->n_nodes,
-				v3_empties(empties_series, 3, 0x39, sort3 >> 12));	// (SSE) x4x1x2x3
-			if (score <= bestscore) bestscore = score;
-			return bestscore * pol;
-		}
-
-		if (bestscore < SCORE_INF)
-			return bestscore * pol;
-
-		OP = _mm_shuffle_epi32(OP, SWAP64);	// pass
-		alpha = ~alpha;	// = -(alpha + 1)
-	} while ((pol = -pol) < 0);
-
-	return board_solve(EXTRACT_O(OP), 4);	// gameover
-}
-=======
-/**
- * @file endgame_avx.c
- *
- *
- * SSE / AVX optimized version of endgame.c for the last four empties.
- *
- * Bitboard and empty list is kept in SSE registers, but performance gain
- * is limited for GCC minGW build since vectorcall is not supported.
- *
- * @date 1998 - 2020
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- * 
- */
-
-#include "bit.h"
-#include "settings.h"
-#include "search.h"
-#include <assert.h>
-
-#define	SWAP64	0x4e	// for _mm_shuffle_epi32
-#define	DUPLO	0x44
-#define	DUPHI	0xee
-
-#if defined(__AVX__) && (defined(__x86_64__) || defined(_M_X64))
-#define	EXTRACT_O(OP)	_mm_extract_epi64(OP, 1)
-#else
-#define	EXTRACT_O(OP)	_mm_cvtsi128_si64(_mm_shuffle_epi32(OP, DUPHI))
-#endif
-
-#ifdef __AVX__
-#define	EXTRACT_B3(X)	_mm_extract_epi8(X, 3)
-static inline int TESTZ_FLIP(__m128i X) { return _mm_testz_si128(X, X); }
-#else
-#define	EXTRACT_B3(X)	(_mm_cvtsi128_si32(X) >> 24)
-#if defined(__x86_64__) || defined(_M_X64)
-#define TESTZ_FLIP(X)	(!_mm_cvtsi128_si64(X))
-#else
-static inline int TESTZ_FLIP(__m128i X) { return !_mm_cvtsi128_si32(_mm_packs_epi16(X, X)); }
-#endif
-#define _mm_cvtepu8_epi16(X)	_mm_unpacklo_epi8((X), _mm_setzero_si128())
-#endif
-
-// in count_last_flip_sse.c
-extern const unsigned char COUNT_FLIP[8][256];
-extern const V4DI mask_dvhd[64];
-
-/**
- * @brief Compute a board resulting of a move played on a previous board.
- *
- * @param OP board to play the move on.
- * @param x move to play.
- * @param next resulting board.
- * @return true if no flips.
- */
-static inline __m128i board_next_sse(__m128i OP, int x, __m128i flipped)
-{
-	OP = _mm_xor_si128(OP, _mm_or_si128(flipped, _mm_loadl_epi64((__m128i *) &X_TO_BIT[x])));
-	return _mm_shuffle_epi32(OP, SWAP64);
-}
-
-/**
- * @brief Get the final score.
- *
- * Get the final score, when no move can be made.
- *
- * @param OP Board.
- * @param n_empties Number of empty squares remaining on the board.
- * @return The final score, as a disc difference.
- */
-static int vectorcall board_solve_sse(__m128i OP, int n_empties)
-{
-	int score = bit_count(_mm_cvtsi128_si64(OP)) * 2 - SCORE_MAX;	// in case of opponents win
-	int diff = score + n_empties;		// = n_discs_p - (64 - n_empties - n_discs_p)
-
-	SEARCH_STATS(++statistics.n_search_solve);
-
-	if (diff >= 0)
-		score = diff;
-	if (diff > 0)
-		score += n_empties;
-	return score;
-}
-
-/**
- * @brief Get the final score.
- *
- * Get the final score, when 1 empty squares remain.
- * The following code has been adapted from Zebra by Gunnar Anderson.
- *
- * @param OP  Board to evaluate.
- * @param beta   Beta bound.
- * @param pos    Last empty square to play.
- * @return       The final opponent score, as a disc difference.
- */
-static int vectorcall board_score_sse_1(__m128i OP, const int beta, const int pos)
-{
-	int	score, score2;
-	unsigned char	n_flips;
-	unsigned long long	P;
-	unsigned int	t;
-	const unsigned char *COUNT_FLIP_X = COUNT_FLIP[pos & 7];
-	const unsigned char *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3];
-#ifdef AVXLASTFLIP
-	__m256i	MP, MO;
-#else
-	__m128i	PP, OO;
-#endif
-	__m128i	II;
-
-	P = _mm_cvtsi128_si64(OP);
-	score = SCORE_MAX - 2 - 2 * bit_count(P);	// 2 * bit_count(O) - SCORE_MAX
-
-	// n_flips = last_flip(pos, P);
-#ifdef AVXLASTFLIP
-	n_flips  = COUNT_FLIP_X[(unsigned char) (P >> (pos & 0x38))];
-	MP = _mm256_and_si256(_mm256_broadcastq_epi64(OP), mask_dvhd[pos].v4);
-	t = _mm256_movemask_epi8(_mm256_sub_epi8(_mm256_setzero_si256(), MP));
-	n_flips += COUNT_FLIP_Y[(unsigned char) t];
-	t >>= 16;
-#else
-	PP = _mm_shuffle_epi32(OP, DUPLO);
-	II = _mm_sad_epu8(_mm_and_si128(PP, mask_dvhd[pos].v2[0]), _mm_setzero_si128());
-	n_flips  = COUNT_FLIP_X[_mm_cvtsi128_si32(II)];
-	n_flips += COUNT_FLIP_X[_mm_extract_epi16(II, 4)];
-	t = _mm_movemask_epi8(_mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(PP, mask_dvhd[pos].v2[1])));
-#endif
-	n_flips += COUNT_FLIP_Y[t >> 8];
-	n_flips += COUNT_FLIP_Y[(unsigned char) t];
-	score -= n_flips;
-
-	if (n_flips == 0) {
-		score2 = score + 2;	// empty for player
-		if (score >= 0)
-			score = score2;
-
-		if (score < beta) {	// lazy cut-off
-			// n_flips = last_flip(pos, EXTRACT_O(OP));
-#ifdef AVXLASTFLIP
-			MO = _mm256_and_si256(_mm256_permute4x64_epi64(_mm256_castsi128_si256(OP), 0x55), mask_dvhd[pos].v4);
-			II = _mm_sad_epu8(_mm256_castsi256_si128(MO), _mm_setzero_si128());
-			t = _mm_movemask_epi8(_mm_sub_epi8(_mm_setzero_si128(), _mm256_extracti128_si256(MO, 1)));
-#else
-			OO = _mm_shuffle_epi32(OP, DUPHI);
-			II = _mm_sad_epu8(_mm_and_si128(OO, mask_dvhd[pos].v2[0]), _mm_setzero_si128());
-			t = _mm_movemask_epi8(_mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(OO, mask_dvhd[pos].v2[1])));
-#endif
-			n_flips  = COUNT_FLIP_X[_mm_cvtsi128_si32(II)];
-			n_flips += COUNT_FLIP_X[_mm_extract_epi16(II, 4)];
-			n_flips += COUNT_FLIP_Y[t >> 8];
-			n_flips += COUNT_FLIP_Y[(unsigned char) t];
-
-			if (n_flips != 0)
-				score = score2 + n_flips;
-		}
-	}
-
-	return score;
-}
-
-/**
- * @brief Get the final score.
- *
- * Get the final score, when 2 empty squares remain.
- *
- * @param OP The board to evaluate.
- * @param empties Packed empty square coordinates.
- * @param alpha Alpha bound.
- * @param n_nodes Node counter.
- * @return The final score, as a disc difference.
- */
-static int vectorcall board_solve_sse_2(__m128i OP, int alpha, volatile unsigned long long *n_nodes, __m128i empties)
-{
-	__m128i flipped, PO;
-	int score, bestscore, nodes;
-	int x1 = _mm_extract_epi16(empties, 1);
-	int x2 = _mm_extract_epi16(empties, 0);
-	unsigned long long bb;
-	// const int beta = alpha + 1;
-
-	SEARCH_STATS(++statistics.n_board_solve_2);
-
-	bb = EXTRACT_O(OP);	// opponent
-	if ((NEIGHBOUR[x1] & bb) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-		bestscore = board_score_sse_1(board_next_sse(OP, x1, flipped), alpha + 1, x2);
-		nodes = 2;
-
-		if ((bestscore <= alpha) && (NEIGHBOUR[x2] & bb) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-			score = board_score_sse_1(board_next_sse(OP, x2, flipped), alpha + 1, x1);
-			if (score > bestscore) bestscore = score;
-			nodes = 3;
-		}
-
-	} else if ((NEIGHBOUR[x2] & bb) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-		bestscore = board_score_sse_1(board_next_sse(OP, x2, flipped), alpha + 1, x1);
-		nodes = 2;
-
-	} else {	// pass
-		bb = _mm_cvtsi128_si64(OP);	// player
-		PO = _mm_shuffle_epi32(OP, SWAP64);
-		if ((NEIGHBOUR[x1] & bb) && !TESTZ_FLIP(flipped = mm_Flip(PO, x1))) {
-			bestscore = -board_score_sse_1(board_next_sse(PO, x1, flipped), -alpha, x2);
-			nodes = 2;
-
-			if ((bestscore > alpha) && (NEIGHBOUR[x2] & bb) && !TESTZ_FLIP(flipped = mm_Flip(PO, x2))) {
-				score = -board_score_sse_1(board_next_sse(PO, x2, flipped), -alpha, x1);
-				if (score < bestscore) bestscore = score;
-				nodes = 3;
-			}
-
-		} else if ((NEIGHBOUR[x2] & bb) && !TESTZ_FLIP(flipped = mm_Flip(PO, x2))) {
-			bestscore = -board_score_sse_1(board_next_sse(PO, x2, flipped), -alpha, x1);
-			nodes = 2;
-
-		} else {	// gameover
-			bestscore = board_solve_sse(OP, 2);
-			nodes = 1;
-		}
-	}
-
-	SEARCH_UPDATE_2EMPTIES_NODES(*n_nodes += nodes;)
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-	assert((bestscore & 1) == 0);
-	return bestscore;
-}
-
-/**
- * @brief Get the final score.
- *
- * Get the final score, when 3 empty squares remain.
- *
- * @param OP The board to evaluate.
- * @param empties Packed empty square coordinates.
- * @param alpha Alpha bound.
- * @param sort3 Parity flags.
- * @param n_nodes Node counter.
- * @return The final score, as a disc difference.
- */
-static int vectorcall search_solve_sse_3(__m128i OP, int alpha, unsigned int sort3, volatile unsigned long long *n_nodes, __m128i empties)
-{
-	__m128i flipped, PO;
-	int score, bestscore, x;
-	unsigned long long bb;
-	// const int beta = alpha + 1;
-
-	SEARCH_STATS(++statistics.n_search_solve_3);
-	SEARCH_UPDATE_INTERNAL_NODES(*n_nodes);
-
-	empties = _mm_cvtepu8_epi16(empties);	// to ease shuffle
-	// parity based move sorting
-	if (sort3 & 0x03) {
-#ifndef __AVX__
-		if (sort3 & 0x01)
-			empties = _mm_shufflelo_epi16(empties, 0xd8); // case 1(x2) 2(x1 x3)
-		else
-			empties = _mm_shufflelo_epi16(empties, 0xc9); // case 1(x3) 2(x1 x2)
-#endif
-	}
-
-	// best move alphabeta search
-	bestscore = -SCORE_INF;
-	bb = EXTRACT_O(OP);	// opponent
-	x = _mm_extract_epi16(empties, 2);
-	if ((NEIGHBOUR[x] & bb) && !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-		bestscore = -board_solve_sse_2(board_next_sse(OP, x, flipped), -(alpha + 1), n_nodes, empties);
-		if (bestscore > alpha) return bestscore;
-	}
-
-	x = _mm_extract_epi16(empties, 1);
-	if ((NEIGHBOUR[x] & bb) && !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-		score = -board_solve_sse_2(board_next_sse(OP, x, flipped), -(alpha + 1), n_nodes, _mm_shufflelo_epi16(empties, 0xd8));
-		if (score > alpha) return score;
-		else if (score > bestscore) bestscore = score;
-	}
-
-	x = _mm_extract_epi16(empties, 0);
-	if ((NEIGHBOUR[x] & bb) && !TESTZ_FLIP(flipped = mm_Flip(OP, x))) {
-		score = -board_solve_sse_2(board_next_sse(OP, x, flipped), -(alpha + 1), n_nodes, _mm_shufflelo_epi16(empties, 0xc9));
-		if (score > bestscore) bestscore = score;
-	}
-
-	else if (bestscore == -SCORE_INF) {	// pass ?
-		// best move alphabeta search
-		bestscore = SCORE_INF;
-		bb = _mm_cvtsi128_si64(OP);	// player
-		PO = _mm_shuffle_epi32(OP, SWAP64);
-		x = _mm_extract_epi16(empties, 2);
-		if ((NEIGHBOUR[x] & bb) && !TESTZ_FLIP(flipped = mm_Flip(PO, x))) {
-			bestscore = board_solve_sse_2(board_next_sse(PO, x, flipped), alpha, n_nodes, empties);
-			if (bestscore <= alpha) return bestscore;
-		}
-
-		x = _mm_extract_epi16(empties, 1);
-		if ((NEIGHBOUR[x] & bb) && !TESTZ_FLIP(flipped = mm_Flip(PO, x))) {
-			score = board_solve_sse_2(board_next_sse(PO, x, flipped), alpha, n_nodes, _mm_shufflelo_epi16(empties, 0xd8));
-			if (score <= alpha) return score;
-			else if (score < bestscore) bestscore = score;
-		}
-
-		x = _mm_extract_epi16(empties, 0);
-		if ((NEIGHBOUR[x] & bb) && !TESTZ_FLIP(flipped = mm_Flip(PO, x))) {
-			score = board_solve_sse_2(board_next_sse(PO, x, flipped), alpha, n_nodes, _mm_shufflelo_epi16(empties, 0xc9));
-			if (score < bestscore) bestscore = score;
-		}
-
-		else if (bestscore == SCORE_INF)	// gameover
-			bestscore = board_solve_sse(OP, 3);
-	}
-
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-	return bestscore;
-}
-
-/**
- * @brief Get the final score.
- *
- * Get the final score, when 4 empty squares remain.
- *
- * @param search Search position.
- * @param alpha Upper score value.
- * @return The final score, as a disc difference.
- */
-
-typedef union {
-	unsigned int	ui[4];
-	__m128i	v4;
-} V4SI;
-
-int search_solve_4(Search *search, const int alpha)
-{
-	__m128i	OP, flipped;
-	__m128i	empties_series;	// B15:4th, B11:3rd, B7:2nd, B3:1st, lower 3 bytes for 3 empties
-	int x1, x2, x3, x4, q1, q2, q3;
-	int score, bestscore;
-	unsigned int parity;
-	unsigned long long opp;
-	// const int beta = alpha + 1;
-#ifdef __AVX__
-	static const V4SI shuf_x1_1_2[2] = {	// case 1(x1) 1(x4) 2(x2 x3), case 1(x1) 1(x3) 2(x2 x4)
-		 {{ 0x03000201, 0x00030201, 0x02030100, 0x01030200 }},		// x3x1x2x4-x2x1x3x4-x4x1x2x3-x1x4x2x3
-		 {{ 0x03010200, 0x01030200, 0x02030100, 0x00030201 }}};		// x4x1x2x3-x2x1x3x4-x3x1x2x4-x1x3x2x4
-	static const V4SI shuf_x2_1_2[2] = {	// case 1(x2) 1(x4) 2(x1 x3), case 1(x2) 1(x3) 2(x1 x4)
-		 {{ 0x02000301, 0x00020301, 0x03020100, 0x01030200 }},		// x3x1x2x4-x1x2x3x4-x4x2x1x3-x2x4x1x3
-		 {{ 0x02010300, 0x01020300, 0x03020100, 0x00030201 }}};		// x4x1x2x3-x1x2x3x4-x3x2x1x4-x2x3x1x4
-	static const V4SI shuf_1_3[2][2] = {
-		{{{ 0x03020100, 0x02030100, 0x01030200, 0x00030201 }},  	// case 1(x1) 3(x2 x3 x4), case 1 1 1 1
-		 {{ 0x03020100, 0x02030100, 0x01020300, 0x00020301 }}}, 	// case 1(x2) 3(x1 x3 x4)
-		{{{ 0x03010200, 0x02010300, 0x01030200, 0x00010302 }},  	// case 1(x3) 3(x1 x2 x4)
-		 {{ 0x03000201, 0x02000301, 0x01000302, 0x00030201 }}}};	// case 1(x4) 3(x1 x2 x3)
-	static const V4SI shuf_2_2[2][2] = {
-		{{{ 0x03000201, 0x02010300, 0x01020300, 0x00030201 }},  	// case 2(x1 x4) 2(x2 x3)
-		 {{ 0x03010200, 0x02000301, 0x01030200, 0x00020301 }}}, 	// case 2(x1 x3) 2(x2 x4)
-		{{{ 0x03020100, 0x02030100, 0x01000302, 0x00010302 }},  	// case 2(x1 x2) 2(x3 x4)
-		 {{ 0x03020100, 0x02030100, 0x01030200, 0x00030201 }}}};	// case 4
-	enum { sort3 = 0 };	// sort is done on 4 empties
-#else
-	unsigned int sort3;	// for move sorting on 3 empties
-	static const short sort3_1_3[2][2] =
-		{{ 0x0000,	// case 1(x1) 3(x2 x3 x4)	// x4x1x2x3-x3x1x2x4-x2x1x3x4-x1x2x3x4
-		   0x1100 },	// case 1(x2) 3(x1 x3 x4)	// x4x2x1x3-x3x2x1x4-x2x1x3x4-x1x2x3x4
-		 { 0x2011,	// case 1(x3) 3(x1 x2 x4)	// x4x3x1x2-x3x1x2x4-x2x3x1x4-x1x3x2x4
-		   0x0222 }};	// case 1(x4) 3(x1 x2 x3)	// x4x1x2x3-x3x4x1x2-x2x4x1x3-x1x4x2x3
-	static const short sort3_2_2[2][2] =
-		{{ 0x0112,	// case 2(x1 x4) 2(x2 x3)	// x4x1x2x3-x3x2x1x4-x2x3x1x4-x1x4x2x3
-		   0x1021 },	// case 2(x1 x3) 2(x2 x4)	// x4x2x1x3-x3x1x2x4-x2x4x1x3-x1x3x2x4
-		 { 0x2200,	// case 2(x1 x2) 2(x3 x4)	// x4x3x1x2-x3x4x1x2-x2x1x3x4-x1x2x3x4
-		   0x0000 }};	// case 4			// x4x1x2x3-x3x1x2x4-x2x1x3x4-x1x2x3x4
-#endif
-
-	SEARCH_STATS(++statistics.n_search_solve_4);
-	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
-
-	// stability cutoff
-	if (search_SC_NWS(search, alpha, &score)) return score;
-
-	OP = _mm_loadu_si128((__m128i *) &search->board);
-	x1 = search->empties[NOMOVE].next;
-	x2 = search->empties[x1].next;
-	x3 = search->empties[x2].next;
-	x4 = search->empties[x3].next;
-
-	// parity based move sorting.
-	// The following hole sizes are possible:
-	//    4 - 1 3 - 2 2 - 1 1 2 - 1 1 1 1
-	// Only the 1 1 2 case needs move sorting on this ply.
-	parity = search->eval.parity;
-	q1 = QUADRANT_ID[x1];
-	q2 = QUADRANT_ID[x2];
-	q3 = QUADRANT_ID[x3];
-#ifdef __AVX__
-	empties_series = _mm_cvtsi32_si128((x1 << 24) | (x2 << 16) | (x3 << 8) | x4);
-	if (parity & q1) {
-		if (parity & q2) {
-			if (parity & q3) { // case 1 3, case 1 1 1 1
-				empties_series = _mm_shuffle_epi8(empties_series, shuf_1_3[q1 == q2][q1 == q3].v4);
-			} else {	// case 1(x1) 1(x2) 2(x3 x4)	// x4x1x2x3-x3x1x2x4-x2x1x3x4-x1x2x3x4
-				empties_series = _mm_shuffle_epi8(empties_series,
-					_mm_set_epi32(0x00030201, 0x01030200, 0x02030100, 0x03020100));
-			}
-		} else { // case 1(x1) 1(x3) 2(x2 x4), case 1(x1) 1(x4) 2(x2 x3)
-			empties_series = _mm_shuffle_epi8(empties_series, shuf_x1_1_2[(parity & q3) != 0].v4);
-		}
-	} else {
-		if (parity & q2) { // case 1(x2) 1(x3) 2(x1 x4), case 1(x2) 1(x4) 2(x1 x3)
-			empties_series = _mm_shuffle_epi8(empties_series, shuf_x2_1_2[(parity & q3) != 0].v4);
-		} else {
-			if (parity & q3) { // case 1(x3) 1(x4) 2(x1 x2)	// x2x1x3x4-x1x2x3x4-x4x3x1x2-x3x4x1x2
-				empties_series = _mm_shuffle_epi8(empties_series,
-					_mm_set_epi32(0x02030100, 0x03020100, 0x00010302, 0x01000302));
-			} else {	// case 2 2, case 4
-				empties_series = _mm_shuffle_epi8(empties_series, shuf_2_2[q1 == q2][q1 == q3].v4);
-			}
-		}
-	}
-
-#else // SSE
-	empties_series = _mm_cvtsi32_si128((x3 << 16) | x4);
-	empties_series = _mm_insert_epi16(empties_series, x2, 2);
-	empties_series = _mm_insert_epi16(empties_series, x1, 3);
-	empties_series = _mm_packus_epi16(_mm_unpacklo_epi64(empties_series, _mm_shufflelo_epi16(empties_series, 0xb4)),
-		_mm_unpacklo_epi64(_mm_shufflelo_epi16(empties_series, 0x78), _mm_shufflelo_epi16(empties_series, 0x39)));
-							// x4x1x2x3-x3x1x2x4-x2x1x3x4-x1x2x3x4
-	if (parity & q1) {
-		if (parity & q2) {
-			sort3 = 0;	// case 1(x1) 1(x2) 2(x3 x4)
-			if (parity & q3) { // case 1 3, case 1 1 1 1
-				sort3 = sort3_1_3[q1 == q2][q1 == q3];
-			}
-		} else {
-			if (parity & q3) { // case 1(x1) 1(x3) 2(x2 x4)
-				empties_series = _mm_shuffle_epi32(empties_series, 0xd8);	// x4...x2...x3...x1...
-				sort3 = 0x0001;		// ..-x1x3x2x4
-			} else { // case 1(x1) 1(x4) 2(x2 x3)
-				empties_series = _mm_shuffle_epi32(empties_series, 0x9c);	// x3...x2...x4...x1...
-				sort3 = 0x0002;		// ..-x1x4x2x3
-			}
-		}
-	} else {
-		if (parity & q2) {
-			if (parity & q3) { // case 1(x2) 1(x3) 2(x1 x4)
-				empties_series = _mm_shuffle_epi32(empties_series, 0xc9);	// x4...x1...x3...x2...
-				sort3 = 0x0011;		// ..-x3x2x1x4-x2x3x1x4
-			} else { // case 1(x2) 1(x4) 2(x1 x3)
-				empties_series = _mm_shuffle_epi32(empties_series, 0x8d);	// x3...x1...x4...x2...
-				sort3 = 0x0012;		// ..-x4x2x1x3-x2x4x1x3
-			}
-		} else {
-			if (parity & q3) { // case 1(x3) 1(x4) 2(x1 x2)
-				empties_series = _mm_shuffle_epi32(empties_series, 0x4e);	// x2...x1...x4...x3...
-				sort3 = 0x0022;		// ..-x4x3x1x2-x3x4x1x2
-			} else {	// case 2 2, case 4
-				sort3 = sort3_2_2[q1 == q2][q1 == q3];
-			}
-		}
-	}
-#endif
-
-	// best move alphabeta search
-	bestscore = -SCORE_INF;
-	opp = EXTRACT_O(OP);
-	x1 = EXTRACT_B3(empties_series);
-	if ((NEIGHBOUR[x1] & opp) && !TESTZ_FLIP(flipped = mm_Flip(OP, x1))) {
-		bestscore = -search_solve_sse_3(board_next_sse(OP, x1, flipped), -(alpha + 1), sort3, &search->n_nodes, empties_series);
-		if (bestscore > alpha) return bestscore;
-	}
-
-	empties_series = _mm_shuffle_epi32(empties_series, 0x39);
-	x2 = EXTRACT_B3(empties_series);
-	if ((NEIGHBOUR[x2] & opp) && !TESTZ_FLIP(flipped = mm_Flip(OP, x2))) {
-		score = -search_solve_sse_3(board_next_sse(OP, x2, flipped), -(alpha + 1), sort3 >> 4, &search->n_nodes, empties_series);
-		if (score > alpha) return score;
-		else if (score > bestscore) bestscore = score;
-	}
-
-	empties_series = _mm_shuffle_epi32(empties_series, 0x39);
-	x3 = EXTRACT_B3(empties_series);
-	if ((NEIGHBOUR[x3] & opp) && !TESTZ_FLIP(flipped = mm_Flip(OP, x3))) {
-		score = -search_solve_sse_3(board_next_sse(OP, x3, flipped), -(alpha + 1), sort3 >> 8, &search->n_nodes, empties_series);
-		if (score > alpha) return score;
-		else if (score > bestscore) bestscore = score;
-	}
-
-	empties_series = _mm_shuffle_epi32(empties_series, 0x39);
-	x4 = EXTRACT_B3(empties_series);
-	if ((NEIGHBOUR[x4] & opp) && !TESTZ_FLIP(flipped = mm_Flip(OP, x4))) {
-		score = -search_solve_sse_3(board_next_sse(OP, x4, flipped), -(alpha + 1), sort3 >> 12, &search->n_nodes, empties_series);
-		if (score > bestscore) bestscore = score;
-	}
-
-	else if (bestscore == -SCORE_INF) {	// no move
-		if (can_move(opp, _mm_cvtsi128_si64(OP))) { // pass
-			search_pass_endgame(search);
-			bestscore = -search_solve_4(search, -(alpha + 1));
-			search_pass_endgame(search);
-		} else { // gameover
-			bestscore = board_solve_sse(OP, 4);
-		}
-	}
-
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-	return bestscore;
-}
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	empties_series = _mm_packus_epi16(_mm_unpacklo_epi64(empties_series, _mm_shufflelo_epi16(empties_series, 0xb4)),
-		_mm_unpacklo_epi64(_mm_shufflelo_epi16(empties_series, 0x78), _mm_shufflelo_epi16(empties_series, 0x39)));
-			// x4x1x2x3-x3x1x2x4-x2x1x3x4-x1x2x3x4
-=======
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
 	switch (paritysort) {
 		case 4: // case 1(x1) 1(x3) 2(x2 x4)
 			empties_series = _mm_shufflelo_epi16(empties_series, 0xd8);	// x1x3x2x4
@@ -2460,4 +837,3 @@ int search_solve_4(Search *search, const int alpha)
 
 	return board_solve(EXTRACT_O(OP), 4);	// gameover
 }
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/eval.c b/src/eval.c
index 6899d06..500d4e2 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -3,36 +3,10 @@
  *
  * Evaluation function.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2017
->>>>>>> b3f048d (copyright changes)
  * @author Richard Delorme
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 1998 - 2018
-=======
- * @date 1998 - 2020
->>>>>>> bb33695 (Cleaner eval_open unpacking)
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
- * @date 1998 - 2022
-=======
- * @date 1998 - 2023
->>>>>>> 6de3ab1 (Omit eval_weight table for ply > 53)
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.5
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
  */
 
 #include "eval.h"
@@ -46,23 +20,7 @@
 #include <stdlib.h>
 #include <assert.h>
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if !defined(VECTOR_EVAL_UPDATE) && !defined(hasSSE2) && !defined(__ARM_NEON)
-=======
-#ifndef __SSE2__
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
-#ifndef hasSSE2
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-#if !defined(VECTOR_EVAL_UPDATE) && !defined(hasSSE2) && !defined(hasNeon)
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-#if !defined(VECTOR_EVAL_UPDATE) && !defined(hasSSE2) && !defined(__ARM_NEON)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 
 /** coordinate to feature conversion */
 typedef struct CoordinateToFeature {
@@ -211,283 +169,6 @@ static const CoordinateToFeature EVAL_X2F[] = {
 	{4, {{ 0,     0}, { 0,     0}, { 0,     0}, { 0,     0}}} // <- PASS
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#endif
-#if defined(VECTOR_EVAL_UPDATE) || defined(hasSSE2) || defined(__ARM_NEON) || defined(DISPATCH_NEON) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-
-const EVAL_FEATURE_V EVAL_FEATURE[65] = {
-	{{ // a1
-		 6561,     0,     0,     0,   243,     0,     0,     0,  6561,     0,  6561,     0, 19683,     0, 19683,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b1
-		 2187,     0,     0,     0,    27,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c1
-		   81,     0,     0,     0,     9,     0,     0,     0,   729,     0,     0,     0,  6561,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,   243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d1
-		    0,     0,     0,     0,     3,     1,     0,     0,   243,     0,     0,     0,  2187,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,    81,     0,     0,     0,    27,     0,     0,     0,     0,     0
-	}}, {{ // e1
-		    0,     0,     0,     0,     1,     3,     0,     0,    81,     0,     0,     0,     9,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,    81,     0,     0,     0,    27,     0,     0,     0
-	}}, {{ // f1
-		    0,    81,     0,     0,     0,     9,     0,     0,    27,     0,     0,     0,     3,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,   243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // g1
-		    0,  2187,     0,     0,     0,    27,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,   729,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // h1
-		    0,  6561,     0,     0,     0,   243,     0,     0,     3,     0,     0,  6561,     1,     0,     0, 19683,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // a2
-		  729,     0,     0,     0,   729,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,
-		 2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		  729,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b2
-		  243,     0,     0,     0,    81,     0,     0,     0, 19683,     0, 19683,     0,     0,     0,     0,     0,
-		  729,     0,   729,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c2
-		    9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,
-		  243,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,     0,     0,     0,   243,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0
-	}}, {{ // d2
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   243,     0,     0,     0,
-		   81,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,     0,
-		    0,     0,    81,     0,     0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e2
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    81,     0,     0,     0,
-		   27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,
-		    0,     0,     0,     0,    81,     0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // f2
-		    0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    27,     0,     0,     0,
-		    9,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,   243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0
-	}}, {{ // g2
-		    0,   243,     0,     0,     0,    81,     0,     0,     1,     0,     0, 19683,     0,     0,     0,     0,
-		    3,     0,     0,   729,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // h2
-		    0,   729,     0,     0,     0,   729,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,
-		    1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // a3
-		   27,     0,     0,     0,  2187,     0,     0,     0,     0,     0,   729,     0,     0,     0,  6561,     0,
-		    0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,   243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b3
-		    3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,
-		    0,     0,   243,     0,   729,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		  243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0
-	}}, {{ // c3
-		    1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,   243,     0,   243,     0,     0,     0,     0,     0,   243,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d3
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,    81,     0,     0,     0,     0,     0,   243,     0,     0,     0,    81,     0,
-		    0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e3
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     0,   243,     0,     0,     0,     0,
-		    0,    81,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // f3
-		    0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     9,     0,     0,   243,     0,     0,     0,     0,     0,     9,     0,     0,
-		    0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // g3
-		    0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,
-		    0,     0,     0,   243,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   243,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0
-	}}, {{ // h3
-		    0,    27,     0,     0,     0,  2187,     0,     0,     0,     0,     0,   729,     0,     0,     0,  6561,
-		    0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,   243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // a4
-		    0,     0,     0,     0,  6561,     0, 19683,     0,     0,     0,   243,     0,     0,     0,  2187,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,    81,     0,     0,     1,     0,     0,     0,     0,     0
-	}}, {{ // b4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   243,     0,
-		    0,     0,    81,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,    81,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,    81,     0,   243,     0,     0,     0,     0,     0,     0,     0,
-		   81,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,    81,     0,    81,     0,    81,     0,     0,     0,
-		    0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,    27,     0,     0,    81,     0,    27,    27,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // f4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,    81,     9,     0,     0,     0,     0,     0,     0,    81,
-		    0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // g4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   243,
-		    0,     0,     0,    81,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,    81,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // h4
-		    0,     0,     0,     0,     0,  6561,     0, 19683,     0,     0,     0,   243,     0,     0,     0,  2187,
-		    0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,    81,     0,     0,     1,     0,     0,     0
-	}}, {{ // a5
-		    0,     0,     0,     0, 19683,     0,  6561,     0,     0,     0,    81,     0,     0,     0,     9,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,    27,     0,     0,     0,     0
-	}}, {{ // b5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    81,     0,
-		    0,     0,    27,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     3,     0,     0,    27,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,    27,     0,     0,   243,     0,     0,     0,     0,     0,     0,
-		    0,     9,     0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,    81,    27,     0,     0,    81,     0,     0,
-		   27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,    27,     0,    27,    27,     0,     0,    27,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // f5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,    27,     0,     9,     0,     0,     0,     0,     9,     0,
-		    0,     0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // g5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    81,
-		    0,     0,     0,    27,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0,
-		    0,     0,     3,     0,     0,     0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     0
-	}}, {{ // h5
-		    0,     0,     0,     0,     0, 19683,     0,  6561,     0,     0,     0,    81,     0,     0,     0,     9,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,    27,     0,     0
-	}}, {{ // a6
-		    0,     0,    81,     0,     0,     0,  2187,     0,     0,     0,    27,     0,     0,     0,     3,     0,
-		    0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b6
-		    0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    27,     0,
-		    0,     0,     9,     0,     0,   729,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0
-	}}, {{ // c6
-		    0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,   243,     9,     0,     0,     0,     0,     0,     0,   243,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d6
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,    81,     0,     0,     0,     0,     9,     0,     0,     0,     0,     9,
-		    0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e6
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,
-		    9,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // f6
-		    0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     9,     0,     9,     0,     0,     0,     0,     9,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0
-	}}, {{ // g6
-		    0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    27,
-		    0,     0,     0,     9,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0
-	}}, {{ // h6
-		    0,     0,     0,    81,     0,     0,     0,  2187,     0,     0,     0,    27,     0,     0,     0,     3,
-		    0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // a7
-		    0,     0,  2187,     0,     0,     0,   729,     0,     0,     0,     9,     0,     0,     0,     0,     0,
-		    0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b7
-		    0,     0,   243,     0,     0,     0,    81,     0,     0, 19683,     1,     0,     0,     0,     0,     0,
-		    0,   729,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c7
-		    0,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,
-		    0,   243,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     3,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0
-	}}, {{ // d7
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   243,     0,     0,
-		    0,    81,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     3,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e7
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    81,     0,     0,
-		    0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,
-		    0,     0,     0,     3,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0
-	}}, {{ // f7
-		    0,     0,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,    27,     0,     0,
-		    0,     9,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,
-		    3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0
-	}}, {{ // g7
-		    0,     0,     0,   243,     0,     0,     0,    81,     0,     1,     0,     1,     0,     0,     0,     0,
-		    0,     3,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // h7
-		    0,     0,     0,  2187,     0,     0,     0,   729,     0,     0,     0,     9,     0,     0,     0,     0,
-		    0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // a8
-		    0,     0,  6561,     0,     0,     0,   243,     0,     0,  6561,     3,     0,     0, 19683,     1,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b8
-		    0,     0,   729,     0,     0,     0,    27,     0,     0,  2187,     0,     0,     0,     0,     0,     0,
-		    0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c8
-		    0,     0,    27,     0,     0,     0,     9,     0,     0,   729,     0,     0,     0,  6561,     0,     0,
-		    0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d8
-		    0,     0,     0,     0,     0,     0,     3,     1,     0,   243,     0,     0,     0,  2187,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     1,     0,     0,     0,     0
-	}}, {{ // e8
-		    0,     0,     0,     0,     0,     0,     1,     3,     0,    81,     0,     0,     0,     9,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     1,     0,     0
-	}}, {{ // f8
-		    0,     0,     0,    27,     0,     0,     0,     9,     0,    27,     0,     0,     0,     3,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // g8
-		    0,     0,     0,   729,     0,     0,     0,    27,     0,     9,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // h8
-		    0,     0,     0,  6561,     0,     0,     0,   243,     0,     3,     0,     3,     0,     1,     0,     1,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // PASS
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}
-};
-
-const EVAL_FEATURE_V EVAL_FEATURE_all_opponent = {{
-	 9841,  9841,  9841,  9841, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524,
-//	11111111(3)                 +3^8                       +3^8*2                      +3^8*3         1111111(3)
-	 3280,  3280,  3280,  3280,  9841,  9841,  9841,  9841, 16402, 16402, 16402, 16402, 22963, 22963,  1093,  1093,
-//	              364(=111111(3))+2187        121(=11111(3))+2187+729     40(=1111(3))+2187+729+243
-	 1093,  1093,  2551,  2551,  2551,  2551,  3037,  3037,  3037,  3037,  3199,  3199,  3199,  3199,     0,     0
-}};
-
 #endif
 #if defined(VECTOR_EVAL_UPDATE) || defined(hasSSE2) || defined(__ARM_NEON) || defined(DISPATCH_NEON) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 
@@ -768,75 +449,18 @@ const EVAL_FEATURE_V EVAL_FEATURE_all_opponent = {{
 /** feature offset/size */
 // static const int EVAL_OFS[] = { 0, 19683, 78732, 137781, 196830, 203391, 209952, 216513, 223074, 225261, 225990, 226233, 226314 };
 // static const int EVAL_SIZE[] = {19683, 59049, 59049, 59049, 6561, 6561, 6561, 6561, 2187, 729, 243, 81, 1};
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
 static const unsigned short EVAL_OFFSET[] = {
 	    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
 	    0,     0,     0,     0,  6561,  6561,  6561,  6561, 13122, 13122, 13122, 13122, 19683, 19683,     0,     0,
 	    0,     0,  2187,  2187,  2187,  2187,  2916,  2916,  2916,  2916,  3159,  3159,  3159,  3159,     0,     0
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-=======
-#endif
-
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-/** feature offset/size */
-// static const int EVAL_OFS[] = { 0, 19683, 78732, 137781, 196830, 203391, 209952, 216513, 223074, 225261, 225990, 226233, 226314 };
-static const int EVAL_SIZE[] = {19683, 59049, 59049, 59049, 6561, 6561, 6561, 6561, 2187, 729, 243, 81, 1};
-=======
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
-=======
 };
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
-};
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
 
 /** packed feature offset/size */
 static const int EVAL_PACKED_OFS[] = { 0, 10206, 40095, 69741, 99387, 102708, 106029, 109350, 112671, 113805, 114183, 114318, 114363 };
 // static const int EVAL_PACKED_SIZE[] = {10206, 29889, 29646, 29646, 3321, 3321, 3321, 3321, 1134, 378, 135, 45, 1};
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifdef DEBUG
-static const int EVAL_MAX_VALUE[] = {
-	 19682,  19682,  19682,  19682,
-	 59048,  59048,  59048,  59048,	//  78731,  78731,  78731,  78731,
-	 59048,  59048,  59048,  59048,	// 137780, 137780, 137780, 137780,
-	 59048,  59048,  59048,  59048,	// 196829, 196829, 196829, 196829,
-	  6560,   6560,   6560,   6560,	// 203390, 203390, 203390, 203390,
-	  6560,   6560,   6560,   6560,	// 209951, 209951, 209951, 209951,
-	  6560,   6560,   6560,   6560, // 216512, 216512, X 223073, X 223073,
-	  6560,   6560,			// 223073, 223073,
-	  2186,   2186,   2186,   2186, // 225260, 225260, 225260, 225260,
-	   728,    728,    728,    728, // 225989, 225989, 225989, 225989,
-	   242,    242,    242,    242, // 226232, 226232, 226232, 226232,
-	    80,     80,     80,     80, // 226313, 226313, 226313, 226313,
-	     0				// 226314
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-};
-#endif
-
-/** packed feature offset/size */
-static const int EVAL_PACKED_OFS[] = { 0, 10206, 40095, 69741, 99387, 102708, 106029, 109350, 112671, 113805, 114183, 114318, 114363 };
-// static const int EVAL_PACKED_SIZE[] = {10206, 29889, 29646, 29646, 3321, 3321, 3321, 3321, 1134, 378, 135, 45, 1};
-
-=======
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
 /** feature symetry packing */
 typedef struct {
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 	short EVAL_C10[59049];
 	short EVAL_S10[59049];
 	short EVAL_C9[19683];
@@ -845,41 +469,13 @@ typedef struct {
 	short EVAL_S6[729];
 	short EVAL_S5[243];
 	short EVAL_S4[81];
-<<<<<<< HEAD
-=======
-	short EVAL_C10[2][59049];
-	short EVAL_S10[2][59049];
-	short EVAL_C9[2][19683];
-	short EVAL_S8[2][6561];
-	short EVAL_S7[2][2187];
-	short EVAL_S6[2][729];
-	short EVAL_S5[2][243];
-	short EVAL_S4[2][81];
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 } SymetryPacking;
 
 /** eval weight load status */
 static int EVAL_LOADED = 0;
 
 /** eval weights */
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-Eval_weight (*EVAL_WEIGHT)[EVAL_N_PLY - 2];	// for 2..53
-
-/** opponent feature */
-static unsigned short *OPPONENT_FEATURE;
-=======
-short (*EVAL_WEIGHT)[EVAL_N_PLY][EVAL_N_WEIGHT];
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-Eval_weight (*EVAL_WEIGHT)[EVAL_N_PLY];
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
-=======
 Eval_weight (*EVAL_WEIGHT)[EVAL_N_PLY - 2];	// for 2..53
->>>>>>> 6de3ab1 (Omit eval_weight table for ply > 53)
 
 /** opponent feature */
 static unsigned short *OPPONENT_FEATURE;
@@ -891,14 +487,11 @@ static double EVAL_A, EVAL_B, EVAL_C, EVAL_a, EVAL_b, EVAL_c;
  * @brief Opponent feature.
  *
  * Compute a feature from the opponent point of view.
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @param p opponent feature pointer.
  * @param o opponent feature base to next depth.
  * @param d feature size.
  * @return updated opponent feature pointer
  */
-<<<<<<< HEAD
 // #define OPPONENT(x)	((9 >> (x)) & 3)	// (0, 1, 2) to (1, 0, 2)
 static unsigned short *set_opponent_feature(unsigned short *p, int o, int d)
 {
@@ -957,84 +550,6 @@ static int set_eval_packing(short *pe, int *T, const int *kd, int l, int k, int
 		}
 	}
 	return n;
-=======
-#define	OPPONENT(x)	((9 >> (x)) & 3)	// (0, 1, 2) to (1, 0, 2)
-
-static int opponent_feature(int q[], int d)
-{
-	int f = 0;
-	while (--d >= 0)
-		f = f * 3 + OPPONENT(q[d]);
-	return f;
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
- * @param p opponent feature array to set.
-=======
- * @param p opponent feature pointer.
->>>>>>> bb33695 (Cleaner eval_open unpacking)
- * @param o opponent feature base to next depth.
- * @param d feature size.
- * @return updated opponent feature pointer
- */
-// #define OPPONENT(x)	((9 >> (x)) & 3)	// (0, 1, 2) to (1, 0, 2)
-static unsigned short *set_opponent_feature(unsigned short *p, int o, int d)
-{
-	if (--d) {
-		p = set_opponent_feature(p, (o + 1) * 3, d);
-		p = set_opponent_feature(p, o * 3, d);
-		p = set_opponent_feature(p, (o + 2) * 3, d);
-	} else {
-		*p++ = o + 1;
-		*p++ = o;
-		*p++ = o + 2;
-	}
-	return p;
-}
-
-/**
- * @brief Create eval packing index.
- *
- * Create an index array to reduce mirror positions.
- * @param pe pointer to array to set result.
- * @param T internally used array to check mirror positions.
- * @param kd feature increment at each depth for the mirror position.
- * @param l feature index.
- * @param k feature index for the mirror position.
- * @param n packed count so far.
- * @param d feature size, >= 4.
- * @return updated packed count.
- */
-static int set_eval_packing(short *pe, int *T, const int *kd, int l, int k, int n, int d)
-{
-	int	i, q0, q1, q2, q3;
-
-	if (--d > 3) {
-		l *= 3;
-		n = set_eval_packing(pe, T, kd, l, k, n, d);
-		k += kd[d];
-		n = set_eval_packing(pe, T, kd, l + 3, k, n, d);
-		k += kd[d];
-		n = set_eval_packing(pe, T, kd, l + 6, k, n, d);
-	} else {
-		l *= 27;
-		for (q3 = 0; q3 < 3; ++q3) {
-			for (q2 = 0; q2 < 3; ++q2) {
-				for (q1 = 0; q1 < 3; ++q1) {
-					for (q0 = 0; q0 < 3; ++q0) {
-						if (k < l) i = T[k];
-						else T[l] = i = n++;
-						pe[l++] = i;
-						k += kd[0];
-					}
-					k += (kd[1] - kd[0] * 3);
-				}
-				k += (kd[2] - kd[1] * 3);
-			}
-			k += (kd[3] - kd[2] * 3);
-		}
-	}
-	return n;
->>>>>>> 48873fa (calc opponent_feature once in eval_open)
 }
 
 /**
@@ -1054,16 +569,9 @@ void eval_open(const char* file)
 	double date;
 	const int n_w = 114364;
 	int *T;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	int ply, i, j, k;
 	int r;
 	FILE* f;
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
 	short *w;
 	Eval_weight *pe;
 	SymetryPacking (*P)[2];
@@ -1071,35 +579,6 @@ void eval_open(const char* file)
 	static const int kd_S10[] = { 19683, 6561, 2187, 729, 243, 81, 27, 9, 3, 1 };
 	static const int kd_C10[] = { 19683, 6561, 2187, 729, 81, 243, 27, 9, 3, 1 };
 	static const int kd_C9[] = { 1, 9, 3, 81, 27, 243, 2187, 729, 6561 };
-=======
-	int ply, i, j, k, kc, l, l4, l5, l6, l7, n, n4, n5, n6, n7, nc, o4, o5, o6, o7;
-=======
-	int ply, i, j, k, l, n, l4, l5, l6, l7, n4, n5, n6, n7;
-	int kc, nc, c10ofs;
->>>>>>> 48873fa (calc opponent_feature once in eval_open)
-=======
-	int ply, i, j, k;
->>>>>>> bb33695 (Cleaner eval_open unpacking)
-	int r;
-	FILE* f;
-	short *w;
-	int *O;
-	SymetryPacking *P;
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-=======
-=======
-	short *w, *pe;
-	SymetryPacking (*P)[2];
-	SymetryPacking *pp;
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-	static const int kd_S10[] = { 19683, 6561, 2187, 729, 243, 81, 27, 9, 3, 1 };
-	static const int kd_C10[] = { 19683, 6561, 2187, 729, 81, 243, 27, 9, 3, 1 };
->>>>>>> bb33695 (Cleaner eval_open unpacking)
-	static const int kd_C9[] = { 1, 9, 3, 81, 27, 243, 2187, 729, 6561 };
->>>>>>> 48873fa (calc opponent_feature once in eval_open)
 
 	if (EVAL_LOADED++) return;
 
@@ -1110,12 +589,6 @@ void eval_open(const char* file)
 	if (sizeof (short) != 2) fatal_error("short size is not compatible with Edax.\n");
 
 	// create unpacking tables
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
 	OPPONENT_FEATURE = (unsigned short *) malloc(59049 * sizeof(unsigned short));	// 3^10
 	P = (SymetryPacking (*)[2]) malloc(2 * sizeof(*P));
 	T = (int *) malloc(2 * 59049 * sizeof(*T));
@@ -1127,7 +600,6 @@ void eval_open(const char* file)
 	set_eval_packing((*P)[0].EVAL_S8, T, kd_S10 + 2, 0, 0, 0, 8);	/* 8 squares : 6561 -> 3321 */
 	for (j = 0; j < 6561; ++j)
 		(*P)[1].EVAL_S8[j] = (*P)[0].EVAL_S8[OPPONENT_FEATURE[j + 26244]];	// 1100000000(3)
-<<<<<<< HEAD
 
 	set_eval_packing((*P)[0].EVAL_S7, T, kd_S10 + 3, 0, 0, 0, 7);	/* 7 squares : 2187 -> 1134 */
 	for (j = 0; j < 2187; ++j)
@@ -1154,137 +626,13 @@ void eval_open(const char* file)
 	for (j = 0; j < 59049; ++j) {
 		(*P)[1].EVAL_S10[j] = (*P)[0].EVAL_S10[OPPONENT_FEATURE[j]];
 		(*P)[1].EVAL_C10[j] = (*P)[0].EVAL_C10[OPPONENT_FEATURE[j]];
-=======
-	P = (SymetryPacking *) malloc(sizeof(*P));
-=======
-	OPPONENT_FEATURE = (unsigned short *) malloc(59049 * sizeof(*OPPONENT_FEATURE));
-=======
-	OPPONENT_FEATURE = (unsigned short *) malloc(59049 * sizeof(unsigned short));
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-	P = (SymetryPacking (*)[2]) malloc(2 * sizeof(*P));
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-	T = (int *) malloc(2 * 59049 * sizeof(*T));
-	if ((OPPONENT_FEATURE == NULL) || (P == NULL) || (T == NULL))
-		fatal_error("Cannot allocate temporary table variable.\n");
-
-	set_opponent_feature(OPPONENT_FEATURE, 0, 10);
-
-	set_eval_packing((*P)[0].EVAL_S8, T, kd_S10 + 2, 0, 0, 0, 8);	/* 8 squares : 6561 -> 3321 */
-	for (j = 0; j < 6561; ++j)
-		(*P)[1].EVAL_S8[j] = (*P)[0].EVAL_S8[OPPONENT_FEATURE[j + 26244]];
-=======
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
-
-	set_eval_packing((*P)[0].EVAL_S7, T, kd_S10 + 3, 0, 0, 0, 7);	/* 7 squares : 2187 -> 1134 */
-	for (j = 0; j < 2187; ++j)
-		(*P)[1].EVAL_S7[j] = (*P)[0].EVAL_S7[OPPONENT_FEATURE[j + 28431]];	// 1110000000(3)
-
-	set_eval_packing((*P)[0].EVAL_S6, T, kd_S10 + 4, 0, 0, 0, 6);	/* 6 squares : 729 -> 378 */
-	for (j = 0; j < 729; ++j)
-		(*P)[1].EVAL_S6[j] = (*P)[0].EVAL_S6[OPPONENT_FEATURE[j + 29160]];	// 1111000000(3)
-
-	set_eval_packing((*P)[0].EVAL_S5, T, kd_S10 + 5, 0, 0, 0, 5);	/* 5 squares : 243 -> 135 */
-	for (j = 0; j < 243; ++j)
-		(*P)[1].EVAL_S5[j] = (*P)[0].EVAL_S5[OPPONENT_FEATURE[j + 29403]];	// 1111100000(3)
-
-	set_eval_packing((*P)[0].EVAL_S4, T, kd_S10 + 6, 0, 0, 0, 4);	/* 4 squares : 81 -> 45 */
-	for (j = 0; j < 81; ++j)
-		(*P)[1].EVAL_S4[j] = (*P)[0].EVAL_S4[OPPONENT_FEATURE[j + 29484]];	// 1111110000(3)
-
-	set_eval_packing((*P)[0].EVAL_C9, T, kd_C9, 0, 0, 0, 9);	/* 9 corner squares : 19683 -> 10206 */
-	for (j = 0; j < 19683; ++j)
-		(*P)[1].EVAL_C9[j] = (*P)[0].EVAL_C9[OPPONENT_FEATURE[j + 19683]];	// 1000000000(3)
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	k = l = n = 0;	/* 10 squares (edge + X) : 59049 -> 29646 */
-	nc = 0;		/* 10 squares (angle + X) : 59049 -> 29889 */
-	for (q9 = 0; q9 < 3; ++q9, k += (1 - 3 * 3))
-	for (q8 = 0; q8 < 3; ++q8, k += (3 - 9 * 3))
-	for (q7 = 0; q7 < 3; ++q7, k += (9 - 27 * 3))
-	for (q6 = 0; q6 < 3; ++q6, k += (27 - 81 * 3))
-	for (q5 = 0; q5 < 3; ++q5, k += (81 - 243 * 3))
-	for (q4 = 0; q4 < 3; ++q4, k += (243 - 729 * 3)) {
-		c10ofs = q5 * (243 - 81) + q4 * (81 - 243);
-		for (q3 = 0; q3 < 3; ++q3, k += (729 - 2187 * 3))
-		for (q2 = 0; q2 < 3; ++q2, k += (2187 - 6561 * 3))
-		for (q1 = 0; q1 < 3; ++q1, k += (6561 - 19683 * 3))
-		for (q0 = 0; q0 < 3; ++q0, k += 19683) {
-			// k = q9 + q8 * 3 + q7 * 9 + q6 * 27 + q5 * 81 + q4 * 243 + q3 * 729 + q2 * 2187 + q1 * 6561 + q0 * 19683;
-			if (k < l) i = T[k];
-			else T[l] = i = n++;
-			P->EVAL_S10[0][l] = i;
-			P->EVAL_S10[1][O[l]] = i;
-
-<<<<<<< HEAD
-		// k = q[9] + q[8] * 3 + q[7] * 9 + q[6] * 27 + q[5] * 243 + q[4] * 81 + q[3] * 729 + q[2] * 2187 + q[1] * 6561 + q[0] * 19683;
-		kc = k + q[5] * (243 - 81) + q[4] * (81 - 243);
-		if (kc < l) i = T[kc + 59049];
-		else T[l + 59049] = i = nc++;
-		P->EVAL_C10[0][l] = i;
-		P->EVAL_C10[1][j] = i;
-		++l;
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-			// k = q9 + q8 * 3 + q7 * 9 + q6 * 27 + q5 * 243 + q4 * 81 + q3 * 729 + q2 * 2187 + q1 * 6561 + q0 * 19683;
-			kc = k + c10ofs;
-			if (kc < l) i = T[kc + 59049];
-			else T[l + 59049] = i = nc++;
-			P->EVAL_C10[0][l] = i;
-			P->EVAL_C10[1][O[l]] = i;
-			++l;
-		}
->>>>>>> 48873fa (calc opponent_feature once in eval_open)
-=======
-	set_eval_packing(P->EVAL_S10[0], T, kd_S10, 0, 0, 0, 10);	/* 10 squares (edge + X) : 59049 -> 29646 */
-	set_eval_packing(P->EVAL_C10[0], T, kd_C10, 0, 0, 0, 10);	/* 10 squares (angle + X) : 59049 -> 29889 */
-	for (j = 0; j < 59049; ++j) {
-		P->EVAL_S10[1][j] = P->EVAL_S10[0][O[j]];
-		P->EVAL_C10[1][j] = P->EVAL_C10[0][O[j]];
->>>>>>> bb33695 (Cleaner eval_open unpacking)
-=======
-	set_eval_packing((*P)[0].EVAL_S10, T, kd_S10, 0, 0, 0, 10);	/* 10 squares (edge + X) : 59049 -> 29646 */
-	set_eval_packing((*P)[0].EVAL_C10, T, kd_C10, 0, 0, 0, 10);	/* 10 squares (angle + X) : 59049 -> 29889 */
-	for (j = 0; j < 59049; ++j) {
-		(*P)[1].EVAL_S10[j] = (*P)[0].EVAL_S10[OPPONENT_FEATURE[j]];
-		(*P)[1].EVAL_C10[j] = (*P)[0].EVAL_C10[OPPONENT_FEATURE[j]];
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 	}
 
 	free(T);
 
 	// allocation
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	EVAL_WEIGHT = (Eval_weight(*)[EVAL_N_PLY - 2]) malloc(sizeof(*EVAL_WEIGHT));
-=======
-	EVAL_WEIGHT = (short (*)[61][EVAL_N_WEIGHT]) malloc(2 * sizeof (*EVAL_WEIGHT));
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	EVAL_WEIGHT = (short (*)[61][EVAL_N_WEIGHT]) malloc(sizeof (*EVAL_WEIGHT));
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-=======
-	EVAL_WEIGHT = (Eval_weight(*)[EVAL_N_PLY]) malloc(sizeof (*EVAL_WEIGHT));
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
-=======
-	EVAL_WEIGHT = (Eval_weight(*)[EVAL_N_PLY]) malloc(sizeof(*EVAL_WEIGHT));
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
-	EVAL_WEIGHT = (Eval_weight(*)[EVAL_N_PLY]) malloc(sizeof(*EVAL_WEIGHT));
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
-	if (EVAL_WEIGHT == NULL) fatal_error("Cannot allocate evaluation weights.\n");
-=======
-	EVAL_WEIGHT = (short (*)[][EVAL_N_WEIGHT]) malloc(2 * sizeof (*EVAL_WEIGHT));
-<<<<<<< HEAD
-	if (EVAL_WEIGHT == NULL) fatal_error("Cannot evaluation weights.\n");
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
 	if (EVAL_WEIGHT == NULL) fatal_error("Cannot allocate evaluation weights.\n");
->>>>>>> 48873fa (calc opponent_feature once in eval_open)
 
 	// data reading
 	w = (short*) malloc(n_w * sizeof (*w)); // a temporary to read packed weights
@@ -1316,9 +664,6 @@ void eval_open(const char* file)
 
 		if (edax_header == XADE) for (i = 0; i < n_w; ++i) w[i] = bswap_short(w[i]);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 		pe = *EVAL_WEIGHT + ply - 2;
 		pp = *P + (ply & 1);
 		for (k = 0; k < 19683; k++) {
@@ -1346,95 +691,10 @@ void eval_open(const char* file)
 		for (k = 0; k < 243; k++) {
 			pe->S7654[k + 2916] = w[pp->EVAL_S5[k] + EVAL_PACKED_OFS[10]];
 		}
-<<<<<<< HEAD
-<<<<<<< HEAD
 		for (k = 0; k < 81; k++) {
-=======
-		for (k = 0; k < 91; k++) {
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
 			pe->S7654[k + 3159] = w[pp->EVAL_S4[k] + EVAL_PACKED_OFS[11]];
 		}
 		pe->S0 = w[EVAL_PACKED_OFS[12]];
-=======
-		for (j = 0; j <= 1; ++j) {
-			pe = EVAL_WEIGHT[j][ply];
-			pp = *P + j;
-			for (k = 0; k < EVAL_SIZE[0]; k++) {
-				pe[k] = w[pp->EVAL_C9[k] + EVAL_PACKED_OFS[0]];
-			}
-			for (k = 0; k < EVAL_SIZE[1]; k++) {
-				pe[k + 19683] = w[pp->EVAL_C10[k] + EVAL_PACKED_OFS[1]];
-			}
-			for (k = 0; k < EVAL_SIZE[2]; k++) {
-				i = pp->EVAL_S10[k];
-				pe[k + 78732] = w[i + EVAL_PACKED_OFS[2]];
-				pe[k + 137781] = w[i + EVAL_PACKED_OFS[3]];
-			}
-			for (k = 0; k < EVAL_SIZE[4]; k++) {
-				i = pp->EVAL_S8[k];
-				pe[k + 196830] = w[i + EVAL_PACKED_OFS[4]];
-				pe[k + 203391] = w[i + EVAL_PACKED_OFS[5]];
-				pe[k + 209952] = w[i + EVAL_PACKED_OFS[6]];
-				pe[k + 216513] = w[i + EVAL_PACKED_OFS[7]];
-			}
-			for (k = 0; k < EVAL_SIZE[8]; k++) {
-				pe[k + 223074] = w[pp->EVAL_S7[k] + EVAL_PACKED_OFS[8]];
-			}
-			for (k = 0; k < EVAL_SIZE[9]; k++) {
-				pe[k + 225261] = w[pp->EVAL_S6[k] + EVAL_PACKED_OFS[9]];
-			}
-			for (k = 0; k < EVAL_SIZE[10]; k++) {
-				pe[k + 225990] = w[pp->EVAL_S5[k] + EVAL_PACKED_OFS[10]];
-			}
-			for (k = 0; k < EVAL_SIZE[11]; k++) {
-				pe[k + 226233] = w[pp->EVAL_S4[k] + EVAL_PACKED_OFS[11]];
-			}
-			pe[226314] = w[EVAL_PACKED_OFS[12]];
-		}
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-		pe = (*EVAL_WEIGHT)[ply];
-=======
-		pe = *EVAL_WEIGHT + ply;
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
-		pp = *P + (ply & 1);
-		for (k = 0; k < 19683; k++) {
-			pe->C9[k] = w[pp->EVAL_C9[k] + EVAL_PACKED_OFS[0]];
-		}
-		for (k = 0; k < 59049; k++) {
-			pe->C10[k] = w[pp->EVAL_C10[k] + EVAL_PACKED_OFS[1]];
-			i = pp->EVAL_S10[k];
-			pe->S100[k] = w[i + EVAL_PACKED_OFS[2]];
-			pe->S101[k] = w[i + EVAL_PACKED_OFS[3]];
-		}
-		for (k = 0; k < 6561; k++) {
-			i = pp->EVAL_S8[k];
-			pe->S8x4[k] = w[i + EVAL_PACKED_OFS[4]];
-			pe->S8x4[k + 6561] = w[i + EVAL_PACKED_OFS[5]];
-			pe->S8x4[k + 13122] = w[i + EVAL_PACKED_OFS[6]];
-			pe->S8x4[k + 19683] = w[i + EVAL_PACKED_OFS[7]];
-		}
-		for (k = 0; k < 2187; k++) {
-			pe->S7654[k] = w[pp->EVAL_S7[k] + EVAL_PACKED_OFS[8]];
-		}
-		for (k = 0; k < 729; k++) {
-			pe->S7654[k + 2187] = w[pp->EVAL_S6[k] + EVAL_PACKED_OFS[9]];
-		}
-		for (k = 0; k < 243; k++) {
-			pe->S7654[k + 2916] = w[pp->EVAL_S5[k] + EVAL_PACKED_OFS[10]];
-		}
-		for (k = 0; k < 91; k++) {
-=======
-		for (k = 0; k < 81; k++) {
->>>>>>> 2bfbe23 (Correct errors causing heap corrupt on MSVC builds)
-			pe->S7654[k + 3159] = w[pp->EVAL_S4[k] + EVAL_PACKED_OFS[11]];
-		}
-<<<<<<< HEAD
-		pe[226314] = w[EVAL_PACKED_OFS[12]];
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-=======
-		pe->S0 = w[EVAL_PACKED_OFS[12]];
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
 	}
 
 	fclose(f);
@@ -1449,24 +709,7 @@ void eval_open(const char* file)
 	info("<Evaluation function weights version %u.%u.%u loaded>\n", version, release, build);
 
 	// f = fopen("eval.bin", "wb");
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	// fwrite(*EVAL_WEIGHT, sizeof(Eval_weight), EVAL_N_PLY, f);
-=======
-	// for (i = 0; i < 2; ++i)
-	//	for (ply = 0; ply < EVAL_N_PLY; ply++) {
-	//		fwrite(EVAL_WEIGHT[i][ply], sizeof(short), EVAL_N_WEIGHT, f);
-	//	}
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-	// for (ply = 0; ply < EVAL_N_PLY; ply++) {
-	//	fwrite(EVAL_WEIGHT[ply], sizeof(short), EVAL_N_WEIGHT, f);
-	// }
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-=======
-	// fwrite(*EVAL_WEIGHT, sizeof(Eval_weight), EVAL_N_PLY, f);
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
 	// fclose(f);
 }
 
@@ -1475,26 +718,11 @@ void eval_open(const char* file)
  */
 void eval_close(void)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	free(OPPONENT_FEATURE);
-=======
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
 	free(OPPONENT_FEATURE);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 	free(EVAL_WEIGHT);
 	EVAL_WEIGHT = NULL;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifdef ANDROID
 extern void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *eval_in);
 #elif defined(hasSSE2) || defined(__ARM_NEON) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
@@ -1503,48 +731,6 @@ extern void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const E
 
 #if !defined(hasSSE2) && !defined(__ARM_NEON)
 
-=======
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-#if defined(__SSE2__) || defined(USE_GAS_MMX)
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX)
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-=======
-/**
- * @brief Swap player's feature.
- *
- * @param eval  Evaluation function.
- */
-void eval_swap(Eval *eval)
-{
-	eval->player ^= 1;
-}
-
->>>>>>> 6b942ef (Make eval_swap public and inline some)
-=======
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-#if defined(hasSSE2) || defined(hasNeon) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-#ifdef ANDROID
-extern void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *eval_in);
-<<<<<<< HEAD
-#elif defined(hasSSE2) || defined(hasNeon) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
-#elif defined(hasSSE2) || defined(__ARM_NEON) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-#include "eval_sse.c"
-#endif
-
-#if !defined(hasSSE2) && !defined(__ARM_NEON)
-
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
 /**
  * @brief Set up evaluation features from a board.
  *
@@ -1553,39 +739,8 @@ extern void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const E
  */
 void eval_set(Eval *eval, const Board *board)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	int	i, x;
-  #ifdef VECTOR_EVAL_UPDATE
-	unsigned long long b = (eval->n_empties & 1) ? board->opponent : board->player;
-
-	eval->feature = EVAL_FEATURE_all_opponent;
-	foreach_bit (x, b)
-		for (i = 0; i < 12; ++i)
-			eval->feature.ull[i] -= EVAL_FEATURE[x].ull[i];
-
-	b = ~(board->opponent | board->player);
-	foreach_bit (x, b)
-		for (i = 0; i < 12; ++i)
-			eval->feature.ull[i] += EVAL_FEATURE[x].ull[i];
-
-  #else
-	int	j;
-=======
-=======
-#ifdef VECTOR_EVAL_UPDATE
-	int i, x;
-=======
-	int	i, j, x;
-  #ifdef VECTOR_EVAL_UPDATE
-	widest_register	r;
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-=======
 	int	i, x;
   #ifdef VECTOR_EVAL_UPDATE
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
 	unsigned long long b = (eval->n_empties & 1) ? board->opponent : board->player;
 
 	eval->feature = EVAL_FEATURE_all_opponent;
@@ -1598,18 +753,8 @@ void eval_set(Eval *eval, const Board *board)
 		for (i = 0; i < 12; ++i)
 			eval->feature.ull[i] += EVAL_FEATURE[x].ull[i];
 
-<<<<<<< HEAD
-#else
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-	int i, j, x;
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-=======
   #else
-<<<<<<< HEAD
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-=======
 	int	j;
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
 	Board	b;
 
 	if (eval->n_empties & 1) {
@@ -1618,74 +763,16 @@ void eval_set(Eval *eval, const Board *board)
 	} else	b = *board;
 
 	for (i = 0; i < EVAL_N_FEATURE; ++i) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 		x = 0;
 		for (j = 0; j < EVAL_F2X[i].n_square; j++) {
 			x = x * 3 + board_get_square_color(&b, EVAL_F2X[i].x[j]);
 		}
 		eval->feature.us[i] = x + EVAL_OFFSET[i];
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-		eval->feature.us[i] = 0;
-=======
-		search->eval.feature.us[i] = 0;
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-		x = 0;
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-		for (j = 0; j < EVAL_F2X[i].n_square; j++) {
-			x = x * 3 + board_get_square_color(&b, EVAL_F2X[i].x[j]);
-		}
-<<<<<<< HEAD
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
 	}
-<<<<<<< HEAD
-<<<<<<< HEAD
   #endif
-=======
-	search->eval.player = 0;
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-		eval->feature.us[i] = x;
-=======
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-	}
-<<<<<<< HEAD
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-=======
-#endif
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-  #endif
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
 }
 
 /**
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
- * @brief Swap player's feature.
- *
- * @param x     Move position.
- * @param f     Flipped bitboard.
- * @param eval  Evaluation function.
- */
-<<<<<<< HEAD
-static void eval_swap(Eval *eval)
-{
-<<<<<<< HEAD
-	eval->player ^= 1;
-}
-
-/**
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
->>>>>>> 6b942ef (Make eval_swap public and inline some)
  * @brief Update the features after a player's move.
  *
  * @param x     Move position.
@@ -1694,10 +781,6 @@ static void eval_swap(Eval *eval)
  */
 static void eval_update_0(int x, unsigned long long f, Eval *eval)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
   #ifdef VECTOR_EVAL_UPDATE
 	int	i;
 
@@ -1705,82 +788,17 @@ static void eval_update_0(int x, unsigned long long f, Eval *eval)
 		eval->feature.ull[i] -= EVAL_FEATURE[x].ull[i] << 1;
 
 	foreach_bit (x, f)
-<<<<<<< HEAD
-		for (i = 0; i < 12; ++i)
-			eval->feature.ull[i] -= EVAL_FEATURE[x].ull[i];
-=======
-	const CoordinateToFeature *s = EVAL_X2F + move->x;
-	int x;
-=======
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-	unsigned long long f = move->flipped;
-	int	j, x;
-=======
-static void eval_update_0(int x, unsigned long long f, Eval *eval)
-{
-	int	j;
-<<<<<<< HEAD
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
-	widest_register	b;
-#ifdef VECTOR_EVAL_UPDATE
-=======
-	widest_register	r;
-  #ifdef VECTOR_EVAL_UPDATE
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-	int	i;
-
-	for (i = 0; i < 12; ++i)
-		eval->feature.ull[i] -= EVAL_FEATURE[x].ull[i] << 1;
-
-<<<<<<< HEAD
-	for (j = 0; j < 64; j += sizeof(widest_register) * CHAR_BIT) {
-		foreach_bit_r (x, f, b)
-			for (i = 0; i < 12; ++i)
-				eval->feature.ull[i] -= EVAL_FEATURE[x + j].ull[i];
-	}
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-
-<<<<<<< HEAD
-=======
-	foreach_bit_r (x, f, j, r)
-=======
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
 		for (i = 0; i < 12; ++i)
 			eval->feature.ull[i] -= EVAL_FEATURE[x].ull[i];
 
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
   #else
 	const CoordinateToFeature *s = EVAL_X2F + x;
 
-=======
-#else
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
-	const CoordinateToFeature *s = EVAL_X2F + move->x;
-=======
-	const CoordinateToFeature *s = EVAL_X2F + x;
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
-
->>>>>>> f2da03e (Refine arm builds adding neon support.)
 	switch (s->n_feature) {
 	default:
-<<<<<<< HEAD
-<<<<<<< HEAD
 		eval->feature.us[s->feature[6].i] -= 2 * s->feature[6].x;	// FALLTHRU
 	case 6:	eval->feature.us[s->feature[5].i] -= 2 * s->feature[5].x;	// FALLTHRU
 	case 5:	eval->feature.us[s->feature[4].i] -= 2 * s->feature[4].x;	// FALLTHRU
-=======
-		eval->feature.us[s->feature[6].i] -= 2 * s->feature[6].x;
-	case 6:	eval->feature.us[s->feature[5].i] -= 2 * s->feature[5].x;
-	case 5:	eval->feature.us[s->feature[4].i] -= 2 * s->feature[4].x;
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-		eval->feature.us[s->feature[6].i] -= 2 * s->feature[6].x;	// FALLTHRU
-	case 6:	eval->feature.us[s->feature[5].i] -= 2 * s->feature[5].x;	// FALLTHRU
-	case 5:	eval->feature.us[s->feature[4].i] -= 2 * s->feature[4].x;	// FALLTHRU
->>>>>>> bc93772 (Avoid modern compliler warnings)
 	case 4:	eval->feature.us[s->feature[3].i] -= 2 * s->feature[3].x;
 		eval->feature.us[s->feature[2].i] -= 2 * s->feature[2].x;
 		eval->feature.us[s->feature[1].i] -= 2 * s->feature[1].x;
@@ -1788,71 +806,21 @@ static void eval_update_0(int x, unsigned long long f, Eval *eval)
 		break;
 	}
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	foreach_bit (x, f) {
 		s = EVAL_X2F + x;
 		switch (s->n_feature) {
 		default:
-<<<<<<< HEAD
 			eval->feature.us[s->feature[6].i] -= s->feature[6].x;	// FALLTHRU
 		case 6:	eval->feature.us[s->feature[5].i] -= s->feature[5].x;	// FALLTHRU
 		case 5:	eval->feature.us[s->feature[4].i] -= s->feature[4].x;	// FALLTHRU
-=======
-			eval->feature.us[s->feature[6].i] -= s->feature[6].x;
-		case 6:	eval->feature.us[s->feature[5].i] -= s->feature[5].x;
-		case 5:	eval->feature.us[s->feature[4].i] -= s->feature[4].x;
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-	foreach_bit_r (x, f, j, r) {
-=======
-	foreach_bit (x, f) {
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
-		s = EVAL_X2F + x;
-		switch (s->n_feature) {
-		default:
-			eval->feature.us[s->feature[6].i] -= s->feature[6].x;	// FALLTHRU
-		case 6:	eval->feature.us[s->feature[5].i] -= s->feature[5].x;	// FALLTHRU
-		case 5:	eval->feature.us[s->feature[4].i] -= s->feature[4].x;	// FALLTHRU
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
 		case 4:	eval->feature.us[s->feature[3].i] -= s->feature[3].x;
 			eval->feature.us[s->feature[2].i] -= s->feature[2].x;
 			eval->feature.us[s->feature[1].i] -= s->feature[1].x;
 			eval->feature.us[s->feature[0].i] -= s->feature[0].x;
 			break;
-<<<<<<< HEAD
-		}
-	}
-<<<<<<< HEAD
-  #endif
-=======
-
-=======
-	for (j = 0; j < 64; j += sizeof(widest_register) * CHAR_BIT) {
-		foreach_bit_r (x, f, b) {
-			s = EVAL_X2F + x + j;
-			switch (s->n_feature) {
-			default:
-				eval->feature.us[s->feature[6].i] -= s->feature[6].x;	// FALLTHRU
-			case 6:	eval->feature.us[s->feature[5].i] -= s->feature[5].x;	// FALLTHRU
-			case 5:	eval->feature.us[s->feature[4].i] -= s->feature[4].x;	// FALLTHRU
-			case 4:	eval->feature.us[s->feature[3].i] -= s->feature[3].x;
-				eval->feature.us[s->feature[2].i] -= s->feature[2].x;
-				eval->feature.us[s->feature[1].i] -= s->feature[1].x;
-				eval->feature.us[s->feature[0].i] -= s->feature[0].x;
-				break;
-			}
-		}
-	}
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-#endif
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
 		}
 	}
   #endif
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
 }
 
 /**
@@ -1864,60 +832,8 @@ static void eval_update_0(int x, unsigned long long f, Eval *eval)
  */
 static void eval_update_1(int x, unsigned long long f, Eval *eval)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
   #ifdef VECTOR_EVAL_UPDATE
 	int	i;
-=======
-	const CoordinateToFeature *s = EVAL_X2F + move->x;
-	int x;
-	unsigned long long f = move->flipped;
-<<<<<<< HEAD
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-
-<<<<<<< HEAD
-	for (i = 0; i < 12; ++i)
-		eval->feature.ull[i] -= EVAL_FEATURE[x].ull[i];
-
-<<<<<<< HEAD
-	foreach_bit (x, f)
-		for (i = 0; i < 12; ++i)
-			eval->feature.ull[i] += EVAL_FEATURE[x].ull[i];
-
-  #else
-	const CoordinateToFeature *s = EVAL_X2F + x;
-
-	switch (s->n_feature) {
-	default:
-	       	eval->feature.us[s->feature[6].i] -= s->feature[6].x;	// FALLTHRU
-	case 6:	eval->feature.us[s->feature[5].i] -= s->feature[5].x;	// FALLTHRU
-	case 5:	eval->feature.us[s->feature[4].i] -= s->feature[4].x;	// FALLTHRU
-=======
-=======
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-#ifdef DEBUG
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-	int i, j;
-=======
-	unsigned long long f = move->flipped;
-	int	j, x;
-=======
-	int	j;
-<<<<<<< HEAD
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
-	widest_register	b;
-#ifdef VECTOR_EVAL_UPDATE
-=======
-	widest_register	r;
-  #ifdef VECTOR_EVAL_UPDATE
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-	int	i;
->>>>>>> f2da03e (Refine arm builds adding neon support.)
 
 	for (i = 0; i < 12; ++i)
 		eval->feature.ull[i] -= EVAL_FEATURE[x].ull[i];
@@ -1931,16 +847,9 @@ static void eval_update_1(int x, unsigned long long f, Eval *eval)
 
 	switch (s->n_feature) {
 	default:
-<<<<<<< HEAD
-	       	eval->feature.us[s->feature[6].i] -= s->feature[6].x;
-	case 6:	eval->feature.us[s->feature[5].i] -= s->feature[5].x;
-	case 5:	eval->feature.us[s->feature[4].i] -= s->feature[4].x;
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
 	       	eval->feature.us[s->feature[6].i] -= s->feature[6].x;	// FALLTHRU
 	case 6:	eval->feature.us[s->feature[5].i] -= s->feature[5].x;	// FALLTHRU
 	case 5:	eval->feature.us[s->feature[4].i] -= s->feature[4].x;	// FALLTHRU
->>>>>>> bc93772 (Avoid modern compliler warnings)
 	case 4:	eval->feature.us[s->feature[3].i] -= s->feature[3].x;
 	       	eval->feature.us[s->feature[2].i] -= s->feature[2].x;
 	       	eval->feature.us[s->feature[1].i] -= s->feature[1].x;
@@ -1948,80 +857,27 @@ static void eval_update_1(int x, unsigned long long f, Eval *eval)
 	       	break;
 	}
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	foreach_bit (x, f) {
 		s = EVAL_X2F + x;
 		switch (s->n_feature) {
 		default:
-<<<<<<< HEAD
 		       	eval->feature.us[s->feature[6].i] += s->feature[6].x;	// FALLTHRU
 		case 6:	eval->feature.us[s->feature[5].i] += s->feature[5].x;	// FALLTHRU
 		case 5:	eval->feature.us[s->feature[4].i] += s->feature[4].x;	// FALLTHRU
-=======
-		       	eval->feature.us[s->feature[6].i] += s->feature[6].x;
-		case 6:	eval->feature.us[s->feature[5].i] += s->feature[5].x;
-		case 5:	eval->feature.us[s->feature[4].i] += s->feature[4].x;
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-	foreach_bit_r (x, f, j, r) {
-=======
-	foreach_bit (x, f) {
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
-		s = EVAL_X2F + x;
-		switch (s->n_feature) {
-		default:
-		       	eval->feature.us[s->feature[6].i] += s->feature[6].x;	// FALLTHRU
-		case 6:	eval->feature.us[s->feature[5].i] += s->feature[5].x;	// FALLTHRU
-		case 5:	eval->feature.us[s->feature[4].i] += s->feature[4].x;	// FALLTHRU
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
 		case 4:	eval->feature.us[s->feature[3].i] += s->feature[3].x;
 		       	eval->feature.us[s->feature[2].i] += s->feature[2].x;
 		       	eval->feature.us[s->feature[1].i] += s->feature[1].x;
 		       	eval->feature.us[s->feature[0].i] += s->feature[0].x;
 		       	break;
-<<<<<<< HEAD
-=======
-	for (j = 0; j < 64; j += sizeof(widest_register) * CHAR_BIT) {
-		foreach_bit_r (x, f, b) {
-			s = EVAL_X2F + x + j;
-			switch (s->n_feature) {
-			default:
-			       	eval->feature.us[s->feature[6].i] += s->feature[6].x;	// FALLTHRU
-			case 6:	eval->feature.us[s->feature[5].i] += s->feature[5].x;	// FALLTHRU
-			case 5:	eval->feature.us[s->feature[4].i] += s->feature[4].x;	// FALLTHRU
-			case 4:	eval->feature.us[s->feature[3].i] += s->feature[3].x;
-			       	eval->feature.us[s->feature[2].i] += s->feature[2].x;
-			       	eval->feature.us[s->feature[1].i] += s->feature[1].x;
-			       	eval->feature.us[s->feature[0].i] += s->feature[0].x;
-			       	break;
-			}
->>>>>>> f2da03e (Refine arm builds adding neon support.)
 		}
 	}
-<<<<<<< HEAD
   #endif
-=======
-#endif
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-=======
-		}
-	}
-  #endif
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
 }
 
 void eval_update(int x, unsigned long long f, Eval *eval)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	assert(f);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
   #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(DISPATCH_NEON)
 	if (hasSSE2) {
 		eval_update_sse(x, f, eval, eval);
@@ -2032,49 +888,9 @@ void eval_update(int x, unsigned long long f, Eval *eval)
 		eval_update_1(x, f, eval);
 	else
 		eval_update_0(x, f, eval);
-=======
-	assert(move->flipped);
-=======
-	assert(f);
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
-
-#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(ANDROID)
-=======
-  #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(ANDROID)
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-	if (hasSSE2) {
-		eval_update_sse(x, f, eval, eval);
-		return;
-	}
-  #endif
-	if (eval->n_empties & 1)
-		eval_update_1(x, f, eval);
-	else
-<<<<<<< HEAD
-		eval_update_0(eval, move);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	eval_swap(eval);
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
-	}
->>>>>>> 6b942ef (Make eval_swap public and inline some)
-=======
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-}
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-void eval_update_leaf(int x, unsigned long long f, Eval *eval_out, const Eval *eval_in)
-=======
-=======
-void eval_update_leaf(Eval *eval_out, const Eval *eval_in, const Move *move)
-=======
-		eval_update_0(x, f, eval);
 }
 
 void eval_update_leaf(int x, unsigned long long f, Eval *eval_out, const Eval *eval_in)
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
 {
   #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(DISPATCH_NEON)
 	if (hasSSE2) {
@@ -2089,168 +905,7 @@ void eval_update_leaf(int x, unsigned long long f, Eval *eval_out, const Eval *e
 		eval_update_0(x, f, eval_out);
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-#if 0 // replaced with simple save-restore
-
-/**
- * @brief Restore the features as before a player's move.
- *
- * @param eval  Evaluation function.
- * @param move  Move.
- */
-static void eval_restore_0(Eval *eval, const Move *move)
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-{
-<<<<<<< HEAD
-  #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(DISPATCH_NEON)
-	if (hasSSE2) {
-		eval_update_sse(x, f, eval_out, eval_in);
-		return;
-	}
-  #endif
-	eval_out->feature = eval_in->feature;
-	if (eval_in->n_empties & 1)
-		eval_update_1(x, f, eval_out);
-	else
-		eval_update_0(x, f, eval_out);
-}
-
-#endif // !defined(hasSSE2) && !defined(__ARM_NEON)
-=======
-	const CoordinateToFeature *s = EVAL_X2F + move->x;
-	int x;
-	unsigned long long f = move->flipped;
-#ifdef DEBUG
-	int i, j;
-
-	for (i = 0; i < s->n_feature; ++i) {
-		j = s->feature[i].i;
-		assert(0 <= j && j < EVAL_N_FEATURE);
-		eval->feature.us[j] += 2 * s->feature[i].x;
-		assert(eval->feature.us[j] <= EVAL_MAX_VALUE[j]);
-	}
-
-	foreach_bit (x, f) {
-		s = EVAL_X2F + x;
-		for (i = 0; i < s->n_feature; ++i) {
-			j = s->feature[i].i;
-			assert(0 <= j && j < EVAL_N_FEATURE);
-			eval->feature.us[j] += s->feature[i].x;
-			assert(eval->feature.us[j] <= EVAL_MAX_VALUE[j]);
-		}
-	}
-
-#else
-	switch (s->n_feature) {
-	default:
-	       	eval->feature.us[s->feature[6].i] += 2 * s->feature[6].x;
-	case 6:	eval->feature.us[s->feature[5].i] += 2 * s->feature[5].x;
-	case 5:	eval->feature.us[s->feature[4].i] += 2 * s->feature[4].x;
-	case 4:	eval->feature.us[s->feature[3].i] += 2 * s->feature[3].x;
-	       	eval->feature.us[s->feature[2].i] += 2 * s->feature[2].x;
-	       	eval->feature.us[s->feature[1].i] += 2 * s->feature[1].x;
-	       	eval->feature.us[s->feature[0].i] += 2 * s->feature[0].x;
-	       	break;
-	}
-
-	foreach_bit (x, f) {
-		s = EVAL_X2F + x;
-		switch (s->n_feature) {
-		default:
-		       	eval->feature.us[s->feature[6].i] += s->feature[6].x;
-		case 6:	eval->feature.us[s->feature[5].i] += s->feature[5].x;
-		case 5:	eval->feature.us[s->feature[4].i] += s->feature[4].x;
-		case 4:	eval->feature.us[s->feature[3].i] += s->feature[3].x;
-		       	eval->feature.us[s->feature[2].i] += s->feature[2].x;
-		       	eval->feature.us[s->feature[1].i] += s->feature[1].x;
-		       	eval->feature.us[s->feature[0].i] += s->feature[0].x;
-		       	break;
-		}
-	}
-#endif
-}
-
-static void eval_restore_1(Eval *eval, const Move *move)
-{
-	const CoordinateToFeature *s = EVAL_X2F + move->x;
-	int x;
-	unsigned long long f = move->flipped;
-#ifdef DEBUG
-	int i, j;
-
-	for (i = 0; i < s->n_feature; ++i) {
-		j = s->feature[i].i;
-		assert(0 <= j && j < EVAL_N_FEATURE);
-		eval->feature.us[j] += s->feature[i].x;
-		assert(eval->feature.us[j] <= EVAL_MAX_VALUE[j]);
-	}
-
-	foreach_bit (x, f) {
-		s = EVAL_X2F + x;
-		for (i = 0; i < s->n_feature; ++i) {
-			j = s->feature[i].i;
-			assert(0 <= j && j < EVAL_N_FEATURE);
-			eval->feature.us[j] -= s->feature[i].x;
-			assert(eval->feature.us[j] <= EVAL_MAX_VALUE[j]);
-		}
-	}
-
-#else
-	switch (s->n_feature) {
-	default:
-	       	eval->feature.us[s->feature[6].i] += s->feature[6].x;
-	case 6:	eval->feature.us[s->feature[5].i] += s->feature[5].x;
-	case 5:	eval->feature.us[s->feature[4].i] += s->feature[4].x;
-	case 4:	eval->feature.us[s->feature[3].i] += s->feature[3].x;
-	       	eval->feature.us[s->feature[2].i] += s->feature[2].x;
-	       	eval->feature.us[s->feature[1].i] += s->feature[1].x;
-	       	eval->feature.us[s->feature[0].i] += s->feature[0].x;
-	       	break;
-	}
-
-	foreach_bit (x, f) {
-		s = EVAL_X2F + x;
-		switch (s->n_feature) {
-		default:
-		       	eval->feature.us[s->feature[6].i] -= s->feature[6].x;
-		case 6:	eval->feature.us[s->feature[5].i] -= s->feature[5].x;
-		case 5:	eval->feature.us[s->feature[4].i] -= s->feature[4].x;
-		case 4:	eval->feature.us[s->feature[3].i] -= s->feature[3].x;
-		       	eval->feature.us[s->feature[2].i] -= s->feature[2].x;
-		       	eval->feature.us[s->feature[1].i] -= s->feature[1].x;
-		       	eval->feature.us[s->feature[0].i] -= s->feature[0].x;
-		       	break;
-		}
-	}
-#endif
-}
-
-void eval_restore(Eval *eval, const Move *move)
-{
-	assert(move->flipped);
-	eval_swap(eval);
-	assert(WHITE == eval->player || BLACK == eval->player);
-
-	if (eval->player)
-		eval_restore_1(eval, move);
-	else
-		eval_restore_0(eval, move);
-}
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-
-#endif // if 0
-=======
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-#endif // hasSSE2
-=======
-#endif // !defined(hasSSE2) && !defined(hasNeon)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
 #endif // !defined(hasSSE2) && !defined(__ARM_NEON)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 
 /**
  * @brief Update/Restore the features after a passing move.
@@ -2260,19 +915,13 @@ void eval_restore(Eval *eval, const Move *move)
 void eval_pass(Eval *eval)
 {
 	int i;
-<<<<<<< HEAD
-=======
 
 	for (i =  0; i <  4; ++i)	// 9
 		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i] + 19683];
 	for (i =  4; i < 16; ++i)	// 10
 		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i]];
 	for (i = 16; i < 30; ++i)	// 8
-<<<<<<< HEAD
 		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i] - EVAL_OFFSET[i] + 26244] + EVAL_OFFSET[i];
-=======
-		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i] - EVAL_OFFSET[i] + 26244]+ EVAL_OFFSET[i];
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
 	for (i = 30; i < 34; ++i)	// 7
 		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i] + 28431];
 	for (i = 34; i < 38; ++i)	// 6
@@ -2282,35 +931,7 @@ void eval_pass(Eval *eval)
 	for (i = 42; i < 46; ++i)	// 4
 		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i] - 3159 + 29484] + 3159;
 }
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	for (i =  0; i <  4; ++i)	// 9
-		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i] + 19683];
-	for (i =  4; i < 16; ++i)	// 10
-		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i]];
-	for (i = 16; i < 30; ++i)	// 8
-		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i] - EVAL_OFFSET[i] + 26244] + EVAL_OFFSET[i];
-	for (i = 30; i < 34; ++i)	// 7
-		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i] + 28431];
-	for (i = 34; i < 38; ++i)	// 6
-		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i] - 2187 + 29160] + 2187;
-	for (i = 38; i < 42; ++i)	// 5
-		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i] - 2916 + 29403] + 2916;
-	for (i = 42; i < 46; ++i)	// 4
-		eval->feature.us[i] = OPPONENT_FEATURE[eval->feature.us[i] - 3159 + 29484] + 3159;
-}
-=======
-#endif // __SSE2__
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
-#endif // hasSSE2
->>>>>>> 1dc032e (Improve visual c compatibility)
-
-=======
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
 /**
  * @brief Compute the error-type of the evaluation function according to the
  * depths.
diff --git a/src/eval.h b/src/eval.h
index c068b8b..babee2a 100644
--- a/src/eval.h
+++ b/src/eval.h
@@ -1,51 +1,17 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file eval.h
  *
  * Evaluation function's header.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2018
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
  * @author Richard Delorme
  * @version 4.5
-=======
- * @date 1998 - 2020
- * @author Richard Delorme
- * @version 4.4
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
- * @date 1998 - 2022
-=======
- * @date 1998 - 2023
->>>>>>> 6de3ab1 (Omit eval_weight table for ply > 53)
- * @author Richard Delorme
- * @version 4.5
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
  */
 
 #ifndef EDAX_EVAL_H
 #define EDAX_EVAL_H
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #include "bit.h"
-=======
-#ifdef __SSE2__
-	#include <x86intrin.h>
-#endif
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-#include "bit.h"
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 
 /** number of features */
 enum { EVAL_N_FEATURE = 47 };
@@ -54,8 +20,6 @@ enum { EVAL_N_FEATURE = 47 };
  * struct Eval
  * @brief evaluation function
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 typedef union {
 	unsigned short us[48];
 	unsigned long long ull[12];	// SWAR
@@ -70,79 +34,14 @@ typedef union {
 } EVAL_FEATURE_V;
 
 typedef struct Eval {
-<<<<<<< HEAD
 	EVAL_FEATURE_V feature;                       /**!< discs' features (96) */
 	int n_empties;                                /**< number of empty squares (4) */
 	unsigned int parity;                          /**< parity (4) */
-=======
-	union {
-		unsigned short us[EVAL_N_FEATURE];         /**!< discs' features */
-#ifdef __SSE2__
-		__v8hi	v8[6];
-#endif
-#ifdef __AVX2__
-		__v16hi	v16[3];
-#endif
-	} feature;
-	int player;
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-typedef struct Eval {
-	union {
-		unsigned short us[EVAL_N_FEATURE];         /**!< discs' features */
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
-		__m128i	v8[6];
-=======
-typedef union {
-	unsigned short us[48];
-	unsigned long long ull[12];	// SWAR
-#ifdef __ARM_NEON__
-	int16x8_t v8[6];
-#elif defined(hasSSE2) || defined(USE_MSVC_X86)
-	__m128i	v8[6];
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-#endif
-#ifdef __AVX2__
-	__m256i	v16[3];
-#endif
-} EVAL_FEATURE_V;
-
-typedef struct Eval {
-<<<<<<< HEAD
-	EVAL_FEATURE_V feature;                       /**!< discs' features */
-	int n_empties;                                /**< number of empty squares */
-	unsigned int parity;                          /**< parity */
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	EVAL_FEATURE_V feature;                       /**!< discs' features (96) */
-	int n_empties;                                /**< number of empty squares (4) */
-	unsigned int parity;                          /**< parity (4) */
->>>>>>> 7167fe4 (Fill struct Search AVX alignment hole)
 } Eval;
 
 struct Board;
 struct Move;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-/** unpacked weights */
-// enum { EVAL_N_WEIGHT = 226315 };
-typedef struct Eval_weight {
-	short	S0;		// also acts as guard for VGATHERDD access
-	short	C9[19683];
-	short	C10[59049];
-	short	S100[59049];
-	short	S101[59049];
-	short	S8x4[6561*4];
-	short	S7654[2187+729+243+81];
-} Eval_weight;
-=======
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-/** number of (unpacked) weights */
-enum { EVAL_N_WEIGHT = 226315 };
-=======
 /** unpacked weights */
 // enum { EVAL_N_WEIGHT = 226315 };
 typedef struct Eval_weight {
@@ -154,39 +53,11 @@ typedef struct Eval_weight {
 	short	S8x4[6561*4];
 	short	S7654[2187+729+243+81];
 } Eval_weight;
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
 
 /** number of plies */
 enum { EVAL_N_PLY = 54 };	// decreased from 60 in 4.5.1
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-extern short (*EVAL_WEIGHT)[EVAL_N_PLY][EVAL_N_WEIGHT];
-<<<<<<< HEAD
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-
-<<<<<<< HEAD
-/** number of plies */
-enum { EVAL_N_PLY = 54 };	// decreased from 60 in 4.5.1
-
 extern Eval_weight (*EVAL_WEIGHT)[EVAL_N_PLY - 2];	// for 2..53
-=======
-
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-#ifndef SELECTIVE_EVAL_UPDATE
-
-extern const EVAL_FEATURE_V EVAL_FEATURE[65];
-extern const EVAL_FEATURE_V EVAL_FEATURE_all_opponent;
-
-#endif
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-extern Eval_weight (*EVAL_WEIGHT)[EVAL_N_PLY];
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
-=======
-extern Eval_weight (*EVAL_WEIGHT)[EVAL_N_PLY - 2];	// for 2..53
->>>>>>> 6de3ab1 (Omit eval_weight table for ply > 53)
 
 /* function declaration */
 void eval_open(const char*);
@@ -194,121 +65,20 @@ void eval_close(void);
 // void eval_init(Eval*);
 // void eval_free(Eval*);
 void eval_set(Eval*, const struct Board*);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-void eval_update(Eval*, const struct Move*);
-void eval_update_leaf(Eval*, const Eval*, const struct Move*);
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-void eval_update(int, unsigned long long, Eval*);
-void eval_update_leaf(int, unsigned long long, Eval*, const Eval*);
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
-=======
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
 void eval_restore(Eval*, const struct Move*);
 void eval_pass(Eval*);
 double eval_sigma(const int, const int, const int);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(hasSSE2) || defined(__ARM_NEON) || defined(USE_MSVC_X86) || defined(ANDROID)
-void eval_update_sse(int, unsigned long long, Eval *, const Eval *);
-#endif
-#if defined(hasSSE2) || defined(__ARM_NEON)
-=======
-#if defined(hasSSE2) || defined(__ARM_NEON__) || defined(USE_MSVC_X86)
-void eval_update_sse(int, unsigned long long, Eval *, const Eval *);
-#endif
-#if defined(hasSSE2) || defined(hasNeon)
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
-=======
 #if defined(hasSSE2) || defined(__ARM_NEON) || defined(USE_MSVC_X86) || defined(ANDROID)
 void eval_update_sse(int, unsigned long long, Eval *, const Eval *);
 #endif
 #if defined(hasSSE2) || defined(__ARM_NEON)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 #define	eval_update(x, f, eval)	eval_update_sse(x, f, eval, eval)
 #define	eval_update_leaf(x, f, eval_out, eval_in)	eval_update_sse(x, f, eval_out, eval_in)
 #else
 void eval_update(int, unsigned long long, Eval*);
 void eval_update_leaf(int, unsigned long long, Eval*, const Eval*);
-<<<<<<< HEAD
-=======
-#endif
-
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
-#endif
-
 #endif
 
-=======
-/**
- * @file eval.h
- *
- * Evaluation function's header.
- *
- * @date 1998 - 2020
- * @author Richard Delorme
- * @version 4.4
- */
-
-#ifndef EDAX_EVAL_H
-#define EDAX_EVAL_H
-
-#include "bit.h"
-
-/** number of features */
-enum { EVAL_N_FEATURE = 47 };
-
-/**
- * struct Eval
- * @brief evaluation function
- */
-typedef struct Eval {
-	union {
-		unsigned short us[EVAL_N_FEATURE];         /**!< discs' features */
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
-		__m128i	v8[6];
-#endif
-#ifdef __AVX2__
-		__m256i	v16[3];
-#endif
-	} feature;
-	int n_empties;                                /**< number of empty squares */
-	unsigned int parity;                          /**< parity */
-} Eval;
-
-struct Board;
-struct Move;
-
-/** number of (unpacked) weights */
-enum { EVAL_N_WEIGHT = 226315 };
-
-/** number of plies */
-enum { EVAL_N_PLY = 61 };
-
-extern short (*EVAL_WEIGHT)[EVAL_N_PLY][EVAL_N_WEIGHT];
-
-
-/* function declaration */
-void eval_open(const char*);
-void eval_close(void);
-// void eval_init(Eval*);
-// void eval_free(Eval*);
-void eval_set(Eval*, const struct Board*);
-void eval_update(Eval*, const struct Move*);
-void eval_update_leaf(Eval*, const Eval*, const struct Move*);
-void eval_restore(Eval*, const struct Move*);
-void eval_pass(Eval*);
-double eval_sigma(const int, const int, const int);
-
-#endif
-
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
 #endif
 
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/eval_builder.c b/src/eval_builder.c
index cb40df7..ee75925 100644
--- a/src/eval_builder.c
+++ b/src/eval_builder.c
@@ -12,20 +12,8 @@
  * @version 5.0
  */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 enum { MAX_N_GAMES = 3200000 };
 
-=======
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-enum { MAX_N_GAMES = 2500000 };
-=======
-enum { MAX_N_GAMES = 3200000 };
->>>>>>> a9633d5 (Initial 4.5.2; some reformats)
-
->>>>>>> 52d06c1 (pass flag in gamebase; increase MAX_N_GAMES in eval_builder)
 #define _CRT_SECURE_NO_WARNINGS
 
  // #include "const.h"
@@ -324,18 +312,8 @@ void InitBoard(Board *b)
 
 	for (i = 0; i < 64; ++i)
 		b->square[i] = PEMPTY;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	b->square[E4] = b->square[D5] = PBLACK;
 	b->square[D4] = b->square[E5] = PWHITE;
-=======
-	b->square[28] = b->square[35] = PBLACK;
-	b->square[27] = b->square[36] = PWHITE;
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-	b->square[E4] = b->square[D5] = PBLACK;
-	b->square[D4] = b->square[E5] = PWHITE;
->>>>>>> 265487b (Add evalgame command to eval_builder)
 	b->player = PBLACK;
 	b->BWTotal = 4;
 	b->ScoreDiff = 0;
@@ -494,23 +472,10 @@ void eval_edax_v3_get_packed_features(const Board* b, int* X)
 void eval_builder_get_angle_X_features(const Board* b, int* X) {
 	int p = b->player;
 	const char* x = b->square;
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 265487b (Add evalgame command to eval_builder)
 	X[0] = c10[p][x[A5] * 19683 + x[A4] * 6561 + x[A3] * 2187 + x[A2] * 729 + x[A1] * 243 + x[B2] * 81 + x[B1] * 27 + x[C1] * 9 + x[D1] * 3 + x[E1]];
 	X[1] = c10[p][x[H5] * 19683 + x[H4] * 6561 + x[H3] * 2187 + x[H2] * 729 + x[H1] * 243 + x[G2] * 81 + x[G1] * 27 + x[F1] * 9 + x[E1] * 3 + x[D1]];
 	X[2] = c10[p][x[A4] * 19683 + x[A5] * 6561 + x[A6] * 2187 + x[A7] * 729 + x[A8] * 243 + x[B7] * 81 + x[B8] * 27 + x[C8] * 9 + x[D8] * 3 + x[E8]];
 	X[3] = c10[p][x[H4] * 19683 + x[H5] * 6561 + x[H6] * 2187 + x[H7] * 729 + x[H8] * 243 + x[G7] * 81 + x[G8] * 27 + x[F8] * 9 + x[E8] * 3 + x[D8]];
-<<<<<<< HEAD
-=======
-	X[0] = c10[p][x[A5] * 19683 + x[A4] * 6561 + x[A3] * 2187 + x[A2] * 729 + x[A1] * 243 + x[B2] * 81 + x[B1] * 27 + x[C1] * 9 + x[D1] * 3 + x[E1]] + 10206;
-	X[1] = c10[p][x[H5] * 19683 + x[H4] * 6561 + x[H3] * 2187 + x[H2] * 729 + x[H1] * 243 + x[G2] * 81 + x[G1] * 27 + x[F1] * 9 + x[E1] * 3 + x[D1]] + 10206;
-	X[2] = c10[p][x[A4] * 19683 + x[A5] * 6561 + x[A6] * 2187 + x[A7] * 729 + x[A8] * 243 + x[B7] * 81 + x[B8] * 27 + x[C8] * 9 + x[D8] * 3 + x[E8]] + 10206;
-	X[3] = c10[p][x[H4] * 19683 + x[H5] * 6561 + x[H6] * 2187 + x[H7] * 729 + x[H8] * 243 + x[G7] * 81 + x[G8] * 27 + x[F8] * 9 + x[E8] * 3 + x[D8]] + 10206;
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
->>>>>>> 265487b (Add evalgame command to eval_builder)
 	X[4] = 29889;
 }
 
@@ -519,43 +484,19 @@ void eval_builder_get_angle_X_features(const Board* b, int* X) {
 void eval_builder_get_corner_block_features(const Board* b, int* X) {
 	int p = b->player;
 	const char* x = b->square;
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 265487b (Add evalgame command to eval_builder)
 	X[0] = s10[p][x[A1] * 19683 + x[C1] * 6561 + x[D1] * 2187 + x[C2] * 729 + x[D2] * 243 + x[E2] * 81 + x[F2] * 27 + x[E1] * 9 + x[F1] * 3 + x[H1]];
 	X[1] = s10[p][x[A8] * 19683 + x[C8] * 6561 + x[D8] * 2187 + x[C7] * 729 + x[D7] * 243 + x[E7] * 81 + x[F7] * 27 + x[E8] * 9 + x[F8] * 3 + x[H8]];
 	X[2] = s10[p][x[A1] * 19683 + x[A3] * 6561 + x[A4] * 2187 + x[B3] * 729 + x[B4] * 243 + x[B5] * 81 + x[B6] * 27 + x[A5] * 9 + x[A6] * 3 + x[A8]];
 	X[3] = s10[p][x[H1] * 19683 + x[H3] * 6561 + x[H4] * 2187 + x[G3] * 729 + x[G4] * 243 + x[G5] * 81 + x[G6] * 27 + x[H5] * 9 + x[H6] * 3 + x[H8]];
-<<<<<<< HEAD
-=======
-	X[0] = s10[p][x[A1] * 19683 + x[C1] * 6561 + x[D1] * 2187 + x[C2] * 729 + x[D2] * 243 + x[E2] * 81 + x[F2] * 27 + x[E1] * 9 + x[F1] * 3 + x[H1]] + 69741;
-	X[1] = s10[p][x[A8] * 19683 + x[C8] * 6561 + x[D8] * 2187 + x[C7] * 729 + x[D7] * 243 + x[E7] * 81 + x[F7] * 27 + x[E8] * 9 + x[F8] * 3 + x[H8]] + 69741;
-	X[2] = s10[p][x[A1] * 19683 + x[A3] * 6561 + x[A4] * 2187 + x[B3] * 729 + x[B4] * 243 + x[B5] * 81 + x[B6] * 27 + x[A5] * 9 + x[A6] * 3 + x[A8]] + 69741;
-	X[3] = s10[p][x[H1] * 19683 + x[H3] * 6561 + x[H4] * 2187 + x[G3] * 729 + x[G4] * 243 + x[G5] * 81 + x[G6] * 27 + x[H5] * 9 + x[H6] * 3 + x[H8]] + 69741;
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
->>>>>>> 265487b (Add evalgame command to eval_builder)
 	X[4] = 29646;
 }
 
 /* gamebase */
 
 typedef struct Game {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	char	move[60];	// MSB = 1: same player's move after opponent's pass
-	int	score;		// black - white
-	int	suboptimal_ply;
-=======
-	char	move[60];
-	int	score;
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 	char	move[60];	// MSB = 1: same player's move after opponent's pass
 	int	score;		// black - white
 	int	suboptimal_ply;
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 } Game;
 
 
@@ -564,18 +505,6 @@ typedef struct Gamebase {
 	Game	games[];
 } Gamebase;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-enum { MAX_N_GAMES = 22000 };
-=======
-enum { MAX_N_GAMES = 1000000 };
->>>>>>> 8e75d91 (add minimax option to eval_builder)
-
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
->>>>>>> 52d06c1 (pass flag in gamebase; increase MAX_N_GAMES in eval_builder)
 Gamebase* gamebase_create(int i)
 {
 	Gamebase* base = (Gamebase*) malloc(sizeof(int) + sizeof(Game) * MAX_N_GAMES);
@@ -584,10 +513,6 @@ Gamebase* gamebase_create(int i)
 	return base;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 static int compare_moves(const void* a, const void* b)
 {
 	return memcmp((*(Game **)a)->move, (*(Game **)b)->move, 60);
@@ -640,7 +565,6 @@ void gamebase_minimax(Gamebase *base, int ply)
 	free(ga);
 }
 
-<<<<<<< HEAD
 /* f5d6c3d3c4.. */
 void gamebase_import(Gamebase* base, const char* file_1, int minimax_ply)
 {
@@ -649,22 +573,6 @@ void gamebase_import(Gamebase* base, const char* file_1, int minimax_ply)
 	Board	b;
 	Game	*g;
 	FILE	*f = fopen(file_1, "r");
-=======
-=======
->>>>>>> 8e75d91 (add minimax option to eval_builder)
-/* f5d6c3d3c4.. */
-void gamebase_import(Gamebase* base, const char* file_1, int minimax_ply)
-{
-	int	i, j, m;
-	char	s[130], *p;
-	Board	b;
-<<<<<<< HEAD
-	FILE* f = fopen(file_1, "r");
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-	Game	*g;
-	FILE	*f = fopen(file_1, "r");
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 
 	if (f == NULL) {
 		fprintf(stderr, "gamebase_import : can't open %s\n", file_1);
@@ -672,20 +580,11 @@ void gamebase_import(Gamebase* base, const char* file_1, int minimax_ply)
 	}
 
 	i = 0;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	g = base->games;
-=======
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-	g = base->games;
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 	while (i < MAX_N_GAMES) {
 		if (fgets(s, sizeof(s), f) == NULL)
 			break;	// EOF or error
 		InitBoard(&b);
-<<<<<<< HEAD
-<<<<<<< HEAD
 		j = 0; p = s;
 		while ((((*p >= 'A') && (*p <= 'H')) || ((*p >= 'a') && (*p <= 'h'))) && *(p + 1)) {
 			m = ((*p - 'A') & 7) + ((*(p + 1) - '1') & 7) * 8;
@@ -697,51 +596,13 @@ void gamebase_import(Gamebase* base, const char* file_1, int minimax_ply)
 					fprintf(stderr, "gamebase_import : illegal move in line %d\n", i);
 					exit(EXIT_FAILURE);
 				}
-<<<<<<< HEAD
-				m |= 0x80;	// opponent pass
-			}
-			g->move[j++] = m;
-			p += 2;
-		}
-		while (j < 60)
-			g->move[j++] = NOMOVE;
-=======
-		j = 0;
-		while (s[j * 2] && s[j * 2 + 1]) {
-			m = ((s[j * 2] - 'A') & 7) + (s[j * 2 + 1] - '1') * 8;
-			base->games[i].move[j++] = m;
-=======
-		j = 0; p = s;
-		while ((((*p >= 'A') && (*p <= 'H')) || ((*p >= 'a') && (*p <= 'h'))) && *(p + 1)) {
-			m = ((*p - 'A') & 7) + ((*(p + 1) - '1') & 7) * 8;
-<<<<<<< HEAD
-			g->move[j++] = m;
->>>>>>> 8e75d91 (add minimax option to eval_builder)
-=======
->>>>>>> 52d06c1 (pass flag in gamebase; increase MAX_N_GAMES in eval_builder)
-			assert(b.square[m] == PEMPTY);
-			if (!MPerform(&b, m)) {
-				b.player ^= (PBLACK ^ PWHITE);
-				if (!MPerform(&b, m))
-					break;
-=======
->>>>>>> c193ebf (Fix score after pass bug in eval_builder)
 				m |= 0x80;	// opponent pass
 			}
 			g->move[j++] = m;
 			p += 2;
 		}
 		while (j < 60)
-<<<<<<< HEAD
-<<<<<<< HEAD
-			base->games[i].move[j++] = PASS;
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-			g->move[j++] = PASS;
->>>>>>> 8e75d91 (add minimax option to eval_builder)
-=======
 			g->move[j++] = NOMOVE;
->>>>>>> 52d06c1 (pass flag in gamebase; increase MAX_N_GAMES in eval_builder)
 		m = b.ScoreDiff;
 		if (b.player != PBLACK)
 			m = -m;
@@ -749,8 +610,6 @@ void gamebase_import(Gamebase* base, const char* file_1, int minimax_ply)
 			m += 64 - b.BWTotal;
 		else if (m < 0)
 			m -= 64 - b.BWTotal;
-<<<<<<< HEAD
-<<<<<<< HEAD
 		g->score = m;
 		g->suboptimal_ply = -1;
 		++i;
@@ -762,32 +621,10 @@ void gamebase_import(Gamebase* base, const char* file_1, int minimax_ply)
 
 	if (minimax_ply)
 		gamebase_minimax(base, minimax_ply);
-=======
-		base->games[i].score = m;
-=======
-		g->score = m;
-		g->suboptimal_ply = -1;
->>>>>>> 8e75d91 (add minimax option to eval_builder)
-		++i;
-		++g;
-	}
-	fclose(f);
-	base->n_games = i;
-<<<<<<< HEAD
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-	printf("eval_builder : read %d games\n", i);
-
-	if (minimax_ply)
-		gamebase_minimax(base, minimax_ply);
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 }
 
 bool game_get_board(Game* g, int ply, Board* b)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	int	i, m, t;
 
 	InitBoard(b);
@@ -801,38 +638,6 @@ bool game_get_board(Game* g, int ply, Board* b)
 		}
 		t = MPerform(b, m & 0x7f);
 		assert(t);
-=======
-	int	i;
-=======
-	int	i, m;
->>>>>>> 8e75d91 (add minimax option to eval_builder)
-=======
-	int	i, m, t;
->>>>>>> c193ebf (Fix score after pass bug in eval_builder)
-
-	InitBoard(b);
-	for (i = 0; i < ply; ++i) {
-		m = g->move[i];
-		if (m == NOMOVE)
-			return false;
-		if (m & 0x80) {	// same player
-			b->player ^= (PBLACK ^ PWHITE);
-<<<<<<< HEAD
-<<<<<<< HEAD
-			if (!MPerform(b, g->move[i]))
-				return false;
-		}
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-		if (!MPerform(b, m & 0x7f))
-			return false;
->>>>>>> 8e75d91 (add minimax option to eval_builder)
-=======
-			b->ScoreDiff = -b->ScoreDiff;
-		}
-		t = MPerform(b, m & 0x7f);
-		assert(t);
->>>>>>> c193ebf (Fix score after pass bug in eval_builder)
 	}
 	return true;
 }
@@ -850,18 +655,8 @@ typedef struct sl_Plot {
 double sl_min(double* x, int n)
 {
 	int	i;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	double	y = x[0];
-	for (i = 1; i < n; ++i)
-=======
-	double	y = INFINITY;
-	for (i = 0; i < n; ++i)
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 	double	y = x[0];
 	for (i = 1; i < n; ++i)
->>>>>>> c193ebf (Fix score after pass bug in eval_builder)
 		if (x[i] < y)
 			y = x[i];
 	return y;
@@ -870,18 +665,8 @@ double sl_min(double* x, int n)
 double sl_max(double* x, int n)
 {
 	int	i;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	double	y = x[0];
 	for (i = 1; i < n; ++i)
-=======
-	double	y = -INFINITY;
-	for (i = 0; i < n; ++i)
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-	double	y = x[0];
-	for (i = 1; i < n; ++i)
->>>>>>> c193ebf (Fix score after pass bug in eval_builder)
 		if (x[i] > y)
 			y = x[i];
 	return y;
@@ -890,18 +675,8 @@ double sl_max(double* x, int n)
 double sl_mean(double* x, int n)
 {
 	int	i;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	double	s = x[0];
-	for (i = 1; i < n; ++i)
-=======
-	double	s = 0.0;
-	for (i = 0; i < n; ++i)
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 	double	s = x[0];
 	for (i = 1; i < n; ++i)
->>>>>>> c193ebf (Fix score after pass bug in eval_builder)
 		s += x[i];
 	return s / n;
 }
@@ -1044,32 +819,18 @@ void sl_plot_axis(sl_Plot* plot, sl_Point* A, sl_Point* B, sl_Point* O)
 		"\t76 128 moveto\n"
 		"\t(%d) show\n"
 		"\t76 70 moveto\n"
-<<<<<<< HEAD
-<<<<<<< HEAD
 		"\t(\\(%d, %d\\)) show\n"
 		"\tnewpath\n\n", A->x, A->y, B->x, B->y, O->x, O->y);
-=======
-		"\t(\\(%d, %d\\)) show\n\n", A->x, A->y, B->x, B->y, O->x, O->y);
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-		"\t(\\(%d, %d\\)) show\n"
-		"\tnewpath\n\n", A->x, A->y, B->x, B->y, O->x, O->y);
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 }
 
 void sl_plot_scatter(sl_Plot* plot, sl_Point* X, int I)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 	int	(*sc)[129][129] = (int(*)[129][129]) calloc(129 * 129, sizeof(int));
 	int	i, x, y, t, mx;
 	double	gray;
 
 	assert(sc);
 	mx = 1;
-<<<<<<< HEAD
 	for (i = 0; i < I; ++i) {
 		x = X[i].x;
 		y = X[i].y;
@@ -1091,35 +852,6 @@ void sl_plot_scatter(sl_Plot* plot, sl_Point* X, int I)
 			}
 		}
 	free(sc);
-=======
-	int	i;
-=======
->>>>>>> 8e75d91 (add minimax option to eval_builder)
-	for (i = 0; i < I; ++i) {
-		x = X[i].x;
-		y = X[i].y;
-		if ((x >= -64) && (x <= 64) && (y >= -64) && (y <= 64)) {
-			t = ++(*sc)[x + 64][y + 64];
-			if ((x | y) && (t > mx))
-				mx = t;
-		}
-	}
-<<<<<<< HEAD
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-	for (y = -64; y <= 64; ++y)
-		for (x = -64; x <= 64; ++x) {
-			t = (*sc)[x + 64][y + 64];
-			if (t) {
-				gray = 0.8 - (double) t / mx;
-				if (gray < 0.0)
-					gray = 0.0;
-				fprintf(plot->f, "\t%f setgray\n", gray);
-				fprintf(plot->f, "\t%d %d 0.75 0 360 arc fill\n", x + 75, y + 68);
-			}
-		}
-	free(sc);
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 }
 
 void sl_plot_close(sl_Plot* plot)
@@ -1626,15 +1358,7 @@ EvalBuilder* eval_builder_create_edax3c(int n_games) {
 	int vector_times[] = { 4,4,4,4,4,4,4,2,4,4,4,4,1 };
 
 	// eval_init();
-<<<<<<< HEAD
-<<<<<<< HEAD
 	eval_builder_set_features = eval_edax_v3_get_packed_features;
-=======
-	eval_builder_set_features = eval_edax_v3r1_get_packed_features;
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-	eval_builder_set_features = eval_edax_v3_get_packed_features;
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
 	return eval_builder_create(13, vector_size, vector_times, 47, n_games);
 }
 
@@ -1669,18 +1393,8 @@ EvalBuilder* eval_builder_create_logistello(int n_games) {
 	int vector_times[] = { 4,4,8,4,4,4,2,4,4,4,4,1 };
 
 	// eval_init();
-<<<<<<< HEAD
-<<<<<<< HEAD
 	eval_builder_set_features = eval_builder_logistello_get_features;
 	return eval_builder_create(12, vector_size, vector_times, 47, n_games);
-=======
-	eval_builder_set_features=eval_builder_logistello_get_features;
-	return eval_builder_create(12,vector_size,vector_times,47,n_games);
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-	eval_builder_set_features = eval_builder_logistello_get_features;
-	return eval_builder_create(12, vector_size, vector_times, 47, n_games);
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
 }
 
 /* create a new EvalBuilder structure for a single feature */
@@ -1689,15 +1403,7 @@ EvalBuilder* eval_builder_create_feature(int n_games, int feature) {
 	int vector_times[] = { 4,1 };
 
 	// eval_init();
-<<<<<<< HEAD
-<<<<<<< HEAD
-	switch (feature) {
-=======
-	switch(feature){
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
 	switch (feature) {
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
 	case EVAL_CORNER3x3:
 		eval_builder_set_features = eval_builder_get_corner3x3_features;
 		vector_size[0] = 10206;
@@ -1777,10 +1483,6 @@ EvalBuilder* eval_builder_create_feature(int n_games, int feature) {
 	}
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 265487b (Add evalgame command to eval_builder)
 /* select feature type and create */
 EvalBuilder* eval_builder_select_feature(int n_games, int eval) {
 	switch (eval) {
@@ -1820,26 +1522,6 @@ void eval_builder_build_features(EvalBuilder* eval, Gamebase* base, int ply) {
 			}
 		++g;
 	}
-=======
-/* build the features */
-void eval_builder_build_features(EvalBuilder* eval, Gamebase* base, int ply) {
-	int n = base->n_games, i, I;
-	Board b;
-	Game* g;
-
-	eval_builder_set_ply(eval, ply);
-	g = base->games;
-	for (i = I = 0; i < n; i++) {
-		if (ply > g->suboptimal_ply)
-			if (game_get_board(g, ply, &b) && (!board_is_game_over(&b) || ply == 60)) {
-				if (b.player == PBLACK) eval->score[I] = g->score;	// b - w
-				else eval->score[I] = -(g->score);	// w - b
-				eval_builder_set_features(&b, eval->feature[I]);
-				I++;
-			}
-		++g;
-	}
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
 	eval->n_games = I;
 }
 
@@ -1888,8 +1570,6 @@ void eval_builder_eval(EvalBuilder* eval, int ply, double* x, double* y) {
 /* count non zero coefficients */
 int eval_builder_count_features(EvalBuilder* eval, int ply) {
 	int i, j, k, n;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	int* h = (int*)calloc(eval->n_data, sizeof(int));
 
 	for (i = 0; i < eval->n_games; i++)
@@ -1897,24 +1577,6 @@ int eval_builder_count_features(EvalBuilder* eval, int ply) {
 			h[eval->feature[i][j]]++;
 	for (k = n = 0; k < eval->n_data; k++)
 		if (h[k] > 0) n++;
-=======
-	int I = eval->n_games, J = eval->n_features, K = eval->n_data;
-	int** x = eval->feature;
-	int* h = (int*)calloc(K, sizeof(int));
-
-	for (i = 0; i < I; i++)
-		for (j = 0; j < J; j++) h[x[i][j]]++;
-	for (k = n = 0; k < K; k++) if (h[k] > 0) n++;
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-	int* h = (int*)calloc(eval->n_data, sizeof(int));
-
-	for (i = 0; i < eval->n_games; i++)
-		for (j = 0; j < eval->n_features; j++)
-			h[eval->feature[i][j]]++;
-	for (k = n = 0; k < eval->n_data; k++)
-		if (h[k] > 0) n++;
->>>>>>> c193ebf (Fix score after pass bug in eval_builder)
 
 	free(h);
 
@@ -1970,15 +1632,7 @@ void eval_builder_set_coefficient(EvalBuilder* eval, double* w) {
 	for (k = 0; k < K; k++) a[k] = (short)(128.0 * w[k] + 0.5);
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-/* compute abs error */
-=======
-/* compute error */
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 /* compute abs error */
->>>>>>> 52d06c1 (pass flag in gamebase; increase MAX_N_GAMES in eval_builder)
 double eval_builder_get_abs_error(EvalBuilder* eval, double* w, double* e) {
 	int i, j, I = eval->n_games, J = eval->n_features;
 	double E = 0.0, score;
@@ -1995,15 +1649,7 @@ double eval_builder_get_abs_error(EvalBuilder* eval, double* w, double* e) {
 	return E;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 /* compute abs error gradient */
-=======
-/* compute error gradient */
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-/* compute abs error gradient */
->>>>>>> 52d06c1 (pass flag in gamebase; increase MAX_N_GAMES in eval_builder)
 void eval_builder_get_abs_error_gradient(EvalBuilder* eval, double* e, double* g, int* N, int N_min) {
 	int i, j, k;
 	const int I = eval->n_games, J = eval->n_features, K = eval->n_data;
@@ -2011,40 +1657,17 @@ void eval_builder_get_abs_error_gradient(EvalBuilder* eval, double* e, double* g
 
 	for (k = 0; k < K; k++) g[k] = 0.0;
 	for (i = 0; i < I; i++) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 		if (e[i] < 0.0) for (j = 0; j < J; j++) g[x[i][j]]++;
 		else if (e[i] > 0.0) for (j = 0; j < J; j++) g[x[i][j]]--;
 	}
 	if (N == NULL)
 		for (k = 0; k < K; k++) g[k] *= 1.0 / I;
-=======
-		if (e[i] < 0.0) for (j = 0; j < J; j++) g[x[i][j]] ++;
-		else if (e[i] > 0.0) for (j = 0; j < J; j++) g[x[i][j]] --;
-	}
-	if (N == NULL) for (k = 0; k < K; k++) g[k] *= 1.0 / I;
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-		if (e[i] < 0.0) for (j = 0; j < J; j++) g[x[i][j]]++;
-		else if (e[i] > 0.0) for (j = 0; j < J; j++) g[x[i][j]]--;
-	}
-	if (N == NULL)
-		for (k = 0; k < K; k++) g[k] *= 1.0 / I;
->>>>>>> 265487b (Add evalgame command to eval_builder)
 	else
 		for (k = 0; k < K; k++)
 			g[k] *= (N[k] < N_min ? 0.0 : (N[k] < 20 ? 0.05 : 1.0 / N[k])) / J;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-/* compute squared error */
-=======
-/* compute error */
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 /* compute squared error */
->>>>>>> 52d06c1 (pass flag in gamebase; increase MAX_N_GAMES in eval_builder)
 double eval_builder_get_squared_error(EvalBuilder* eval, double* w, double* e) {
 	int i, j, I = eval->n_games, J = eval->n_features;
 	double E = 0.0, score;
@@ -2061,15 +1684,7 @@ double eval_builder_get_squared_error(EvalBuilder* eval, double* w, double* e) {
 	return E;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-/* compute squared error gradient */
-=======
-/* compute error gradient */
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 /* compute squared error gradient */
->>>>>>> 52d06c1 (pass flag in gamebase; increase MAX_N_GAMES in eval_builder)
 void eval_builder_get_squared_error_gradient(EvalBuilder* eval, double* e, double* g, int* N, int N_min) {
 	int i, j, k;
 	const int I = eval->n_games, J = eval->n_features, K = eval->n_data;
@@ -2122,15 +1737,7 @@ double eval_builder_minimize_dir_abs_error(EvalBuilder* eval, double* w, double*
 	return l;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-/* minimize the squared error along the gradient direction */
-=======
-/* minimize the absolute error along the gradient direction */
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 /* minimize the squared error along the gradient direction */
->>>>>>> 52d06c1 (pass flag in gamebase; increase MAX_N_GAMES in eval_builder)
 double eval_builder_minimize_dir_squared_error(EvalBuilder* eval, double* w, double* d) {
 	const int I = eval->n_games, J = eval->n_features;
 	int* x;
@@ -2278,15 +1885,7 @@ int eval_builder_conjugate_gradient(EvalBuilder* eval, int ply, EvalOption* opti
 		err1 = sqrt(eval_builder_get_squared_error(eval, w, e));
 	}
 	r1 = 1.0 - (err1 * err1) / (v);
-<<<<<<< HEAD
-<<<<<<< HEAD
 	printf("%2d %4d %6.2f %6.3f %8.4f %12.8f\r", ply, 0, 0.0, 0.0, err1, r1);
-=======
-	printf("%2d %4d %6.2f %6.3f %8.4f %12.8f %9.7f %9.7f\r", ply, 0, 0.0, 0.0, err1, r1, 0.0, 0.0);
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-	printf("%2d %4d %6.2f %6.3f %8.4f %12.8f\r", ply, 0, 0.0, 0.0, err1, r1);
->>>>>>> f2ed22c (Fix equalize, unbias squared in eval_builder)
 	fflush(stdout);
 
 	for (iter = 1; iter <= option->max_iter; iter++) {
@@ -2340,16 +1939,7 @@ int eval_builder_conjugate_gradient(EvalBuilder* eval, int ply, EvalOption* opti
 			}
 			if (option->error_type == EVAL_ABS_ERROR) {
 				lambda = eval_builder_minimize_dir_abs_error(eval, w, d);
-<<<<<<< HEAD
-<<<<<<< HEAD
-			} else {
-=======
-			}
-			else {
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 			} else {
->>>>>>> 265487b (Add evalgame command to eval_builder)
 				lambda = eval_builder_minimize_dir_squared_error(eval, w, d);
 			}
 		}
@@ -2378,25 +1968,10 @@ int eval_builder_conjugate_gradient(EvalBuilder* eval, int ply, EvalOption* opti
 		if (option->unbias_frequency && (iter % option->unbias_frequency == 0)) {
 			if (option->error_type == EVAL_ABS_ERROR) {
 				eval_builder_get_abs_error(eval, w, e);
-<<<<<<< HEAD
-<<<<<<< HEAD
 				w[K - 1] += sl_median(e, I);
 			} else {
 				eval_builder_get_squared_error(eval, w, e);
 				w[K - 1] += sl_mean(e, I);
-=======
-				w[K - 1] += (m = sl_median(e, I));
-			}
-			else {
-				eval_builder_get_abs_error(eval, w, e);
-				w[K - 1] += (m = sl_mean(e, I));
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-				w[K - 1] += sl_median(e, I);
-			} else {
-				eval_builder_get_squared_error(eval, w, e);
-				w[K - 1] += sl_mean(e, I);
->>>>>>> f2ed22c (Fix equalize, unbias squared in eval_builder)
 			}
 		}
 
@@ -2408,28 +1983,11 @@ int eval_builder_conjugate_gradient(EvalBuilder* eval, int ply, EvalOption* opti
 		/* compute and show error */
 		if (option->error_type == EVAL_ABS_ERROR) {
 			err2 = eval_builder_get_abs_error(eval, w, e);
-<<<<<<< HEAD
-<<<<<<< HEAD
 		} else {
 			err2 = sqrt(eval_builder_get_squared_error(eval, w, e));
 		}
 		r2 = 1.0 - err2 * err2 / v;
 		printf("%2d %4d %6.2f %6.3f %8.4f %12.8f %9.7f %9.7f %11.8f\r", ply, iter, lambda, gamma, err2, r2, max_delta, mean_delta, fabs(err2 - err1));
-=======
-		}
-		else {
-=======
-		} else {
->>>>>>> 265487b (Add evalgame command to eval_builder)
-			err2 = sqrt(eval_builder_get_squared_error(eval, w, e));
-		}
-		r2 = 1.0 - err2 * err2 / v;
-<<<<<<< HEAD
-		printf("%2d  %4d %6.2f %6.3f %8.4f %12.8f %9.7f %9.7f  %10.8f \r", ply, iter, lambda, gamma, err2, r2, max_delta, mean_delta, fabs(err2 - err1));
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-		printf("%2d %4d %6.2f %6.3f %8.4f %12.8f %9.7f %9.7f %11.8f\r", ply, iter, lambda, gamma, err2, r2, max_delta, mean_delta, fabs(err2 - err1));
->>>>>>> f2ed22c (Fix equalize, unbias squared in eval_builder)
 		fflush(stdout);
 		if ((iter > option->min_iter || ply < 2) && (fabs(err2 - err1) <= option->accuracy && fabs(max_delta) < 1000 * option->accuracy && fabs(mean_delta) <= 10 * option->accuracy))
 			break;
@@ -2449,27 +2007,11 @@ int eval_builder_conjugate_gradient(EvalBuilder* eval, int ply, EvalOption* opti
 	if (option->unbias_frequency) {
 		if (option->error_type == EVAL_ABS_ERROR) {
 			eval_builder_get_abs_error(eval, w, e);
-<<<<<<< HEAD
-<<<<<<< HEAD
 			w[K - 1] += sl_median(e, I);
 		}
 		else {
 			eval_builder_get_squared_error(eval, w, e);
 			w[K - 1] += sl_mean(e, I);
-=======
-			w[K - 1] += (m = sl_median(e, I));
-		}
-		else {
-			eval_builder_get_abs_error(eval, w, e);
-			w[K - 1] += (m = sl_mean(e, I));
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-			w[K - 1] += sl_median(e, I);
-		}
-		else {
-			eval_builder_get_squared_error(eval, w, e);
-			w[K - 1] += sl_mean(e, I);
->>>>>>> f2ed22c (Fix equalize, unbias squared in eval_builder)
 		}
 	}
 
@@ -2505,15 +2047,7 @@ void eval_builder_build(EvalBuilder* eval, Gamebase* base, EvalOption* option) {
 	printf("error    = %d\n", option->error_type);
 	printf("algo     = %d\n", option->minimization_algorithm);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	printf("ply iter lambda gamma  error     r2         max_delta mean_delta err_delta\n");
-=======
-	printf("ply iter  lambda gamma  error     r2         max_delta mean_delta err_delta\n");
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-	printf("ply iter lambda gamma  error     r2         max_delta mean_delta err_delta\n");
->>>>>>> c193ebf (Fix score after pass bug in eval_builder)
 	for (ply = 0; ply <= 60; ply++) {
 		eval_builder_build_features(eval, base, ply);
 		eval_builder_conjugate_gradient(eval, ply, option);
@@ -2621,15 +2155,7 @@ void eval_builder_get_sub_features(int n, int offset, int feature, int** subfeat
 		}
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-/*  filter spatially (between patterns) the coefficients */
-=======
-/*  filter temporally (between plies) the coefficients */
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 /*  filter spatially (between patterns) the coefficients */
->>>>>>> 265487b (Add evalgame command to eval_builder)
 void eval_builder_spatial_filter(EvalBuilder* eval, Gamebase* base, int max_iter, double accuracy) {
 	int power_3[] = { 1,3,9,27,81,243,729,2187,6561,19683,59049,177147,531441 };
 	int i, j, k, p, n, l, iter;
@@ -2847,41 +2373,15 @@ void eval_builder_stat(EvalBuilder* eval, Gamebase* base) {
 	y = (double*)malloc(base->n_games * sizeof(double));
 	e = (double*)malloc(base->n_games * sizeof(double));
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	printf("  feat coeffs evmean evsdev  evmin  evmax scmean scsdev smin smax    a       b       r    erbias ersdev ermin  ermax\n");
-=======
-	printf("feature\tcoeffs\tev mean\tev sdev\tev min\tev max\tsc mean\tsc sdev\tsc min\tsc max\ta\tb\tr\terrbias\terrsdev\terrmin\terrmax\n");
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-	printf("  feat coeff evmean evsdev  evmin  evmax scmean  scsdev smin smax     a       b       r    erbias ersdev ermin  ermax\n");
->>>>>>> 8e75d91 (add minimax option to eval_builder)
-=======
-	printf("  feat coeffs evmean evsdev  evmin  evmax scmean  scsdev smin smax     a       b       r    erbias ersdev ermin  ermax\n");
->>>>>>> 265487b (Add evalgame command to eval_builder)
-=======
-	printf("  feat coeffs evmean evsdev  evmin  evmax scmean scsdev smin smax    a       b       r    erbias ersdev ermin  ermax\n");
->>>>>>> c193ebf (Fix score after pass bug in eval_builder)
 
 	for (ply = 0; ply <= 60; ply++) {
 		eval_builder_build_features(eval, base, ply);
 		eval_builder_eval(eval, ply, x, y);
 		n = eval->n_games;
 		for (i = 0; i < n; i++) e[i] = y[i] - x[i];
-<<<<<<< HEAD
-<<<<<<< HEAD
-		printf("%6d", eval_builder_count_features(eval, ply));
-		printf("%7d", eval_builder_count_significant_coefficients(eval, ply));
-=======
 		printf("%6d", eval_builder_count_features(eval, ply));
-<<<<<<< HEAD
-		printf("%6d", eval_builder_count_significant_coefficients(eval, ply));
->>>>>>> 8e75d91 (add minimax option to eval_builder)
-=======
 		printf("%7d", eval_builder_count_significant_coefficients(eval, ply));
->>>>>>> 265487b (Add evalgame command to eval_builder)
 		printf("%7.2f", sl_mean(x, n));
 		printf("%7.2f", sl_standard_deviation(x, n));
 		printf("%7.2f", sl_min(x, n));
@@ -2897,28 +2397,6 @@ void eval_builder_stat(EvalBuilder* eval, Gamebase* base) {
 		printf("%7.2f", sl_standard_deviation(e, n));
 		printf("%7.2f", sl_min(e, n));
 		printf("%7.2f\n", sl_max(e, n));
-<<<<<<< HEAD
-=======
-		printf("%6d\t", eval_builder_count_features(eval, ply));
-		printf("%6d\t", eval_builder_count_significant_coefficients(eval, ply));
-		printf("%5.2f\t", sl_mean(x, n));
-		printf("%5.2f\t", sl_standard_deviation(x, n));
-		printf("%5.2f\t", sl_min(x, n));
-		printf("%5.2f\t", sl_max(x, n));
-		printf("%5.2f\t", sl_mean(y, n));
-		printf("%5.2f\t", sl_standard_deviation(y, n));
-		printf("%3.0f\t", sl_min(y, n));
-		printf("%3.0f\t", sl_max(y, n));
-		printf("%7.4f\t", sl_regression_a(x, y, n));
-		printf("%7.4f\t", sl_regression_b(x, y, n));
-		printf("%7.4f\t", sl_correlation_r(x, y, n));
-		printf("%5.2f\t", sl_mean(e, n));
-		printf("%5.2f\t", sl_standard_deviation(e, n));
-		printf("%5.2f\t", sl_min(e, n));
-		printf("%5.2f\n", sl_max(e, n));
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 		fflush(stdout);
 	}
 	free(e);
@@ -3042,30 +2520,14 @@ void eval_builder_plot(EvalBuilder* eval, Gamebase* base, const char* plot_file)
 	for (ply = 0; ply <= 60; ply++) {
 		eval_builder_build_features(eval, base, ply);
 		eval_builder_eval(eval, ply, x, y);
-<<<<<<< HEAD
-<<<<<<< HEAD
-		for (i = 0; i < eval->n_games; i++) X[i].x = x[i], X[i].y = y[i];
-=======
-		for (i = 0; i < I; i++) X[i].x = x[i], X[i].y = y[i];
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 		for (i = 0; i < eval->n_games; i++) X[i].x = x[i], X[i].y = y[i];
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 		sprintf(file, "%s-%d.eps", plot_file, ply);
 		sprintf(title, "ply %d.eps", ply);
 
 		plot = sl_plot_open(file);
 		sl_plot_titles(plot, "eval", "score", title);
 		sl_plot_axis(plot, &A, &B, &O);
-<<<<<<< HEAD
-<<<<<<< HEAD
 		sl_plot_scatter(plot, X, eval->n_games);
-=======
-		sl_plot_scatter(plot, X, I);
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-		sl_plot_scatter(plot, X, eval->n_games);
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 		sl_plot_close(plot);
 	}
 
@@ -3074,15 +2536,7 @@ void eval_builder_plot(EvalBuilder* eval, Gamebase* base, const char* plot_file)
 	free(X);
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-/* show weights of a feature */
-=======
-/* show weights of a feature*/
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 /* show weights of a feature */
->>>>>>> 265487b (Add evalgame command to eval_builder)
 void eval_builder_show_feature_weights(EvalBuilder* eval, int type, const char* feature) {
 	int	i, k, n = eval->vector_squares[type], ply;
 	const int K = eval->n_data;
@@ -3135,31 +2589,14 @@ void eval_builder_show_feature_weights(EvalBuilder* eval, int type, const char*
 /* print version */
 void print_version(void) {
 	printf(	"eval_builder %d.%d %s\n"
-<<<<<<< HEAD
-<<<<<<< HEAD
-		"Copyright (c) 1998-2000 Richard A. Delorme, 2023 Toshihiko Okuhara\n"
-=======
-		"Copyright (c) 1998-2000 Richard A. Delorme.\n"
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 		"Copyright (c) 1998-2000 Richard A. Delorme, 2023 Toshihiko Okuhara\n"
->>>>>>> c193ebf (Fix score after pass bug in eval_builder)
 		"All Rights Reserved.\n\n", EDAX_VERSION, EDAX_RELEASE, __DATE__);
 }
 
 /* print usage */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifdef _MSC_VER
 __declspec(noreturn)
 #endif
-=======
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-#ifdef _MSC_VER
-__declspec(noreturn)
-#endif
->>>>>>> 265487b (Add evalgame command to eval_builder)
 void print_usage(void) {
 	fprintf(stderr,
 		"usage : eval_builder <command> <option> <parameters>\n"
@@ -3183,28 +2620,14 @@ void print_usage(void) {
 		"    ABFG CC BB AA D8 D7 D6 D5 D4 D3 angle+X corner+block\n"
 		"  -unbias <int>    unbias the evaluation function\n"
 		"  -equalize <int>  equalize the evaluation function weight\n"
-<<<<<<< HEAD
-<<<<<<< HEAD
-		"  -zero <int>      zero out rare features frequency\n"
-=======
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 		"  -zero <int>      zero out rare features frequency\n"
->>>>>>> 265487b (Add evalgame command to eval_builder)
 		"  -restart <int>   restart frequency\n"
 		"  -round <int>     round frequency\n"
 		"  -filter <string> filter the evaluation function weight first\n"
 		"    spatial        filter from sub-configuration\n"
 		"    temporal       filter through all plies\n"
 		"  -split <int>[,<int>]  ply to split file before merging them\n"
-<<<<<<< HEAD
-<<<<<<< HEAD
 		"  -minimax <int>   minimax game score up to n-th move\n"
-=======
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-		"  -minimax <int>   minimax game score up to n-th move\n"
->>>>>>> 8e75d91 (add minimax option to eval_builder)
 		"commands:\n"
 		"build <option> game_file [eval_file_in] eval_file_out\n"
 		"process <option> game_file [eval_file_in] eval_file_out\n"
@@ -3246,10 +2669,6 @@ int main(int argc, char** argv) {
 	Gamebase* base;
 	EvalBuilder* eval_data, * eval_data_1, * eval_data_2;
 	EvalOption option = {
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 52d06c1 (pass flag in gamebase; increase MAX_N_GAMES in eval_builder)
 		0,			// min_iter
 		1000,			// max_iter
 		0.0001,			// tol		accuracy
@@ -3263,28 +2682,6 @@ int main(int argc, char** argv) {
 		1.0,			// alpha
 		0.1,			// beta
 		0			// minimax	minimax_ply
-<<<<<<< HEAD
-=======
-		0,
-		1000,
-		0.0001,
-		0,
-		0,
-		0,
-		0,
-		50,
-		EVAL_STEEPEST_DESCENT,
-		EVAL_SQUARED_ERROR,
-		1.0,
-<<<<<<< HEAD
-		0.1
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-		0.1,
-		0
->>>>>>> 8e75d91 (add minimax option to eval_builder)
-=======
->>>>>>> 52d06c1 (pass flag in gamebase; increase MAX_N_GAMES in eval_builder)
 	};
 
 	int filter, eval;
@@ -3351,49 +2748,7 @@ int main(int argc, char** argv) {
 			// else if (strcmp(argv[i],"edax3d")==0) eval = EVAL_EDAX_3d;
 			else if (strcmp(argv[i], "ajax") == 0) eval = EVAL_AJAX;
 			else if (strcmp(argv[i], "logistello") == 0) eval = EVAL_LOGISTELLO;
-<<<<<<< HEAD
-<<<<<<< HEAD
-			else eval = get_eval_feature_option(argv[i]);
-		}
-		else if (strcmp(argv[i], "-feature") == 0) {
-			feature = get_eval_feature_option(argv[++i]);
-		}
-		else if (strcmp(argv[i], "-split") == 0) {
-			split0 = split1 = atoi(argv[++i]);
-			if ((p = strchr(argv[i], ',')))
-				split1 = atoi(p + 1);
-		}
-		else if (strcmp(argv[i], "-filter") == 0) {
-			if (strcmp(argv[++i], "spatial") == 0) filter = FILTER_SPATIAL;
-			else if (strcmp(argv[i], "temporal") == 0) filter = FILTER_TEMPORAL;
-			else print_usage();
-		}
-		else if (strcmp(argv[i], "-minimax") == 0) {
-			option.minimax_ply = atoi(argv[++i]);
-=======
-			else if (strcmp(argv[i], "corner3x3") == 0) eval = EVAL_CORNER3x3;
-			else if (strcmp(argv[i], "corner5x2") == 0) eval = EVAL_CORNER5x2;
-			else if (strcmp(argv[i], "edge") == 0) eval = EVAL_EDGE;
-			else if (strcmp(argv[i], "edgeX") == 0) eval = EVAL_EDGE_X;
-			else if (strcmp(argv[i], "edgeC") == 0) eval = EVAL_EDGE_C;
-			else if (strcmp(argv[i], "edgeCX") == 0) eval = EVAL_EDGE_CX;
-			else if (strcmp(argv[i], "edgeFG") == 0) eval = EVAL_EDGE_FG;
-			else if (strcmp(argv[i], "ABFG") == 0) eval = EVAL_ABFG;
-			else if (strcmp(argv[i], "CC") == 0) eval = EVAL_CC;
-			else if (strcmp(argv[i], "BB") == 0) eval = EVAL_BB;
-			else if (strcmp(argv[i], "AA") == 0) eval = EVAL_AA;
-			else if (strcmp(argv[i], "D8") == 0) eval = EVAL_D8;
-			else if (strcmp(argv[i], "D7") == 0) eval = EVAL_D7;
-			else if (strcmp(argv[i], "D6") == 0) eval = EVAL_D6;
-			else if (strcmp(argv[i], "D5") == 0) eval = EVAL_D5;
-			else if (strcmp(argv[i], "D4") == 0) eval = EVAL_D4;
-			else if (strcmp(argv[i], "D3") == 0) eval = EVAL_D3;
-			else if (strcmp(argv[i], "angle+X") == 0) eval = EVAL_ANGLE_X;
-			else if (strcmp(argv[i], "corner+block") == 0) eval = EVAL_CORNER_BLOCK;
-			else print_usage();
-=======
 			else eval = get_eval_feature_option(argv[i]);
->>>>>>> 265487b (Add evalgame command to eval_builder)
 		}
 		else if (strcmp(argv[i], "-feature") == 0) {
 			feature = get_eval_feature_option(argv[++i]);
@@ -3407,7 +2762,6 @@ int main(int argc, char** argv) {
 			if (strcmp(argv[++i], "spatial") == 0) filter = FILTER_SPATIAL;
 			else if (strcmp(argv[i], "temporal") == 0) filter = FILTER_TEMPORAL;
 			else print_usage();
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
 		}
 		else if (strcmp(argv[i], "-minimax") == 0) {
 			option.minimax_ply = atoi(argv[++i]);
@@ -3433,35 +2787,6 @@ int main(int argc, char** argv) {
 
 		eval_data = eval_builder_select_feature(base->n_games, eval);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-		switch (eval) {
-			/* case EVAL_EDAX:
-				eval_data = eval_builder_create_edax(base->n_games);
-				break;
-			case EVAL_EDAX_3b:
-				eval_data = eval_builder_create_edax3b(base->n_games);
-				break; */
-			case EVAL_EDAX_3c:
-				eval_data = eval_builder_create_edax3c(base->n_games);
-				break;
-			/* case EVAL_EDAX_3d:
-				eval_data = eval_builder_create_edax3d(base->n_games);
-				break; */
-			case EVAL_AJAX:
-				fprintf(stderr, "NOT IMPLEMENTED YET\n");
-				exit(EXIT_FAILURE);
-			case EVAL_LOGISTELLO:
-				eval_data = eval_builder_create_logistello(base->n_games);
-				break;
-			default:
-				eval_data = eval_builder_create_feature(base->n_games, eval);
-				break;
-		}
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
->>>>>>> 265487b (Add evalgame command to eval_builder)
 		if (file_3 != NULL)
 			eval_builder_read(eval_data, file_2);
 
@@ -3480,39 +2805,8 @@ int main(int argc, char** argv) {
 		base = gamebase_create(0);
 		gamebase_import(base, file_1, option.minimax_ply);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-		eval_data = eval_builder_select_feature(base->n_games, eval);
-
-=======
-		switch (eval) {
-			/* case EVAL_EDAX:
-				eval_data = eval_builder_create_edax(base->n_games);
-				break;
-			case EVAL_EDAX_3b:
-				eval_data = eval_builder_create_edax3b(base->n_games);
-				break; */
-			case EVAL_EDAX_3c:
-				eval_data = eval_builder_create_edax3c(base->n_games);
-				break;
-			/* case EVAL_EDAX_3d:
-				eval_data = eval_builder_create_edax3d(base->n_games);
-				break; */
-			case EVAL_AJAX:
-				fprintf(stderr, "NOT IMPLEMENTED YET\n");
-				exit(EXIT_FAILURE);
-			case EVAL_LOGISTELLO:
-				eval_data = eval_builder_create_logistello(base->n_games);
-				break;
-			default:
-				eval_data = eval_builder_create_feature(base->n_games, eval);
-				break;
-		}
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 		eval_data = eval_builder_select_feature(base->n_games, eval);
 
->>>>>>> 265487b (Add evalgame command to eval_builder)
 		// if (file_3 != NULL)
 		eval_builder_read(eval_data, file_2);
 
@@ -3542,35 +2836,6 @@ int main(int argc, char** argv) {
 
 		eval_data = eval_builder_select_feature(base->n_games, eval);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-		switch (eval) {
-			/* case EVAL_EDAX:
-				eval_data = eval_builder_create_edax(base->n_games);
-				break;
-			case EVAL_EDAX_3b:
-				eval_data = eval_builder_create_edax3b(base->n_games);
-				break; */
-			case EVAL_EDAX_3c:
-				eval_data = eval_builder_create_edax3c(base->n_games);
-				break;
-			/* case EVAL_EDAX_3d:
-				eval_data = eval_builder_create_edax3d(base->n_games);
-				break; */
-			case EVAL_AJAX:
-				fprintf(stderr, "NOT IMPLEMENTED YET\n");
-				exit(EXIT_FAILURE);
-			case EVAL_LOGISTELLO:
-				eval_data = eval_builder_create_logistello(base->n_games);
-				break;
-			default:
-				eval_data = eval_builder_create_feature(base->n_games, eval);
-				break;
-		}
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
->>>>>>> 265487b (Add evalgame command to eval_builder)
 		eval_builder_read(eval_data, file_2);
 		eval_builder_stat(eval_data, base);
 	}
@@ -3579,47 +2844,9 @@ int main(int argc, char** argv) {
 	else if (strcmp(argv[1], "merge") == 0) {
 		if (file_1 == NULL || file_2 == NULL || file_3 == NULL) print_usage();
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 		eval_data_1 = eval_builder_select_feature(1, eval);
 		eval_data_2 = eval_builder_select_feature(1, eval);
 
-=======
-		switch (eval) {
-			/* case EVAL_EDAX:
-				eval_data_1 = eval_builder_create_edax(1);
-				eval_data_2 = eval_builder_create_edax(1);
-				break;
-			case EVAL_EDAX_3b:
-				eval_data_1 = eval_builder_create_edax3b(1);
-				eval_data_2 = eval_builder_create_edax3b(1);
-				break; */
-			case EVAL_EDAX_3c:
-				eval_data_1 = eval_builder_create_edax3c(1);
-				eval_data_2 = eval_builder_create_edax3c(1);
-				break;
-			/* case EVAL_EDAX_3d:
-				eval_data_1 = eval_builder_create_edax3d(1);
-				eval_data_2 = eval_builder_create_edax3d(1);
-				break; */
-			case EVAL_AJAX:
-				fprintf(stderr, "NOT IMPLEMENTED YET\n");
-				exit(EXIT_FAILURE);
-			case EVAL_LOGISTELLO:
-				eval_data_1 = eval_builder_create_logistello(1);
-				eval_data_2 = eval_builder_create_logistello(1);
-				break;
-			default:
-				eval_data_1 = eval_builder_create_feature(1, eval);
-				eval_data_2 = eval_builder_create_feature(1, eval);
-				break;
-		}
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
-		eval_data_1 = eval_builder_select_feature(1, eval);
-		eval_data_2 = eval_builder_select_feature(1, eval);
-
->>>>>>> 265487b (Add evalgame command to eval_builder)
 		eval_builder_read(eval_data_1, file_1);
 		eval_builder_read(eval_data_2, file_2);
 
@@ -3631,47 +2858,9 @@ int main(int argc, char** argv) {
 	else if (strcmp(argv[1], "diff") == 0) {
 		if (file_1 == NULL || file_2 == NULL) print_usage();
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-		eval_data_1 = eval_builder_select_feature(1, eval);
-		eval_data_2 = eval_builder_select_feature(1, eval);
-
-=======
-		switch (eval) {
-			/* case EVAL_EDAX:
-				eval_data_1 = eval_builder_create_edax(1);
-				eval_data_2 = eval_builder_create_edax(1);
-				break;
-			case EVAL_EDAX_3b:
-				eval_data_1 = eval_builder_create_edax3b(1);
-				eval_data_2 = eval_builder_create_edax3b(1);
-				break; */
-			case EVAL_EDAX_3c:
-				eval_data_1 = eval_builder_create_edax3c(1);
-				eval_data_2 = eval_builder_create_edax3c(1);
-				break;
-			/* case EVAL_EDAX_3d:
-				eval_data_1 = eval_builder_create_edax3d(1);
-				eval_data_2 = eval_builder_create_edax3d(1);
-				break; */
-			case EVAL_AJAX:
-				fprintf(stderr, "NOT IMPLEMENTED YET\n");
-				exit(EXIT_FAILURE);
-			case EVAL_LOGISTELLO:
-				eval_data_1 = eval_builder_create_logistello(1);
-				eval_data_2 = eval_builder_create_logistello(1);
-				break;
-			default:
-				eval_data_1 = eval_builder_create_feature(1, eval);
-				eval_data_2 = eval_builder_create_feature(1, eval);
-				break;
-		}
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 		eval_data_1 = eval_builder_select_feature(1, eval);
 		eval_data_2 = eval_builder_select_feature(1, eval);
 
->>>>>>> 265487b (Add evalgame command to eval_builder)
 		eval_builder_read(eval_data_1, file_1);
 		eval_builder_read(eval_data_2, file_2);
 
@@ -3687,35 +2876,6 @@ int main(int argc, char** argv) {
 
 		eval_data = eval_builder_select_feature(base->n_games, eval);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-		switch (eval) {
-			/* case EVAL_EDAX:
-				eval_data = eval_builder_create_edax(base->n_games);
-				break;
-			case EVAL_EDAX_3b:
-				eval_data = eval_builder_create_edax3b(base->n_games);
-				break; */
-			case EVAL_EDAX_3c:
-				eval_data = eval_builder_create_edax3c(base->n_games);
-				break;
-			/* case EVAL_EDAX_3d:
-				eval_data = eval_builder_create_edax3d(base->n_games);
-				break; */
-			case EVAL_AJAX:
-				fprintf(stderr, "NOT IMPLEMENTED YET\n");
-				exit(EXIT_FAILURE);
-			case EVAL_LOGISTELLO:
-				eval_data = eval_builder_create_logistello(base->n_games);
-				break;
-			default:
-				eval_data = eval_builder_create_feature(base->n_games, eval);
-				break;
-		}
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
->>>>>>> 265487b (Add evalgame command to eval_builder)
 		eval_builder_read(eval_data, file_2);
 		eval_builder_plot(eval_data, base, file_3);
 	}
@@ -3824,29 +2984,6 @@ int main(int argc, char** argv) {
 			printf("[%d] : %s\n", feature, file_2);
 			eval_builder_show_feature_weights(eval_data, feature, file_2);
 		}
-<<<<<<< HEAD
-	}
-
-	/* evalgame */
-	else if (strcmp(argv[1], "evalgame") == 0) {
-		if (file_1 == NULL || file_2 == NULL) print_usage();
-
-		base = gamebase_create(0);
-		gamebase_import(base, file_1, option.minimax_ply);
-
-		eval_data = eval_builder_select_feature(base->n_games, eval);
-
-		eval_builder_read(eval_data, file_2);
-		eval_builder_evalgame(eval_data, base);
-	}
-
-<<<<<<< HEAD
-	/* print usage */
-=======
-		/* print usage */
-	}
->>>>>>> 6336a36 (Ad hoc restore of eval_builder)
-=======
 	}
 
 	/* evalgame */
@@ -3863,7 +3000,6 @@ int main(int argc, char** argv) {
 	}
 
 	/* print usage */
->>>>>>> 265487b (Add evalgame command to eval_builder)
 	else {
 		print_usage();
 	}
diff --git a/src/eval_sse.c b/src/eval_sse.c
index c9d7b41..4681436 100644
--- a/src/eval_sse.c
+++ b/src/eval_sse.c
@@ -1,116 +1,28 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file eval_sse.c
  *
  * SSE/AVX translation of some eval.c functions
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 2018 - 2023
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 2018 - 2020
- * @author Toshihiko Okuhara
- * @version 4.4
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
- * @date 2018 - 2022
-=======
- * @date 2018 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
- * @author Toshihiko Okuhara
- * @version 4.5
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
  */
 
 #include <assert.h>
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #include "bit_intrinsics.h"
-=======
-#include "bit.h"
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-#include "bit_intrinsics.h"
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 #include "board.h"
 #include "move.h"
 #include "eval.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 extern const EVAL_FEATURE_V EVAL_FEATURE[65];
 extern const EVAL_FEATURE_V EVAL_FEATURE_all_opponent;
 
 #ifdef __ARM_NEON
-<<<<<<< HEAD
-=======
-#if defined(__ARM_NEON__) || defined(_M_ARM) || defined(_M_ARM64)
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-=======
-extern const EVAL_FEATURE_V EVAL_FEATURE[65];
-extern const EVAL_FEATURE_V EVAL_FEATURE_all_opponent;
-
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
-#ifdef __ARM_NEON__
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 #define __m128i		int16x8_t
 #define	_mm_add_epi16	vaddq_s16
 #define _mm_sub_epi16	vsubq_s16
 #define _mm_slli_epi16	vshlq_n_s16
-<<<<<<< HEAD
-#endif
-
-#if defined(hasSSE2) || defined(__ARM_NEON) || defined(USE_MSVC_X86)
-
-void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *eval_in)
-{
-  #ifdef __AVX2__
-	__m256i	f0 = eval_in->feature.v16[0];
-	__m256i	f1 = eval_in->feature.v16[1];
-	__m256i	f2 = eval_in->feature.v16[2];
-
-	if (eval_in->n_empties & 1) {
-		f0 = _mm256_sub_epi16(f0, EVAL_FEATURE[x].v16[0]);
-		f1 = _mm256_sub_epi16(f1, EVAL_FEATURE[x].v16[1]);
-		f2 = _mm256_sub_epi16(f2, EVAL_FEATURE[x].v16[2]);
-
-		foreach_bit (x, f) {
-			f0 = _mm256_add_epi16(f0, EVAL_FEATURE[x].v16[0]);
-			f1 = _mm256_add_epi16(f1, EVAL_FEATURE[x].v16[1]);
-			f2 = _mm256_add_epi16(f2, EVAL_FEATURE[x].v16[2]);
-		}
-
-	} else {
-		f0 = _mm256_sub_epi16(f0, _mm256_slli_epi16(EVAL_FEATURE[x].v16[0], 1));
-		f1 = _mm256_sub_epi16(f1, _mm256_slli_epi16(EVAL_FEATURE[x].v16[1], 1));
-		f2 = _mm256_sub_epi16(f2, _mm256_slli_epi16(EVAL_FEATURE[x].v16[2], 1));
-
-		foreach_bit (x, f) {
-			f0 = _mm256_sub_epi16(f0, EVAL_FEATURE[x].v16[0]);
-			f1 = _mm256_sub_epi16(f1, EVAL_FEATURE[x].v16[1]);
-			f2 = _mm256_sub_epi16(f2, EVAL_FEATURE[x].v16[2]);
-		}
-	}
-
-=======
-typedef union {
-	unsigned short us[48];
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
-	__m128i	v8[6];
-=======
->>>>>>> f2da03e (Refine arm builds adding neon support.)
 #endif
 
 #if defined(hasSSE2) || defined(__ARM_NEON) || defined(USE_MSVC_X86)
@@ -144,17 +56,11 @@ void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *ev
 			f2 = _mm256_sub_epi16(f2, EVAL_FEATURE[x].v16[2]);
 		}
 	}
-<<<<<<< HEAD
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
 	eval_out->feature.v16[0] = f0;
 	eval_out->feature.v16[1] = f1;
 	eval_out->feature.v16[2] = f2;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
   #else
 	__m128i	f0 = eval_in->feature.v8[0];
 	__m128i	f1 = eval_in->feature.v8[1];
@@ -162,34 +68,14 @@ void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *ev
 	__m128i	f3 = eval_in->feature.v8[3];
 	__m128i	f4 = eval_in->feature.v8[4];
 	__m128i	f5 = eval_in->feature.v8[5];
-<<<<<<< HEAD
-
-	if (eval_in->n_empties & 1) {
-=======
-#else
-	int	j;
-	widest_register	b;
-=======
-  #else
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-	const __m128i *ef;
-
-<<<<<<< HEAD
-#ifdef HAS_CPU_64
-	foreach_bit(x, f) {
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 
 	if (eval_in->n_empties & 1) {
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
 		f0 = _mm_sub_epi16(f0, EVAL_FEATURE[x].v8[0]);
 		f1 = _mm_sub_epi16(f1, EVAL_FEATURE[x].v8[1]);
 		f2 = _mm_sub_epi16(f2, EVAL_FEATURE[x].v8[2]);
 		f3 = _mm_sub_epi16(f3, EVAL_FEATURE[x].v8[3]);
 		f4 = _mm_sub_epi16(f4, EVAL_FEATURE[x].v8[4]);
 		f5 = _mm_sub_epi16(f5, EVAL_FEATURE[x].v8[5]);
-<<<<<<< HEAD
-<<<<<<< HEAD
 
 		foreach_bit (x, f) {
 			f0 = _mm_add_epi16(f0, EVAL_FEATURE[x].v8[0]);
@@ -209,7 +95,6 @@ void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *ev
 		f5 = _mm_sub_epi16(f5, _mm_slli_epi16(EVAL_FEATURE[x].v8[5], 1));
 
 		foreach_bit (x, f) {
-<<<<<<< HEAD
 			f0 = _mm_sub_epi16(f0, EVAL_FEATURE[x].v8[0]);
 			f1 = _mm_sub_epi16(f1, EVAL_FEATURE[x].v8[1]);
 			f2 = _mm_sub_epi16(f2, EVAL_FEATURE[x].v8[2]);
@@ -219,112 +104,19 @@ void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *ev
 		}
 	}
 
-=======
-	}
-
-#else
-	unsigned int	fl = (unsigned int) f;
-	unsigned int	fh = (unsigned int) (f >> 32);
-
-	foreach_bit_32(x, fl) {
-		f0 = _mm_sub_epi16(f0, EVAL_FEATURE[x].v8[0]);
-		f1 = _mm_sub_epi16(f1, EVAL_FEATURE[x].v8[1]);
-		f2 = _mm_sub_epi16(f2, EVAL_FEATURE[x].v8[2]);
-		f3 = _mm_sub_epi16(f3, EVAL_FEATURE[x].v8[3]);
-		f4 = _mm_sub_epi16(f4, EVAL_FEATURE[x].v8[4]);
-		f5 = _mm_sub_epi16(f5, EVAL_FEATURE[x].v8[5]);
-	}
-	foreach_bit_32(x, fh) {
-		f0 = _mm_sub_epi16(f0, EVAL_FEATURE[x + 32].v8[0]);
-		f1 = _mm_sub_epi16(f1, EVAL_FEATURE[x + 32].v8[1]);
-		f2 = _mm_sub_epi16(f2, EVAL_FEATURE[x + 32].v8[2]);
-		f3 = _mm_sub_epi16(f3, EVAL_FEATURE[x + 32].v8[3]);
-		f4 = _mm_sub_epi16(f4, EVAL_FEATURE[x + 32].v8[4]);
-		f5 = _mm_sub_epi16(f5, EVAL_FEATURE[x + 32].v8[5]);
-	}
-#endif
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	ef = EVAL_FEATURE[x].v8;
-	__m128i	f0 = _mm_sub_epi16(eval_in->feature.v8[0], _mm_slli_epi16(ef[0], 1));
-	__m128i	f1 = _mm_sub_epi16(eval_in->feature.v8[1], _mm_slli_epi16(ef[1], 1));
-	__m128i	f2 = _mm_sub_epi16(eval_in->feature.v8[2], _mm_slli_epi16(ef[2], 1));
-	__m128i	f3 = _mm_sub_epi16(eval_in->feature.v8[3], _mm_slli_epi16(ef[3], 1));
-	__m128i	f4 = _mm_sub_epi16(eval_in->feature.v8[4], _mm_slli_epi16(ef[4], 1));
-	__m128i	f5 = _mm_sub_epi16(eval_in->feature.v8[5], _mm_slli_epi16(ef[5], 1));
-=======
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
-
-		foreach_bit_r (x, f, j, r) {
-			f0 = _mm_add_epi16(f0, EVAL_FEATURE[x].v8[0]);
-			f1 = _mm_add_epi16(f1, EVAL_FEATURE[x].v8[1]);
-			f2 = _mm_add_epi16(f2, EVAL_FEATURE[x].v8[2]);
-			f3 = _mm_add_epi16(f3, EVAL_FEATURE[x].v8[3]);
-			f4 = _mm_add_epi16(f4, EVAL_FEATURE[x].v8[4]);
-			f5 = _mm_add_epi16(f5, EVAL_FEATURE[x].v8[5]);
-		}
-
-<<<<<<< HEAD
->>>>>>> f2da03e (Refine arm builds adding neon support.)
 	eval_out->feature.v8[0] = f0;
 	eval_out->feature.v8[1] = f1;
 	eval_out->feature.v8[2] = f2;
 	eval_out->feature.v8[3] = f3;
 	eval_out->feature.v8[4] = f4;
 	eval_out->feature.v8[5] = f5;
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #endif
-=======
-#endif
-=======
   #endif
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-}
-=======
-	} else {
-		f0 = _mm_sub_epi16(f0, _mm_slli_epi16(EVAL_FEATURE[x].v8[0], 1));
-		f1 = _mm_sub_epi16(f1, _mm_slli_epi16(EVAL_FEATURE[x].v8[1], 1));
-		f2 = _mm_sub_epi16(f2, _mm_slli_epi16(EVAL_FEATURE[x].v8[2], 1));
-		f3 = _mm_sub_epi16(f3, _mm_slli_epi16(EVAL_FEATURE[x].v8[3], 1));
-		f4 = _mm_sub_epi16(f4, _mm_slli_epi16(EVAL_FEATURE[x].v8[4], 1));
-		f5 = _mm_sub_epi16(f5, _mm_slli_epi16(EVAL_FEATURE[x].v8[5], 1));
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
-
-		foreach_bit_r (x, f, j, r) {
-=======
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
-			f0 = _mm_sub_epi16(f0, EVAL_FEATURE[x].v8[0]);
-			f1 = _mm_sub_epi16(f1, EVAL_FEATURE[x].v8[1]);
-			f2 = _mm_sub_epi16(f2, EVAL_FEATURE[x].v8[2]);
-			f3 = _mm_sub_epi16(f3, EVAL_FEATURE[x].v8[3]);
-			f4 = _mm_sub_epi16(f4, EVAL_FEATURE[x].v8[4]);
-			f5 = _mm_sub_epi16(f5, EVAL_FEATURE[x].v8[5]);
-		}
-	}
-
-	eval_out->feature.v8[0] = f0;
-	eval_out->feature.v8[1] = f1;
-	eval_out->feature.v8[2] = f2;
-	eval_out->feature.v8[3] = f3;
-	eval_out->feature.v8[4] = f4;
-	eval_out->feature.v8[5] = f5;
-<<<<<<< HEAD
-#endif
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-  #endif
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
 }
 
 #else	// SSE dispatch (Eval may not be aligned)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *eval_in)
 {
-<<<<<<< HEAD
 	__asm__ (
 		"movdqu	%0, %%xmm2\n\t"
 		"movdqu	%1, %%xmm3\n\t"
@@ -336,128 +128,12 @@ void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *ev
 	"m" (eval_in->feature.us[24]), "m" (eval_in->feature.us[32]), "m" (eval_in->feature.us[40]));
 
 	if (eval_in->n_empties & 1) {
-=======
-static void eval_update_sse_0(Eval *eval_out, const Eval *eval_in, const Move *move)
-=======
-static void eval_update_sse_0(int x, unsigned long long f, Eval *eval_out, const Eval *eval_in)
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
-=======
-void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *eval_in)
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
-{
-	widest_register	r;
-	int	j;
-
-=======
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
-	__asm__ (
-		"movdqu	%0, %%xmm2\n\t"
-		"movdqu	%1, %%xmm3\n\t"
-		"movdqu	%2, %%xmm4\n\t"
-		"movdqu	%3, %%xmm5\n\t"
-		"movdqu	%4, %%xmm6\n\t"
-		"movdqu	%5, %%xmm7"
-	: :  "m" (eval_in->feature.us[0]), "m" (eval_in->feature.us[8]),  "m" (eval_in->feature.us[16]),
-	"m" (eval_in->feature.us[24]), "m" (eval_in->feature.us[32]), "m" (eval_in->feature.us[40]));
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	foreach_bit_32(x, fl) {
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	foreach_bit_r(x, fl, b) {
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-	foreach_bit_r (x, f, j, r) {
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-		__asm__ (
-			"psubw	%0, %%xmm2\n\t"
-			"psubw	%1, %%xmm3\n\t"
-			"psubw	%2, %%xmm4\n\t"
-			"psubw	%3, %%xmm5\n\t"
-			"psubw	%4, %%xmm6\n\t"
-<<<<<<< HEAD
-			"psubw	%5, %%xmm7"
-		: :  "m" (EVAL_FEATURE[x].us[0]), "m" (EVAL_FEATURE[x].us[8]), "m" (EVAL_FEATURE[x].us[16]),
-		"m" (EVAL_FEATURE[x].us[24]), "m" (EVAL_FEATURE[x].us[32]), "m" (EVAL_FEATURE[x].us[40]));
-
-		foreach_bit (x, f) {
-			__asm__ (
-				"paddw	%0, %%xmm2\n\t"
-				"paddw	%1, %%xmm3\n\t"
-				"paddw	%2, %%xmm4\n\t"
-				"paddw	%3, %%xmm5\n\t"
-				"paddw	%4, %%xmm6\n\t"
-				"paddw	%5, %%xmm7"
-			: :  "m" (EVAL_FEATURE[x].us[0]), "m" (EVAL_FEATURE[x].us[8]), "m" (EVAL_FEATURE[x].us[16]),
-			"m" (EVAL_FEATURE[x].us[24]), "m" (EVAL_FEATURE[x].us[32]), "m" (EVAL_FEATURE[x].us[40]));
-		}
-
-	} else {
-		__asm__ (
-			"movdqa	%0, %%xmm0\n\t"		"movdqa	%1, %%xmm1\n\t"
-			"psllw	$1, %%xmm0\n\t"		"psllw	$1, %%xmm1\n\t"
-			"psubw	%%xmm0, %%xmm2\n\t"	"psubw	%%xmm1, %%xmm3\n\t"
-			"movdqa	%2, %%xmm0\n\t"		"movdqa	%3, %%xmm1\n\t"
-			"psllw	$1, %%xmm0\n\t"		"psllw	$1, %%xmm1\n\t"
-			"psubw	%%xmm0, %%xmm4\n\t"	"psubw	%%xmm1, %%xmm5\n\t"
-			"movdqa	%4, %%xmm0\n\t"		"movdqa	%5, %%xmm1\n\t"
-			"psllw	$1, %%xmm0\n\t"		"psllw	$1, %%xmm1\n\t"
-			"psubw	%%xmm0, %%xmm6\n\t"	"psubw	%%xmm1, %%xmm7"
-		: :  "m" (EVAL_FEATURE[x].us[0]), "m" (EVAL_FEATURE[x].us[8]), "m" (EVAL_FEATURE[x].us[16]),
-		"m" (EVAL_FEATURE[x].us[24]), "m" (EVAL_FEATURE[x].us[32]), "m" (EVAL_FEATURE[x].us[40]));
-
-		foreach_bit (x, f) {
-			__asm__ (
-				"psubw	%0, %%xmm2\n\t"
-				"psubw	%1, %%xmm3\n\t"
-				"psubw	%2, %%xmm4\n\t"
-				"psubw	%3, %%xmm5\n\t"
-				"psubw	%4, %%xmm6\n\t"
-				"psubw	%5, %%xmm7"
-			: :  "m" (EVAL_FEATURE[x].us[0]), "m" (EVAL_FEATURE[x].us[8]), "m" (EVAL_FEATURE[x].us[16]),
-			"m" (EVAL_FEATURE[x].us[24]), "m" (EVAL_FEATURE[x].us[32]), "m" (EVAL_FEATURE[x].us[40]));
-		}
-=======
-			"psubw	%5, %%xmm7\n"
-		: :  "m" (EVAL_FEATURE[x].us[0]), "m" (EVAL_FEATURE[x].us[8]), "m" (EVAL_FEATURE[x].us[16]),
-		"m" (EVAL_FEATURE[x].us[24]), "m" (EVAL_FEATURE[x].us[32]), "m" (EVAL_FEATURE[x].us[40]));
-	}
-<<<<<<< HEAD
-	foreach_bit_r(x, fh, b) {
-=======
-	if (eval_in->n_empties & 1) {
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
 		__asm__ (
 			"psubw	%0, %%xmm2\n\t"
 			"psubw	%1, %%xmm3\n\t"
 			"psubw	%2, %%xmm4\n\t"
 			"psubw	%3, %%xmm5\n\t"
 			"psubw	%4, %%xmm6\n\t"
-<<<<<<< HEAD
-			"psubw	%5, %%xmm7\n"
-		: :  "m" (EVAL_FEATURE[x + 32].us[0]), "m" (EVAL_FEATURE[x + 32].us[8]), "m" (EVAL_FEATURE[x + 32].us[16]),
-		"m" (EVAL_FEATURE[x + 32].us[24]), "m" (EVAL_FEATURE[x + 32].us[32]), "m" (EVAL_FEATURE[x + 32].us[40]));
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-	}
-=======
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-
-	__asm__ (
-		"movdqu	%%xmm2, %0\n\t"
-		"movdqu	%%xmm3, %1\n\t"
-		"movdqu	%%xmm4, %2\n\t"
-		"movdqu	%%xmm5, %3\n\t"
-		"movdqu	%%xmm6, %4\n\t"
-<<<<<<< HEAD
-		"movdqu	%%xmm7, %5"
-=======
-		"movdqu	%%xmm7, %5\n\t"
-	: :  "m" (eval_out->feature.us[0]), "m" (eval_out->feature.us[8]), "m" (eval_out->feature.us[16]),
-	"m" (eval_out->feature.us[24]), "m" (eval_out->feature.us[32]), "m" (eval_out->feature.us[40]));
-}
-=======
 			"psubw	%5, %%xmm7"
 		: :  "m" (EVAL_FEATURE[x].us[0]), "m" (EVAL_FEATURE[x].us[8]), "m" (EVAL_FEATURE[x].us[16]),
 		"m" (EVAL_FEATURE[x].us[24]), "m" (EVAL_FEATURE[x].us[32]), "m" (EVAL_FEATURE[x].us[40]));
@@ -473,7 +149,6 @@ void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *ev
 			: :  "m" (EVAL_FEATURE[x].us[0]), "m" (EVAL_FEATURE[x].us[8]), "m" (EVAL_FEATURE[x].us[16]),
 			"m" (EVAL_FEATURE[x].us[24]), "m" (EVAL_FEATURE[x].us[32]), "m" (EVAL_FEATURE[x].us[40]));
 		}
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
 
 	} else {
 		__asm__ (
@@ -508,31 +183,14 @@ void eval_update_sse(int x, unsigned long long f, Eval *eval_out, const Eval *ev
 		"movdqu	%%xmm4, %2\n\t"
 		"movdqu	%%xmm5, %3\n\t"
 		"movdqu	%%xmm6, %4\n\t"
-<<<<<<< HEAD
-		"movdqu	%%xmm7, %5\n\t"
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 		"movdqu	%%xmm7, %5"
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
 	: :  "m" (eval_out->feature.us[0]), "m" (eval_out->feature.us[8]), "m" (eval_out->feature.us[16]),
 	"m" (eval_out->feature.us[24]), "m" (eval_out->feature.us[32]), "m" (eval_out->feature.us[40]));
 }
 
 #endif // hasSSE2
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(hasSSE2) || (defined(__ARM_NEON) && !defined(DISPATCH_NEON))
-=======
-#ifdef hasSSE2
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-#if defined(hasSSE2) || defined(hasNeon)
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
 #if defined(hasSSE2) || (defined(__ARM_NEON) && !defined(DISPATCH_NEON))
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 
 /**
  * @brief Set up evaluation features from a board.
@@ -544,68 +202,19 @@ void eval_set(Eval *eval, const Board *board)
 {
 	int x;
 	unsigned long long b = (eval->n_empties & 1) ? board->opponent : board->player;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #ifdef __AVX2__
-=======
-	static const EVAL_FEATURE_V EVAL_FEATURE_all_opponent = {{
-		 9841,  9841,  9841,  9841, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524,
-		 3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  1093,  1093,
-		 1093,  1093,   364,   364,   364,   364,   121,   121,   121,   121,    40,    40,    40,    40,     0,     0
-	}};
-=======
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-#ifdef __AVX2__
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	widest_register	r;
-=======
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
   #ifdef __AVX2__
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
 	__m256i	f0 = EVAL_FEATURE_all_opponent.v16[0];
 	__m256i	f1 = EVAL_FEATURE_all_opponent.v16[1];
 	__m256i	f2 = EVAL_FEATURE_all_opponent.v16[2];
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	foreach_bit (x, b) {
-=======
-	foreach_bit(x, b) {
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	foreach_bit_r (x, b, j, r) {
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-=======
-	foreach_bit (x, b) {
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
 		f0 = _mm256_sub_epi16(f0, EVAL_FEATURE[x].v16[0]);
 		f1 = _mm256_sub_epi16(f1, EVAL_FEATURE[x].v16[1]);
 		f2 = _mm256_sub_epi16(f2, EVAL_FEATURE[x].v16[2]);
 	}
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-	b = ~(board->opponent | board->player);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	foreach_bit (x, b) {
-=======
-=======
 
->>>>>>> f2da03e (Refine arm builds adding neon support.)
 	b = ~(board->opponent | board->player);
-	foreach_bit(x, b) {
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	foreach_bit_r (x, b, j, r) {
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-=======
 	foreach_bit (x, b) {
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
 		f0 = _mm256_add_epi16(f0, EVAL_FEATURE[x].v16[0]);
 		f1 = _mm256_add_epi16(f1, EVAL_FEATURE[x].v16[1]);
 		f2 = _mm256_add_epi16(f2, EVAL_FEATURE[x].v16[2]);
@@ -614,15 +223,7 @@ void eval_set(Eval *eval, const Board *board)
 	eval->feature.v16[1] = f1;
 	eval->feature.v16[2] = f2;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #else
-=======
-#else
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
   #else
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
 	__m128i	f0 = EVAL_FEATURE_all_opponent.v8[0];
 	__m128i	f1 = EVAL_FEATURE_all_opponent.v8[1];
 	__m128i	f2 = EVAL_FEATURE_all_opponent.v8[2];
@@ -630,19 +231,7 @@ void eval_set(Eval *eval, const Board *board)
 	__m128i	f4 = EVAL_FEATURE_all_opponent.v8[4];
 	__m128i	f5 = EVAL_FEATURE_all_opponent.v8[5];
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	foreach_bit (x, b) {
-=======
-	foreach_bit(x, b) {
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	foreach_bit_r (x, b, j, r) {
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-=======
 	foreach_bit (x, b) {
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
 		f0 = _mm_sub_epi16(f0, EVAL_FEATURE[x].v8[0]);
 		f1 = _mm_sub_epi16(f1, EVAL_FEATURE[x].v8[1]);
 		f2 = _mm_sub_epi16(f2, EVAL_FEATURE[x].v8[2]);
@@ -650,26 +239,9 @@ void eval_set(Eval *eval, const Board *board)
 		f4 = _mm_sub_epi16(f4, EVAL_FEATURE[x].v8[4]);
 		f5 = _mm_sub_epi16(f5, EVAL_FEATURE[x].v8[5]);
 	}
-<<<<<<< HEAD
-<<<<<<< HEAD
 
 	b = ~(board->opponent | board->player);
-<<<<<<< HEAD
-<<<<<<< HEAD
 	foreach_bit (x, b) {
-=======
-=======
-
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-	b = ~(board->opponent | board->player);
-	foreach_bit(x, b) {
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	foreach_bit_r (x, b, j, r) {
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-=======
-	foreach_bit (x, b) {
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
 		f0 = _mm_add_epi16(f0, EVAL_FEATURE[x].v8[0]);
 		f1 = _mm_add_epi16(f1, EVAL_FEATURE[x].v8[1]);
 		f2 = _mm_add_epi16(f2, EVAL_FEATURE[x].v8[2]);
@@ -684,817 +256,7 @@ void eval_set(Eval *eval, const Board *board)
 	eval->feature.v8[3] = f3;
 	eval->feature.v8[4] = f4;
 	eval->feature.v8[5] = f5;
-<<<<<<< HEAD
-<<<<<<< HEAD
-  #endif
-}
-
-<<<<<<< HEAD
-#endif // hasSSE2
-=======
-/**
- * @file eval_sse.c
- *
- * SSE/AVX translation of some eval.c functions
- *
- * @date 2018 - 2020
- * @author Toshihiko Okuhara
- * @version 4.4
- */
-
-#include <assert.h>
-
-#include "bit.h"
-#include "board.h"
-#include "move.h"
-#include "eval.h"
-
-typedef union {
-	unsigned short us[48];
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
-	__m128i	v8[6];
-#endif
-#ifdef __AVX2__
-	__m256i	v16[3];
-#endif
-}
-#if defined(__GNUC__) && !defined(hasSSE2)
-__attribute__ ((aligned (16)))
-#endif
-EVAL_FEATURE_V;
-
-static const EVAL_FEATURE_V EVAL_FEATURE[65] = {
-	{{ // a1
-		 6561,     0,     0,     0,   243,     0,     0,     0,  6561,     0,  6561,     0, 19683,     0, 19683,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b1
-		 2187,     0,     0,     0,    27,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c1
-		   81,     0,     0,     0,     9,     0,     0,     0,   729,     0,     0,     0,  6561,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,   243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d1
-		    0,     0,     0,     0,     3,     1,     0,     0,   243,     0,     0,     0,  2187,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,    81,     0,     0,     0,    27,     0,     0,     0,     0,     0
-	}}, {{ // e1
-		    0,     0,     0,     0,     1,     3,     0,     0,    81,     0,     0,     0,     9,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,    81,     0,     0,     0,    27,     0,     0,     0
-	}}, {{ // f1
-		    0,    81,     0,     0,     0,     9,     0,     0,    27,     0,     0,     0,     3,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,   243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // g1
-		    0,  2187,     0,     0,     0,    27,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,   729,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // h1
-		    0,  6561,     0,     0,     0,   243,     0,     0,     3,     0,     0,  6561,     1,     0,     0, 19683,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // a2
-		  729,     0,     0,     0,   729,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,
-		 2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		  729,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b2
-		  243,     0,     0,     0,    81,     0,     0,     0, 19683,     0, 19683,     0,     0,     0,     0,     0,
-		  729,     0,   729,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c2
-		    9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,
-		  243,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,     0,     0,     0,   243,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0
-	}}, {{ // d2
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   243,     0,     0,     0,
-		   81,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,     0,
-		    0,     0,    81,     0,     0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e2
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    81,     0,     0,     0,
-		   27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,
-		    0,     0,     0,     0,    81,     0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // f2
-		    0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    27,     0,     0,     0,
-		    9,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,   243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0
-	}}, {{ // g2
-		    0,   243,     0,     0,     0,    81,     0,     0,     1,     0,     0, 19683,     0,     0,     0,     0,
-		    3,     0,     0,   729,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // h2
-		    0,   729,     0,     0,     0,   729,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,
-		    1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // a3
-		   27,     0,     0,     0,  2187,     0,     0,     0,     0,     0,   729,     0,     0,     0,  6561,     0,
-		    0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,   243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b3
-		    3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,
-		    0,     0,   243,     0,   729,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		  243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0
-	}}, {{ // c3
-		    1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,   243,     0,   243,     0,     0,     0,     0,     0,   243,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d3
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,    81,     0,     0,     0,     0,     0,   243,     0,     0,     0,    81,     0,
-		    0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e3
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     0,   243,     0,     0,     0,     0,
-		    0,    81,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // f3
-		    0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     9,     0,     0,   243,     0,     0,     0,     0,     0,     9,     0,     0,
-		    0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // g3
-		    0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,
-		    0,     0,     0,   243,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   243,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0
-	}}, {{ // h3
-		    0,    27,     0,     0,     0,  2187,     0,     0,     0,     0,     0,   729,     0,     0,     0,  6561,
-		    0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,   243,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // a4
-		    0,     0,     0,     0,  6561,     0, 19683,     0,     0,     0,   243,     0,     0,     0,  2187,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,    81,     0,     0,     1,     0,     0,     0,     0,     0
-	}}, {{ // b4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   243,     0,
-		    0,     0,    81,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,    81,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,    81,     0,   243,     0,     0,     0,     0,     0,     0,     0,
-		   81,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,    81,     0,    81,     0,    81,     0,     0,     0,
-		    0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,    27,     0,     0,    81,     0,    27,    27,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // f4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,    81,     9,     0,     0,     0,     0,     0,     0,    81,
-		    0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // g4
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   243,
-		    0,     0,     0,    81,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,    81,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // h4
-		    0,     0,     0,     0,     0,  6561,     0, 19683,     0,     0,     0,   243,     0,     0,     0,  2187,
-		    0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,    81,     0,     0,     1,     0,     0,     0
-	}}, {{ // a5
-		    0,     0,     0,     0, 19683,     0,  6561,     0,     0,     0,    81,     0,     0,     0,     9,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,    27,     0,     0,     0,     0
-	}}, {{ // b5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    81,     0,
-		    0,     0,    27,     0,     0,     0,     0,     0,     0,   729,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     3,     0,     0,    27,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,    27,     0,     0,   243,     0,     0,     0,     0,     0,     0,
-		    0,     9,     0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,    81,    27,     0,     0,    81,     0,     0,
-		   27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,    27,     0,    27,    27,     0,     0,    27,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // f5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,    27,     0,     9,     0,     0,     0,     0,     9,     0,
-		    0,     0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // g5
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    81,
-		    0,     0,     0,    27,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0,
-		    0,     0,     3,     0,     0,     0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     0
-	}}, {{ // h5
-		    0,     0,     0,     0,     0, 19683,     0,  6561,     0,     0,     0,    81,     0,     0,     0,     9,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,    27,     0,     0
-	}}, {{ // a6
-		    0,     0,    81,     0,     0,     0,  2187,     0,     0,     0,    27,     0,     0,     0,     3,     0,
-		    0,     0,     0,     0,     0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b6
-		    0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    27,     0,
-		    0,     0,     9,     0,     0,   729,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0
-	}}, {{ // c6
-		    0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,   243,     9,     0,     0,     0,     0,     0,     0,   243,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d6
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,    81,     0,     0,     0,     0,     9,     0,     0,     0,     0,     9,
-		    0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e6
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,    27,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,
-		    9,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // f6
-		    0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     9,     0,     9,     0,     0,     0,     0,     9,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0,     0,     0,     0,     0
-	}}, {{ // g6
-		    0,     0,     0,     9,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    27,
-		    0,     0,     0,     9,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     9,     0,     0
-	}}, {{ // h6
-		    0,     0,     0,    81,     0,     0,     0,  2187,     0,     0,     0,    27,     0,     0,     0,     3,
-		    0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // a7
-		    0,     0,  2187,     0,     0,     0,   729,     0,     0,     0,     9,     0,     0,     0,     0,     0,
-		    0,  2187,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b7
-		    0,     0,   243,     0,     0,     0,    81,     0,     0, 19683,     1,     0,     0,     0,     0,     0,
-		    0,   729,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c7
-		    0,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   729,     0,     0,
-		    0,   243,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     3,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0
-	}}, {{ // d7
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,   243,     0,     0,
-		    0,    81,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     3,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // e7
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    81,     0,     0,
-		    0,    27,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,
-		    0,     0,     0,     3,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0
-	}}, {{ // f7
-		    0,     0,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     0,    27,     0,     0,
-		    0,     9,     0,     0,     0,     0,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,
-		    3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0
-	}}, {{ // g7
-		    0,     0,     0,   243,     0,     0,     0,    81,     0,     1,     0,     1,     0,     0,     0,     0,
-		    0,     3,     0,     3,     0,     0,     0,     0,     0,     0,     0,     0,     3,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // h7
-		    0,     0,     0,  2187,     0,     0,     0,   729,     0,     0,     0,     9,     0,     0,     0,     0,
-		    0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // a8
-		    0,     0,  6561,     0,     0,     0,   243,     0,     0,  6561,     3,     0,     0, 19683,     1,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,  2187,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // b8
-		    0,     0,   729,     0,     0,     0,    27,     0,     0,  2187,     0,     0,     0,     0,     0,     0,
-		    0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // c8
-		    0,     0,    27,     0,     0,     0,     9,     0,     0,   729,     0,     0,     0,  6561,     0,     0,
-		    0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // d8
-		    0,     0,     0,     0,     0,     0,     3,     1,     0,   243,     0,     0,     0,  2187,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     1,     0,     0,     0,     0
-	}}, {{ // e8
-		    0,     0,     0,     0,     0,     0,     1,     3,     0,    81,     0,     0,     0,     9,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     1,     0,     0
-	}}, {{ // f8
-		    0,     0,     0,    27,     0,     0,     0,     9,     0,    27,     0,     0,     0,     3,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // g8
-		    0,     0,     0,   729,     0,     0,     0,    27,     0,     9,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    1,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // h8
-		    0,     0,     0,  6561,     0,     0,     0,   243,     0,     3,     0,     3,     0,     1,     0,     1,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     1,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}, {{ // PASS
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-		    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0
-	}}
-};
-
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
-
-static void eval_update_sse_0(Eval *eval_out, const Eval *eval_in, const Move *move)
-{
-	int	x = move->x;
-	unsigned long long f = move->flipped;
-#ifdef __AVX2__
-	__m256i	f0 = _mm256_sub_epi16(eval_in->feature.v16[0], _mm256_slli_epi16(EVAL_FEATURE[x].v16[0], 1));
-	__m256i	f1 = _mm256_sub_epi16(eval_in->feature.v16[1], _mm256_slli_epi16(EVAL_FEATURE[x].v16[1], 1));
-	__m256i	f2 = _mm256_sub_epi16(eval_in->feature.v16[2], _mm256_slli_epi16(EVAL_FEATURE[x].v16[2], 1));
-
-	foreach_bit(x, f) {
-		f0 = _mm256_sub_epi16(f0, EVAL_FEATURE[x].v16[0]);
-		f1 = _mm256_sub_epi16(f1, EVAL_FEATURE[x].v16[1]);
-		f2 = _mm256_sub_epi16(f2, EVAL_FEATURE[x].v16[2]);
-	}
-	eval_out->feature.v16[0] = f0;
-	eval_out->feature.v16[1] = f1;
-	eval_out->feature.v16[2] = f2;
-
-#else
-	__m128i	f0 = _mm_sub_epi16(eval_in->feature.v8[0], _mm_slli_epi16(EVAL_FEATURE[x].v8[0], 1));
-	__m128i	f1 = _mm_sub_epi16(eval_in->feature.v8[1], _mm_slli_epi16(EVAL_FEATURE[x].v8[1], 1));
-	__m128i	f2 = _mm_sub_epi16(eval_in->feature.v8[2], _mm_slli_epi16(EVAL_FEATURE[x].v8[2], 1));
-	__m128i	f3 = _mm_sub_epi16(eval_in->feature.v8[3], _mm_slli_epi16(EVAL_FEATURE[x].v8[3], 1));
-	__m128i	f4 = _mm_sub_epi16(eval_in->feature.v8[4], _mm_slli_epi16(EVAL_FEATURE[x].v8[4], 1));
-	__m128i	f5 = _mm_sub_epi16(eval_in->feature.v8[5], _mm_slli_epi16(EVAL_FEATURE[x].v8[5], 1));
-
-#ifdef HAS_CPU_64
-	foreach_bit(x, f) {
-		f0 = _mm_sub_epi16(f0, EVAL_FEATURE[x].v8[0]);
-		f1 = _mm_sub_epi16(f1, EVAL_FEATURE[x].v8[1]);
-		f2 = _mm_sub_epi16(f2, EVAL_FEATURE[x].v8[2]);
-		f3 = _mm_sub_epi16(f3, EVAL_FEATURE[x].v8[3]);
-		f4 = _mm_sub_epi16(f4, EVAL_FEATURE[x].v8[4]);
-		f5 = _mm_sub_epi16(f5, EVAL_FEATURE[x].v8[5]);
-	}
-
-#else
-	unsigned int	fl = (unsigned int) f;
-	unsigned int	fh = (unsigned int) (f >> 32);
-
-	foreach_bit_32(x, fl) {
-		f0 = _mm_sub_epi16(f0, EVAL_FEATURE[x].v8[0]);
-		f1 = _mm_sub_epi16(f1, EVAL_FEATURE[x].v8[1]);
-		f2 = _mm_sub_epi16(f2, EVAL_FEATURE[x].v8[2]);
-		f3 = _mm_sub_epi16(f3, EVAL_FEATURE[x].v8[3]);
-		f4 = _mm_sub_epi16(f4, EVAL_FEATURE[x].v8[4]);
-		f5 = _mm_sub_epi16(f5, EVAL_FEATURE[x].v8[5]);
-	}
-	foreach_bit_32(x, fh) {
-		f0 = _mm_sub_epi16(f0, EVAL_FEATURE[x + 32].v8[0]);
-		f1 = _mm_sub_epi16(f1, EVAL_FEATURE[x + 32].v8[1]);
-		f2 = _mm_sub_epi16(f2, EVAL_FEATURE[x + 32].v8[2]);
-		f3 = _mm_sub_epi16(f3, EVAL_FEATURE[x + 32].v8[3]);
-		f4 = _mm_sub_epi16(f4, EVAL_FEATURE[x + 32].v8[4]);
-		f5 = _mm_sub_epi16(f5, EVAL_FEATURE[x + 32].v8[5]);
-	}
-#endif
-	eval_out->feature.v8[0] = f0;
-	eval_out->feature.v8[1] = f1;
-	eval_out->feature.v8[2] = f2;
-	eval_out->feature.v8[3] = f3;
-	eval_out->feature.v8[4] = f4;
-	eval_out->feature.v8[5] = f5;
-#endif
-}
-
-/**
- * @brief Update the features after a player's move.
- *
- * @param eval  Evaluation function.
- * @param move  Move.
- */
-static void eval_update_sse_1(Eval *eval_out, const Eval *eval_in, const Move *move)
-{
-	int	x = move->x;
-	unsigned long long f = move->flipped;
-#ifdef __AVX2__
-	__m256i	f0 = _mm256_sub_epi16(eval_in->feature.v16[0], EVAL_FEATURE[x].v16[0]);
-	__m256i	f1 = _mm256_sub_epi16(eval_in->feature.v16[1], EVAL_FEATURE[x].v16[1]);
-	__m256i	f2 = _mm256_sub_epi16(eval_in->feature.v16[2], EVAL_FEATURE[x].v16[2]);
-
-	foreach_bit(x, f) {
-		f0 = _mm256_add_epi16(f0, EVAL_FEATURE[x].v16[0]);
-		f1 = _mm256_add_epi16(f1, EVAL_FEATURE[x].v16[1]);
-		f2 = _mm256_add_epi16(f2, EVAL_FEATURE[x].v16[2]);
-	}
-	eval_out->feature.v16[0] = f0;
-	eval_out->feature.v16[1] = f1;
-	eval_out->feature.v16[2] = f2;
-
-#else
-	__m128i	f0 = _mm_sub_epi16(eval_in->feature.v8[0], EVAL_FEATURE[x].v8[0]);
-	__m128i	f1 = _mm_sub_epi16(eval_in->feature.v8[1], EVAL_FEATURE[x].v8[1]);
-	__m128i	f2 = _mm_sub_epi16(eval_in->feature.v8[2], EVAL_FEATURE[x].v8[2]);
-	__m128i	f3 = _mm_sub_epi16(eval_in->feature.v8[3], EVAL_FEATURE[x].v8[3]);
-	__m128i	f4 = _mm_sub_epi16(eval_in->feature.v8[4], EVAL_FEATURE[x].v8[4]);
-	__m128i	f5 = _mm_sub_epi16(eval_in->feature.v8[5], EVAL_FEATURE[x].v8[5]);
-
-#ifdef HAS_CPU_64
-	foreach_bit(x, f) {
-		f0 = _mm_add_epi16(f0, EVAL_FEATURE[x].v8[0]);
-		f1 = _mm_add_epi16(f1, EVAL_FEATURE[x].v8[1]);
-		f2 = _mm_add_epi16(f2, EVAL_FEATURE[x].v8[2]);
-		f3 = _mm_add_epi16(f3, EVAL_FEATURE[x].v8[3]);
-		f4 = _mm_add_epi16(f4, EVAL_FEATURE[x].v8[4]);
-		f5 = _mm_add_epi16(f5, EVAL_FEATURE[x].v8[5]);
-	}
-
-#else
-	unsigned int	fl = (unsigned int) f;
-	unsigned int	fh = (unsigned int) (f >> 32);
-
-	foreach_bit_32(x, fl) {
-		f0 = _mm_add_epi16(f0, EVAL_FEATURE[x].v8[0]);
-		f1 = _mm_add_epi16(f1, EVAL_FEATURE[x].v8[1]);
-		f2 = _mm_add_epi16(f2, EVAL_FEATURE[x].v8[2]);
-		f3 = _mm_add_epi16(f3, EVAL_FEATURE[x].v8[3]);
-		f4 = _mm_add_epi16(f4, EVAL_FEATURE[x].v8[4]);
-		f5 = _mm_add_epi16(f5, EVAL_FEATURE[x].v8[5]);
-	}
-	foreach_bit_32(x, fh) {
-		f0 = _mm_add_epi16(f0, EVAL_FEATURE[x + 32].v8[0]);
-		f1 = _mm_add_epi16(f1, EVAL_FEATURE[x + 32].v8[1]);
-		f2 = _mm_add_epi16(f2, EVAL_FEATURE[x + 32].v8[2]);
-		f3 = _mm_add_epi16(f3, EVAL_FEATURE[x + 32].v8[3]);
-		f4 = _mm_add_epi16(f4, EVAL_FEATURE[x + 32].v8[4]);
-		f5 = _mm_add_epi16(f5, EVAL_FEATURE[x + 32].v8[5]);
-	}
-
-#endif
-	eval_out->feature.v8[0] = f0;
-	eval_out->feature.v8[1] = f1;
-	eval_out->feature.v8[2] = f2;
-	eval_out->feature.v8[3] = f3;
-	eval_out->feature.v8[4] = f4;
-	eval_out->feature.v8[5] = f5;
-#endif
-}
-
-#else	// SSE dispatch (Eval may not be aligned)
-
-static void eval_update_sse_0(Eval *eval_out, const Eval *eval_in, const Move *move)
-{
-	int	x = move->x;
-	unsigned int	fl = (unsigned int) move->flipped;
-	unsigned int	fh = (unsigned int) (move->flipped >> 32);
-
-	__asm__ (
-		"movdqa	%2, %%xmm0\n\t"		"movdqa	%3, %%xmm1\n\t"
-		"movdqu	%0, %%xmm2\n\t"		"movdqu	%1, %%xmm3\n\t"
-		"psllw	$1, %%xmm0\n\t"		"psllw	$1, %%xmm1\n\t"
-		"psubw	%%xmm0, %%xmm2\n\t"	"psubw	%%xmm1, %%xmm3\n"
-	: :  "m" (eval_in->feature.us[0]), "m" (eval_in->feature.us[8]), "m" (EVAL_FEATURE[x].us[0]),  "m" (EVAL_FEATURE[x].us[8]));
-	__asm__ (
-		"movdqa	%2, %%xmm0\n\t"		"movdqa	%3, %%xmm1\n\t"
-		"movdqu	%0, %%xmm4\n\t"		"movdqu	%1, %%xmm5\n\t"
-		"psllw	$1, %%xmm0\n\t"		"psllw	$1, %%xmm1\n\t"
-		"psubw	%%xmm0, %%xmm4\n\t"	"psubw	%%xmm1, %%xmm5\n"
-	: :  "m" (eval_in->feature.us[16]), "m" (eval_in->feature.us[24]), "m" (EVAL_FEATURE[x].us[16]),  "m" (EVAL_FEATURE[x].us[24]));
-	__asm__ (
-		"movdqa	%2, %%xmm0\n\t"		"movdqa	%3, %%xmm1\n\t"
-		"movdqu	%0, %%xmm6\n\t"		"movdqu	%1, %%xmm7\n\t"
-		"psllw	$1, %%xmm0\n\t"		"psllw	$1, %%xmm1\n\t"
-		"psubw	%%xmm0, %%xmm6\n\t"	"psubw	%%xmm1, %%xmm7\n"
-	: :  "m" (eval_in->feature.us[32]), "m" (eval_in->feature.us[40]), "m" (EVAL_FEATURE[x].us[32]),  "m" (EVAL_FEATURE[x].us[40]));
-
-	foreach_bit_32(x, fl) {
-		__asm__ (
-			"psubw	%0, %%xmm2\n\t"
-			"psubw	%1, %%xmm3\n\t"
-			"psubw	%2, %%xmm4\n\t"
-			"psubw	%3, %%xmm5\n\t"
-			"psubw	%4, %%xmm6\n\t"
-			"psubw	%5, %%xmm7\n"
-		: :  "m" (EVAL_FEATURE[x].us[0]), "m" (EVAL_FEATURE[x].us[8]), "m" (EVAL_FEATURE[x].us[16]),
-		"m" (EVAL_FEATURE[x].us[24]), "m" (EVAL_FEATURE[x].us[32]), "m" (EVAL_FEATURE[x].us[40]));
-	}
-	foreach_bit_32(x, fh) {
-		__asm__ (
-			"psubw	%0, %%xmm2\n\t"
-			"psubw	%1, %%xmm3\n\t"
-			"psubw	%2, %%xmm4\n\t"
-			"psubw	%3, %%xmm5\n\t"
-			"psubw	%4, %%xmm6\n\t"
-			"psubw	%5, %%xmm7\n"
-		: :  "m" (EVAL_FEATURE[x + 32].us[0]), "m" (EVAL_FEATURE[x + 32].us[8]), "m" (EVAL_FEATURE[x + 32].us[16]),
-		"m" (EVAL_FEATURE[x + 32].us[24]), "m" (EVAL_FEATURE[x + 32].us[32]), "m" (EVAL_FEATURE[x + 32].us[40]));
-	}
-
-	__asm__ (
-		"movdqu	%%xmm2, %0\n\t"
-		"movdqu	%%xmm3, %1\n\t"
-		"movdqu	%%xmm4, %2\n\t"
-		"movdqu	%%xmm5, %3\n\t"
-		"movdqu	%%xmm6, %4\n\t"
-		"movdqu	%%xmm7, %5\n\t"
-	: :  "m" (eval_out->feature.us[0]), "m" (eval_out->feature.us[8]), "m" (eval_out->feature.us[16]),
-	"m" (eval_out->feature.us[24]), "m" (eval_out->feature.us[32]), "m" (eval_out->feature.us[40]));
-}
-
-static void eval_update_sse_1(Eval *eval_out, const Eval *eval_in, const Move *move)
-{
-	int	x = move->x;
-	unsigned int	fl = (unsigned int) move->flipped;
-	unsigned int	fh = (unsigned int) (move->flipped >> 32);
-
-	__asm__ (
-		"movdqu	%0, %%xmm2\n\t"		"movdqu	%1, %%xmm3\n\t"
-		"psubw	%2, %%xmm2\n\t"		"psubw	%3, %%xmm3\n"
-	: :  "m" (eval_in->feature.us[0]), "m" (eval_in->feature.us[8]), "m" (EVAL_FEATURE[x].us[0]),  "m" (EVAL_FEATURE[x].us[8]));
-	__asm__ (
-		"movdqu	%0, %%xmm4\n\t"		"movdqu	%1, %%xmm5\n\t"
-		"psubw	%2, %%xmm4\n\t"		"psubw	%3, %%xmm5\n"
-	: :  "m" (eval_in->feature.us[16]), "m" (eval_in->feature.us[24]), "m" (EVAL_FEATURE[x].us[16]),  "m" (EVAL_FEATURE[x].us[24]));
-	__asm__ (
-		"movdqu	%0, %%xmm6\n\t"		"movdqu	%1, %%xmm7\n\t"
-		"psubw	%2, %%xmm6\n\t"		"psubw	%3, %%xmm7\n"
-	: :  "m" (eval_in->feature.us[32]), "m" (eval_in->feature.us[40]), "m" (EVAL_FEATURE[x].us[32]),  "m" (EVAL_FEATURE[x].us[40]));
-
-	foreach_bit_32(x, fl) {
-		__asm__ (
-			"paddw	%0, %%xmm2\n\t"
-			"paddw	%1, %%xmm3\n\t"
-			"paddw	%2, %%xmm4\n\t"
-			"paddw	%3, %%xmm5\n\t"
-			"paddw	%4, %%xmm6\n\t"
-			"paddw	%5, %%xmm7\n"
-		: :  "m" (EVAL_FEATURE[x].us[0]), "m" (EVAL_FEATURE[x].us[8]), "m" (EVAL_FEATURE[x].us[16]),
-		"m" (EVAL_FEATURE[x].us[24]), "m" (EVAL_FEATURE[x].us[32]), "m" (EVAL_FEATURE[x].us[40]));
-	}
-	foreach_bit_32(x, fh) {
-		__asm__ (
-			"paddw	%0, %%xmm2\n\t"
-			"paddw	%1, %%xmm3\n\t"
-			"paddw	%2, %%xmm4\n\t"
-			"paddw	%3, %%xmm5\n\t"
-			"paddw	%4, %%xmm6\n\t"
-			"paddw	%5, %%xmm7\n"
-		: :  "m" (EVAL_FEATURE[x + 32].us[0]), "m" (EVAL_FEATURE[x + 32].us[8]), "m" (EVAL_FEATURE[x + 32].us[16]),
-		"m" (EVAL_FEATURE[x + 32].us[24]), "m" (EVAL_FEATURE[x + 32].us[32]), "m" (EVAL_FEATURE[x + 32].us[40]));
-	}
-
-	__asm__ (
-		"movdqu	%%xmm2, %0\n\t"
-		"movdqu	%%xmm3, %1\n\t"
-		"movdqu	%%xmm4, %2\n\t"
-		"movdqu	%%xmm5, %3\n\t"
-		"movdqu	%%xmm6, %4\n\t"
-		"movdqu	%%xmm7, %5\n\t"
-	: :  "m" (eval_out->feature.us[0]), "m" (eval_out->feature.us[8]), "m" (eval_out->feature.us[16]),
-	"m" (eval_out->feature.us[24]), "m" (eval_out->feature.us[32]), "m" (eval_out->feature.us[40]));
-}
-
-<<<<<<< HEAD
-static void eval_restore_sse_0(Eval *eval, const Move *move)
-{
-	int	x = move->x;
-	unsigned int	fl = (unsigned int) move->flipped;
-	unsigned int	fh = (unsigned int) (move->flipped >> 32);
-
-	__asm__ (
-		"movdqa	%2, %%xmm0\n\t"		"movdqa	%3, %%xmm1\n\t"
-		"movdqu	%0, %%xmm2\n\t"		"movdqu	%1, %%xmm3\n\t"
-		"psllw	$1, %%xmm0\n\t"		"psllw	$1, %%xmm1\n\t"
-		"paddw	%%xmm0, %%xmm2\n\t"	"paddw	%%xmm1, %%xmm3\n"
-	: :  "m" (eval->feature.us[0]), "m" (eval->feature.us[8]), "m" (EVAL_FEATURE[x].us[0]),  "m" (EVAL_FEATURE[x].us[8]));
-	__asm__ (
-		"movdqa	%2, %%xmm0\n\t"		"movdqa	%3, %%xmm1\n\t"
-		"movdqu	%0, %%xmm4\n\t"		"movdqu	%1, %%xmm5\n\t"
-		"psllw	$1, %%xmm0\n\t"		"psllw	$1, %%xmm1\n\t"
-		"paddw	%%xmm0, %%xmm4\n\t"	"paddw	%%xmm1, %%xmm5\n"
-	: :  "m" (eval->feature.us[16]), "m" (eval->feature.us[24]), "m" (EVAL_FEATURE[x].us[16]),  "m" (EVAL_FEATURE[x].us[24]));
-	__asm__ (
-		"movdqa	%2, %%xmm0\n\t"		"movdqa	%3, %%xmm1\n\t"
-		"movdqu	%0, %%xmm6\n\t"		"movdqu	%1, %%xmm7\n\t"
-		"psllw	$1, %%xmm0\n\t"		"psllw	$1, %%xmm1\n\t"
-		"paddw	%%xmm0, %%xmm6\n\t"	"paddw	%%xmm1, %%xmm7\n"
-	: :  "m" (eval->feature.us[32]), "m" (eval->feature.us[40]), "m" (EVAL_FEATURE[x].us[32]),  "m" (EVAL_FEATURE[x].us[40]));
-
-	foreach_bit_32(x, fl) {
-		__asm__ (
-			"paddw	%0, %%xmm2\n\t"
-			"paddw	%1, %%xmm3\n\t"
-			"paddw	%2, %%xmm4\n\t"
-			"paddw	%3, %%xmm5\n\t"
-			"paddw	%4, %%xmm6\n\t"
-			"paddw	%5, %%xmm7\n"
-		: :  "m" (EVAL_FEATURE[x].us[0]), "m" (EVAL_FEATURE[x].us[8]), "m" (EVAL_FEATURE[x].us[16]),
-		"m" (EVAL_FEATURE[x].us[24]), "m" (EVAL_FEATURE[x].us[32]), "m" (EVAL_FEATURE[x].us[40]));
-	}
-	foreach_bit_32(x, fh) {
-		__asm__ (
-			"paddw	%0, %%xmm2\n\t"
-			"paddw	%1, %%xmm3\n\t"
-			"paddw	%2, %%xmm4\n\t"
-			"paddw	%3, %%xmm5\n\t"
-			"paddw	%4, %%xmm6\n\t"
-			"paddw	%5, %%xmm7\n"
-		: :  "m" (EVAL_FEATURE[x + 32].us[0]), "m" (EVAL_FEATURE[x + 32].us[8]), "m" (EVAL_FEATURE[x + 32].us[16]),
-		"m" (EVAL_FEATURE[x + 32].us[24]), "m" (EVAL_FEATURE[x + 32].us[32]), "m" (EVAL_FEATURE[x + 32].us[40]));
-	}
-
-	__asm__ (
-		"movdqu	%%xmm2, %0\n\t"
-		"movdqu	%%xmm3, %1\n\t"
-		"movdqu	%%xmm4, %2\n\t"
-		"movdqu	%%xmm5, %3\n\t"
-		"movdqu	%%xmm6, %4\n\t"
-		"movdqu	%%xmm7, %5\n\t"
-	: :  "m" (eval->feature.us[0]), "m" (eval->feature.us[8]), "m" (eval->feature.us[16]),
-	"m" (eval->feature.us[24]), "m" (eval->feature.us[32]), "m" (eval->feature.us[40]));
-}
-
-static void eval_restore_sse_1(Eval *eval, const Move *move)
-{
-	int	x = move->x;
-	unsigned int	fl = (unsigned int) move->flipped;
-	unsigned int	fh = (unsigned int) (move->flipped >> 32);
-
-	__asm__ (
-		"movdqu	%0, %%xmm2\n\t"		"movdqu	%1, %%xmm3\n\t"
-		"paddw	%2, %%xmm2\n\t"		"paddw	%3, %%xmm3\n"
-	: :  "m" (eval->feature.us[0]), "m" (eval->feature.us[8]), "m" (EVAL_FEATURE[x].us[0]),  "m" (EVAL_FEATURE[x].us[8]));
-	__asm__ (
-		"movdqu	%0, %%xmm4\n\t"		"movdqu	%1, %%xmm5\n\t"
-		"paddw	%2, %%xmm4\n\t"		"paddw	%3, %%xmm5\n"
-	: :  "m" (eval->feature.us[16]), "m" (eval->feature.us[24]), "m" (EVAL_FEATURE[x].us[16]),  "m" (EVAL_FEATURE[x].us[24]));
-	__asm__ (
-		"movdqu	%0, %%xmm6\n\t"		"movdqu	%1, %%xmm7\n\t"
-		"paddw	%2, %%xmm6\n\t"		"paddw	%3, %%xmm7\n"
-	: :  "m" (eval->feature.us[32]), "m" (eval->feature.us[40]), "m" (EVAL_FEATURE[x].us[32]),  "m" (EVAL_FEATURE[x].us[40]));
-
-	foreach_bit_32(x, fl) {
-		__asm__ (
-			"psubw	%0, %%xmm2\n\t"
-			"psubw	%1, %%xmm3\n\t"
-			"psubw	%2, %%xmm4\n\t"
-			"psubw	%3, %%xmm5\n\t"
-			"psubw	%4, %%xmm6\n\t"
-			"psubw	%5, %%xmm7\n"
-		: :  "m" (EVAL_FEATURE[x].us[0]), "m" (EVAL_FEATURE[x].us[8]), "m" (EVAL_FEATURE[x].us[16]),
-		"m" (EVAL_FEATURE[x].us[24]), "m" (EVAL_FEATURE[x].us[32]), "m" (EVAL_FEATURE[x].us[40]));
-	}
-	foreach_bit_32(x, fh) {
-		__asm__ (
-			"psubw	%0, %%xmm2\n\t"
-			"psubw	%1, %%xmm3\n\t"
-			"psubw	%2, %%xmm4\n\t"
-			"psubw	%3, %%xmm5\n\t"
-			"psubw	%4, %%xmm6\n\t"
-			"psubw	%5, %%xmm7\n"
-		: :  "m" (EVAL_FEATURE[x + 32].us[0]), "m" (EVAL_FEATURE[x + 32].us[8]), "m" (EVAL_FEATURE[x + 32].us[16]),
-		"m" (EVAL_FEATURE[x + 32].us[24]), "m" (EVAL_FEATURE[x + 32].us[32]), "m" (EVAL_FEATURE[x + 32].us[40]));
-	}
-
-	__asm__ (
-		"movdqu	%%xmm2, %0\n\t"
-		"movdqu	%%xmm3, %1\n\t"
-		"movdqu	%%xmm4, %2\n\t"
-		"movdqu	%%xmm5, %3\n\t"
-		"movdqu	%%xmm6, %4\n\t"
-		"movdqu	%%xmm7, %5\n\t"
-	: :  "m" (eval->feature.us[0]), "m" (eval->feature.us[8]), "m" (eval->feature.us[16]),
-	"m" (eval->feature.us[24]), "m" (eval->feature.us[32]), "m" (eval->feature.us[40]));
-}
-
-<<<<<<< HEAD
-#endif // __SSE2__
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
-=======
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-#endif // hasSSE2
-<<<<<<< HEAD
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-
-#ifdef hasSSE2
-
-/**
- * @brief Set up evaluation features from a board.
- *
- * @param eval  Evaluation function.
- * @param board Board to setup features from.
- */
-void eval_set(Eval *eval, const Board *board)
-{
-	int x;
-	unsigned long long b = (eval->n_empties & 1) ? board->opponent : board->player;
-	static const EVAL_FEATURE_V EVAL_FEATURE_all_opponent = {{
-		 9841,  9841,  9841,  9841, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524, 29524,
-		 3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  3280,  1093,  1093,
-		 1093,  1093,   364,   364,   364,   364,   121,   121,   121,   121,    40,    40,    40,    40,     0,     0
-	}};
-#ifdef __AVX2__
-	__m256i	f0 = EVAL_FEATURE_all_opponent.v16[0];
-	__m256i	f1 = EVAL_FEATURE_all_opponent.v16[1];
-	__m256i	f2 = EVAL_FEATURE_all_opponent.v16[2];
-
-	foreach_bit(x, b) {
-		f0 = _mm256_sub_epi16(f0, EVAL_FEATURE[x].v16[0]);
-		f1 = _mm256_sub_epi16(f1, EVAL_FEATURE[x].v16[1]);
-		f2 = _mm256_sub_epi16(f2, EVAL_FEATURE[x].v16[2]);
-	}
-	b = ~(board->opponent | board->player);
-	foreach_bit(x, b) {
-		f0 = _mm256_add_epi16(f0, EVAL_FEATURE[x].v16[0]);
-		f1 = _mm256_add_epi16(f1, EVAL_FEATURE[x].v16[1]);
-		f2 = _mm256_add_epi16(f2, EVAL_FEATURE[x].v16[2]);
-	}
-	eval->feature.v16[0] = f0;
-	eval->feature.v16[1] = f1;
-	eval->feature.v16[2] = f2;
-
-#else
-	__m128i	f0 = EVAL_FEATURE_all_opponent.v8[0];
-	__m128i	f1 = EVAL_FEATURE_all_opponent.v8[1];
-	__m128i	f2 = EVAL_FEATURE_all_opponent.v8[2];
-	__m128i	f3 = EVAL_FEATURE_all_opponent.v8[3];
-	__m128i	f4 = EVAL_FEATURE_all_opponent.v8[4];
-	__m128i	f5 = EVAL_FEATURE_all_opponent.v8[5];
-
-	foreach_bit(x, b) {
-		f0 = _mm_sub_epi16(f0, EVAL_FEATURE[x].v8[0]);
-		f1 = _mm_sub_epi16(f1, EVAL_FEATURE[x].v8[1]);
-		f2 = _mm_sub_epi16(f2, EVAL_FEATURE[x].v8[2]);
-		f3 = _mm_sub_epi16(f3, EVAL_FEATURE[x].v8[3]);
-		f4 = _mm_sub_epi16(f4, EVAL_FEATURE[x].v8[4]);
-		f5 = _mm_sub_epi16(f5, EVAL_FEATURE[x].v8[5]);
-	}
-	b = ~(board->opponent | board->player);
-	foreach_bit(x, b) {
-		f0 = _mm_add_epi16(f0, EVAL_FEATURE[x].v8[0]);
-		f1 = _mm_add_epi16(f1, EVAL_FEATURE[x].v8[1]);
-		f2 = _mm_add_epi16(f2, EVAL_FEATURE[x].v8[2]);
-		f3 = _mm_add_epi16(f3, EVAL_FEATURE[x].v8[3]);
-		f4 = _mm_add_epi16(f4, EVAL_FEATURE[x].v8[4]);
-		f5 = _mm_add_epi16(f5, EVAL_FEATURE[x].v8[5]);
-	}
-
-	eval->feature.v8[0] = f0;
-	eval->feature.v8[1] = f1;
-	eval->feature.v8[2] = f2;
-	eval->feature.v8[3] = f3;
-	eval->feature.v8[4] = f4;
-	eval->feature.v8[5] = f5;
-#endif
-}
-
-/**
- * @brief Update the features after a player's move.
- *
- * @param eval  Evaluation function.
- * @param move  Move.
- */
-void eval_update(Eval *eval, const Move *move)
-{
-	assert(move->flipped);
-	if (eval->n_empties & 1)
-		eval_update_sse_1(eval, eval, move);
-	else
-		eval_update_sse_0(eval, eval, move);
-}
-
-void eval_update_leaf(Eval *eval_out, const Eval *eval_in, const Move *move)
-{
-	if (eval_in->n_empties & 1)
-		eval_update_sse_1(eval_out, eval_in, move);
-	else
-		eval_update_sse_0(eval_out, eval_in, move);
-}
-
-#endif // hasSSE2
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-#endif
-=======
   #endif
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-}
-
-/**
- * @brief Update the features after a player's move.
- *
- * @param x     Move position.
- * @param f     Flipped bitboard.
- * @param eval  Evaluation function.
- */
-void eval_update(int x, unsigned long long f, Eval *eval)
-{
-	assert(f);
-	if (eval->n_empties & 1)
-		eval_update_sse_1(x, f, eval, eval);
-	else
-		eval_update_sse_0(x, f, eval, eval);
-}
-
-void eval_update_leaf(int x, unsigned long long f, Eval *eval_out, const Eval *eval_in)
-{
-	if (eval_in->n_empties & 1)
-		eval_update_sse_1(x, f, eval_out, eval_in);
-	else
-		eval_update_sse_0(x, f, eval_out, eval_in);
 }
 
-=======
->>>>>>> 6820748 (Unify eval_update_sse 0 & 1)
 #endif // hasSSE2
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/flip_avx512cd.c b/src/flip_avx512cd.c
index d76f2b3..36f8be6 100644
--- a/src/flip_avx512cd.c
+++ b/src/flip_avx512cd.c
@@ -1,59 +1,22 @@
 /**
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @file flip_avx512cd.c
-=======
- * @file flip_avx_lzcnt.c
->>>>>>> 393b667 (Experimental AVX512VL/CD version of move generator)
-=======
- * @file flip_avx512cd.c
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
  *
  * This module deals with flipping discs.
  *
  * For LSB to MSB directions, isolate LS1B can be used to determine
  * contiguous opponent discs.
-<<<<<<< HEAD
-<<<<<<< HEAD
  * For MSB to LSB directions, LZCNT is used.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> b10b8a4 (Add acepck's pcmpgtq flips (but not enabled))
  * Optimization ideas by acepck
  * https://github.com/Nyanyan/Egaroucid/pull/293
  *
  * @date 1998 - 2024
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * For horizontal MSB to LSB, LZCNT is used.
- * For the other MSB to LSB directions, byteswap then LS1B.
-=======
- * For MSB to LSB directions, LZCNT is used.
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
- *
- * @date 1998 - 2023
-=======
- * @date 1998 - 2024
->>>>>>> ba1be42 (AVX512 last flip with lastflip_highcut)
- * @author Toshihiko Okuhara
-<<<<<<< HEAD
- * @version 4.4
->>>>>>> 393b667 (Experimental AVX512VL/CD version of move generator)
-=======
- * @version 4.5
->>>>>>> 6cc30f9 (More avx512 optimization using mask register)
  */
 
 #include "bit.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 const V8DI lrmask[66] = {
 	{{ 0x00000000000000fe, 0x0101010101010100, 0x8040201008040200, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
 	{{ 0x00000000000000fc, 0x0202020202020200, 0x0080402010080400, 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
@@ -121,150 +84,6 @@ const V8DI lrmask[66] = {
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x7f00000000000000, 0x0080808080808080, 0x0040201008040201, 0x0000000000000000 }},
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
-<<<<<<< HEAD
-=======
-static const V4DI lmask_v4[66] = {
-=======
-const V4DI lmask_v4[66] = {
->>>>>>> 17f847d (Experimental BMI2/AVX2/AVX512 lastflip inlined in endgame_sse.c)
-	{{ 0x00000000000000fe, 0x0101010101010100, 0x8040201008040200, 0x0000000000000000 }},
-	{{ 0x00000000000000fc, 0x0202020202020200, 0x0080402010080400, 0x0000000000000100 }},
-	{{ 0x00000000000000f8, 0x0404040404040400, 0x0000804020100800, 0x0000000000010200 }},
-	{{ 0x00000000000000f0, 0x0808080808080800, 0x0000008040201000, 0x0000000001020400 }},
-	{{ 0x00000000000000e0, 0x1010101010101000, 0x0000000080402000, 0x0000000102040800 }},
-	{{ 0x00000000000000c0, 0x2020202020202000, 0x0000000000804000, 0x0000010204081000 }},
-	{{ 0x0000000000000080, 0x4040404040404000, 0x0000000000008000, 0x0001020408102000 }},
-	{{ 0x0000000000000000, 0x8080808080808000, 0x0000000000000000, 0x0102040810204000 }},
-	{{ 0x000000000000fe00, 0x0101010101010000, 0x4020100804020000, 0x0000000000000000 }},
-	{{ 0x000000000000fc00, 0x0202020202020000, 0x8040201008040000, 0x0000000000010000 }},
-	{{ 0x000000000000f800, 0x0404040404040000, 0x0080402010080000, 0x0000000001020000 }},
-	{{ 0x000000000000f000, 0x0808080808080000, 0x0000804020100000, 0x0000000102040000 }},
-	{{ 0x000000000000e000, 0x1010101010100000, 0x0000008040200000, 0x0000010204080000 }},
-	{{ 0x000000000000c000, 0x2020202020200000, 0x0000000080400000, 0x0001020408100000 }},
-	{{ 0x0000000000008000, 0x4040404040400000, 0x0000000000800000, 0x0102040810200000 }},
-	{{ 0x0000000000000000, 0x8080808080800000, 0x0000000000000000, 0x0204081020400000 }},
-	{{ 0x0000000000fe0000, 0x0101010101000000, 0x2010080402000000, 0x0000000000000000 }},
-	{{ 0x0000000000fc0000, 0x0202020202000000, 0x4020100804000000, 0x0000000001000000 }},
-	{{ 0x0000000000f80000, 0x0404040404000000, 0x8040201008000000, 0x0000000102000000 }},
-	{{ 0x0000000000f00000, 0x0808080808000000, 0x0080402010000000, 0x0000010204000000 }},
-	{{ 0x0000000000e00000, 0x1010101010000000, 0x0000804020000000, 0x0001020408000000 }},
-	{{ 0x0000000000c00000, 0x2020202020000000, 0x0000008040000000, 0x0102040810000000 }},
-	{{ 0x0000000000800000, 0x4040404040000000, 0x0000000080000000, 0x0204081020000000 }},
-	{{ 0x0000000000000000, 0x8080808080000000, 0x0000000000000000, 0x0408102040000000 }},
-	{{ 0x00000000fe000000, 0x0101010100000000, 0x1008040200000000, 0x0000000000000000 }},
-	{{ 0x00000000fc000000, 0x0202020200000000, 0x2010080400000000, 0x0000000100000000 }},
-	{{ 0x00000000f8000000, 0x0404040400000000, 0x4020100800000000, 0x0000010200000000 }},
-	{{ 0x00000000f0000000, 0x0808080800000000, 0x8040201000000000, 0x0001020400000000 }},
-	{{ 0x00000000e0000000, 0x1010101000000000, 0x0080402000000000, 0x0102040800000000 }},
-	{{ 0x00000000c0000000, 0x2020202000000000, 0x0000804000000000, 0x0204081000000000 }},
-	{{ 0x0000000080000000, 0x4040404000000000, 0x0000008000000000, 0x0408102000000000 }},
-	{{ 0x0000000000000000, 0x8080808000000000, 0x0000000000000000, 0x0810204000000000 }},
-	{{ 0x000000fe00000000, 0x0101010000000000, 0x0804020000000000, 0x0000000000000000 }},
-	{{ 0x000000fc00000000, 0x0202020000000000, 0x1008040000000000, 0x0000010000000000 }},
-	{{ 0x000000f800000000, 0x0404040000000000, 0x2010080000000000, 0x0001020000000000 }},
-	{{ 0x000000f000000000, 0x0808080000000000, 0x4020100000000000, 0x0102040000000000 }},
-	{{ 0x000000e000000000, 0x1010100000000000, 0x8040200000000000, 0x0204080000000000 }},
-	{{ 0x000000c000000000, 0x2020200000000000, 0x0080400000000000, 0x0408100000000000 }},
-	{{ 0x0000008000000000, 0x4040400000000000, 0x0000800000000000, 0x0810200000000000 }},
-	{{ 0x0000000000000000, 0x8080800000000000, 0x0000000000000000, 0x1020400000000000 }},
-	{{ 0x0000fe0000000000, 0x0101000000000000, 0x0402000000000000, 0x0000000000000000 }},
-	{{ 0x0000fc0000000000, 0x0202000000000000, 0x0804000000000000, 0x0001000000000000 }},
-	{{ 0x0000f80000000000, 0x0404000000000000, 0x1008000000000000, 0x0102000000000000 }},
-	{{ 0x0000f00000000000, 0x0808000000000000, 0x2010000000000000, 0x0204000000000000 }},
-	{{ 0x0000e00000000000, 0x1010000000000000, 0x4020000000000000, 0x0408000000000000 }},
-	{{ 0x0000c00000000000, 0x2020000000000000, 0x8040000000000000, 0x0810000000000000 }},
-	{{ 0x0000800000000000, 0x4040000000000000, 0x0080000000000000, 0x1020000000000000 }},
-	{{ 0x0000000000000000, 0x8080000000000000, 0x0000000000000000, 0x2040000000000000 }},
-	{{ 0x00fe000000000000, 0x0100000000000000, 0x0200000000000000, 0x0000000000000000 }},
-	{{ 0x00fc000000000000, 0x0200000000000000, 0x0400000000000000, 0x0100000000000000 }},
-	{{ 0x00f8000000000000, 0x0400000000000000, 0x0800000000000000, 0x0200000000000000 }},
-	{{ 0x00f0000000000000, 0x0800000000000000, 0x1000000000000000, 0x0400000000000000 }},
-	{{ 0x00e0000000000000, 0x1000000000000000, 0x2000000000000000, 0x0800000000000000 }},
-	{{ 0x00c0000000000000, 0x2000000000000000, 0x4000000000000000, 0x1000000000000000 }},
-	{{ 0x0080000000000000, 0x4000000000000000, 0x8000000000000000, 0x2000000000000000 }},
-	{{ 0x0000000000000000, 0x8000000000000000, 0x0000000000000000, 0x4000000000000000 }},
-	{{ 0xfe00000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xfc00000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf800000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xe000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xc000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x8000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
-};
-
-const V4DI rmask_v4[66] = {
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000003, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000007, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000000f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000001f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000003f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000007f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000001, 0x0000000000000000, 0x0000000000000002 }},
-	{{ 0x0000000000000100, 0x0000000000000002, 0x0000000000000001, 0x0000000000000004 }},
-	{{ 0x0000000000000300, 0x0000000000000004, 0x0000000000000002, 0x0000000000000008 }},
-	{{ 0x0000000000000700, 0x0000000000000008, 0x0000000000000004, 0x0000000000000010 }},
-	{{ 0x0000000000000f00, 0x0000000000000010, 0x0000000000000008, 0x0000000000000020 }},
-	{{ 0x0000000000001f00, 0x0000000000000020, 0x0000000000000010, 0x0000000000000040 }},
-	{{ 0x0000000000003f00, 0x0000000000000040, 0x0000000000000020, 0x0000000000000080 }},
-	{{ 0x0000000000007f00, 0x0000000000000080, 0x0000000000000040, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000101, 0x0000000000000000, 0x0000000000000204 }},
-	{{ 0x0000000000010000, 0x0000000000000202, 0x0000000000000100, 0x0000000000000408 }},
-	{{ 0x0000000000030000, 0x0000000000000404, 0x0000000000000201, 0x0000000000000810 }},
-	{{ 0x0000000000070000, 0x0000000000000808, 0x0000000000000402, 0x0000000000001020 }},
-	{{ 0x00000000000f0000, 0x0000000000001010, 0x0000000000000804, 0x0000000000002040 }},
-	{{ 0x00000000001f0000, 0x0000000000002020, 0x0000000000001008, 0x0000000000004080 }},
-	{{ 0x00000000003f0000, 0x0000000000004040, 0x0000000000002010, 0x0000000000008000 }},
-	{{ 0x00000000007f0000, 0x0000000000008080, 0x0000000000004020, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000010101, 0x0000000000000000, 0x0000000000020408 }},
-	{{ 0x0000000001000000, 0x0000000000020202, 0x0000000000010000, 0x0000000000040810 }},
-	{{ 0x0000000003000000, 0x0000000000040404, 0x0000000000020100, 0x0000000000081020 }},
-	{{ 0x0000000007000000, 0x0000000000080808, 0x0000000000040201, 0x0000000000102040 }},
-	{{ 0x000000000f000000, 0x0000000000101010, 0x0000000000080402, 0x0000000000204080 }},
-	{{ 0x000000001f000000, 0x0000000000202020, 0x0000000000100804, 0x0000000000408000 }},
-	{{ 0x000000003f000000, 0x0000000000404040, 0x0000000000201008, 0x0000000000800000 }},
-	{{ 0x000000007f000000, 0x0000000000808080, 0x0000000000402010, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000001010101, 0x0000000000000000, 0x0000000002040810 }},
-	{{ 0x0000000100000000, 0x0000000002020202, 0x0000000001000000, 0x0000000004081020 }},
-	{{ 0x0000000300000000, 0x0000000004040404, 0x0000000002010000, 0x0000000008102040 }},
-	{{ 0x0000000700000000, 0x0000000008080808, 0x0000000004020100, 0x0000000010204080 }},
-	{{ 0x0000000f00000000, 0x0000000010101010, 0x0000000008040201, 0x0000000020408000 }},
-	{{ 0x0000001f00000000, 0x0000000020202020, 0x0000000010080402, 0x0000000040800000 }},
-	{{ 0x0000003f00000000, 0x0000000040404040, 0x0000000020100804, 0x0000000080000000 }},
-	{{ 0x0000007f00000000, 0x0000000080808080, 0x0000000040201008, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000101010101, 0x0000000000000000, 0x0000000204081020 }},
-	{{ 0x0000010000000000, 0x0000000202020202, 0x0000000100000000, 0x0000000408102040 }},
-	{{ 0x0000030000000000, 0x0000000404040404, 0x0000000201000000, 0x0000000810204080 }},
-	{{ 0x0000070000000000, 0x0000000808080808, 0x0000000402010000, 0x0000001020408000 }},
-	{{ 0x00000f0000000000, 0x0000001010101010, 0x0000000804020100, 0x0000002040800000 }},
-	{{ 0x00001f0000000000, 0x0000002020202020, 0x0000001008040201, 0x0000004080000000 }},
-	{{ 0x00003f0000000000, 0x0000004040404040, 0x0000002010080402, 0x0000008000000000 }},
-	{{ 0x00007f0000000000, 0x0000008080808080, 0x0000004020100804, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000010101010101, 0x0000000000000000, 0x0000020408102040 }},
-	{{ 0x0001000000000000, 0x0000020202020202, 0x0000010000000000, 0x0000040810204080 }},
-	{{ 0x0003000000000000, 0x0000040404040404, 0x0000020100000000, 0x0000081020408000 }},
-	{{ 0x0007000000000000, 0x0000080808080808, 0x0000040201000000, 0x0000102040800000 }},
-	{{ 0x000f000000000000, 0x0000101010101010, 0x0000080402010000, 0x0000204080000000 }},
-	{{ 0x001f000000000000, 0x0000202020202020, 0x0000100804020100, 0x0000408000000000 }},
-	{{ 0x003f000000000000, 0x0000404040404040, 0x0000201008040201, 0x0000800000000000 }},
-	{{ 0x007f000000000000, 0x0000808080808080, 0x0000402010080402, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0001010101010101, 0x0000000000000000, 0x0002040810204080 }},
-	{{ 0x0100000000000000, 0x0002020202020202, 0x0001000000000000, 0x0004081020408000 }},
-	{{ 0x0300000000000000, 0x0004040404040404, 0x0002010000000000, 0x0008102040800000 }},
-	{{ 0x0700000000000000, 0x0008080808080808, 0x0004020100000000, 0x0010204080000000 }},
-	{{ 0x0f00000000000000, 0x0010101010101010, 0x0008040201000000, 0x0020408000000000 }},
-	{{ 0x1f00000000000000, 0x0020202020202020, 0x0010080402010000, 0x0040800000000000 }},
-	{{ 0x3f00000000000000, 0x0040404040404040, 0x0020100804020100, 0x0080000000000000 }},
-	{{ 0x7f00000000000000, 0x0080808080808080, 0x0040201008040201, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
->>>>>>> 393b667 (Experimental AVX512VL/CD version of move generator)
-=======
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 };
 
 /**
@@ -273,79 +92,13 @@ const V4DI rmask_v4[66] = {
  * @param pos player's move.
  * @param P player's disc pattern.
  * @param O opponent's disc pattern.
-<<<<<<< HEAD
-<<<<<<< HEAD
- * @return partially reduced flipped disc pattern.
-=======
- * @return flipped disc pattern.
->>>>>>> 393b667 (Experimental AVX512VL/CD version of move generator)
-=======
  * @return partially reduced flipped disc pattern.
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
  */
 
 __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	__m256i	PP = _mm256_broadcastq_epi64(OP);
-	__m256i	OO = _mm256_broadcastq_epi64(_mm_unpackhi_epi64(OP, OP));
-
-		// right: look for player (or edge) bit with lzcnt
-	__m256i	rM = lrmask[pos].v4[1];
-  #if 1 // use prove
-	__m256i t0 = _mm256_lzcnt_epi64(_mm256_andnot_si256(OO, rM));
-	t0 = _mm256_and_si256(_mm256_srlv_epi64(_mm256_set1_epi64x(0x8000000000000000), t0), PP);
-		// clear masked OO lower than outflank
-	// __m256i rF = _mm256_and_si256(_mm256_xor_si256(_mm256_sub_epi64(_mm256_setzero_si256(), tO), tO), rM);
-	__m256i rF = _mm256_ternarylogic_epi64(_mm256_sub_epi64(_mm256_setzero_si256(), t0), t0, rM, 0x28);
-
-  #else // use mask by acepck
-	__m256i	rP = _mm256_and_si256(PP, rM);
-		// shadow mask lower than leftmost P
-	__m256i	t0 = _mm256_srlv_epi64(_mm256_set1_epi64x(-1), _mm256_lzcnt_epi64(rP));
-		// apply flip if leftmost non-opponent is P
-	// __m256i rE = _mm256_andnot_si256(OO, _mm256_andnot_si256(rP, rM));
-	__m256i	rE = _mm256_ternarylogic_epi64(OO, rM, rP, 0x04);	// masked empty
-	__m256i rF = _mm256_maskz_andnot_epi64(_mm256_cmpgt_epi64_mask(rP, rE), t0, rM);
-  #endif
-		// left: look for non-opponent LS1B
-	__m256i	lM = lrmask[pos].v4[0];
-	__m256i	lO = _mm256_andnot_si256(OO, lM);
-  #if 1 // LS1B
-	// lO = _mm256_and_si256(lO, _mm256_sub_epi64(_mm256_setzero_si256(), lO));	// LS1B
-	// lO = _mm256_and_si256(lO, PP);
-	lO = _mm256_ternarylogic_epi64(lO, _mm256_sub_epi64(_mm256_setzero_si256(), lO), PP, 0x80);
-		// set all bits if outflank = 0, otherwise higher bits than outflank
-	__m256i lE = _mm256_sub_epi64(_mm256_cmpeq_epi64(lO, _mm256_setzero_si256()), lO);
-	// __m256i FF = _mm256_or_si256(rF, _mm256_andnot_si256(lE, lM));
-	__m256i FF = _mm256_ternarylogic_epi64(rF, lE, lM, 0xf2);
-
-  #else // BLSMSK
-	// __m256i t2 = _mm256_xor_si256(_mm256_add_epi64(lO, _mm256_set1_epi64x(-1)), lO);	// BLSMSK
-	// t2 = _mm256_and_si256(lM, t2);	// non-opponent LS1B and opponent inbetween
-	__m256i	t2 = _mm256_ternarylogic_epi64(lM, _mm256_add_epi64(lO, _mm256_set1_epi64x(-1)), lO, 0x60);
-		// apply flip if P is in mask, i.e. LS1B is P
-	// __m256i FF = _mm256_mask_or_epi64(rF, _mm256_test_epi64_mask(PP, t2), rF, _mm256_andnot_si256(PP, t2));
-	__m256i FF = _mm256_mask_ternarylogic_epi64(rF, _mm256_test_epi64_mask(PP, t2), PP, t2, 0xf2);
-  #endif
-
-	return _mm_or_si128(_mm256_castsi256_si128(FF), _mm256_extracti128_si256(FF, 1));
-}
-=======
-	__m256i	PP, OO, flip, outflank, mask, minusone;
-=======
-	__m256i	PP, OO, flip, outflank, mask;
-<<<<<<< HEAD
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
-	__m128i	flip2;
-=======
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
-=======
 	__m256i	PP = _mm256_broadcastq_epi64(OP);
 	__m256i	OO = _mm256_broadcastq_epi64(_mm_unpackhi_epi64(OP, OP));
->>>>>>> b10b8a4 (Add acepck's pcmpgtq flips (but not enabled))
 
 		// right: look for player (or edge) bit with lzcnt
 	__m256i	rM = lrmask[pos].v4[1];
@@ -388,8 +141,3 @@ __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 
 	return _mm_or_si128(_mm256_castsi256_si128(FF), _mm256_extracti128_si256(FF, 1));
 }
-<<<<<<< HEAD
-
->>>>>>> 393b667 (Experimental AVX512VL/CD version of move generator)
-=======
->>>>>>> 17f847d (Experimental BMI2/AVX2/AVX512 lastflip inlined in endgame_sse.c)
diff --git a/src/flip_avx_cvtps.c b/src/flip_avx_cvtps.c
index 59ebc64..8b4f430 100644
--- a/src/flip_avx_cvtps.c
+++ b/src/flip_avx_cvtps.c
@@ -7,29 +7,13 @@
  * contiguous opponent discs.
  * For MSB to LSB directions, MS1B using floating point conversion is used.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2024
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 1998 - 2020
- * @author Toshihiko Okuhara
- * @version 4.4
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
- * @date 1998 - 2024
- * @author Toshihiko Okuhara
- * @version 4.5
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
  */
 
 #include "bit.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 const V8DI lrmask[66] = {
 	{{ 0x00000000000000fe, 0x0101010101010100, 0x8040201008040200, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
 	{{ 0x00000000000000fc, 0x0202020202020200, 0x0080402010080400, 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
@@ -97,147 +81,6 @@ const V8DI lrmask[66] = {
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x7f00000000000000, 0x0080808080808080, 0x0040201008040201, 0x0000000000000000 }},
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
-<<<<<<< HEAD
-=======
-static const V4DI lmask_v4[66] = {
-	{{ 0x00000000000000fe, 0x0101010101010100, 0x8040201008040200, 0x0000000000000000 }},
-	{{ 0x00000000000000fc, 0x0202020202020200, 0x0080402010080400, 0x0000000000000100 }},
-	{{ 0x00000000000000f8, 0x0404040404040400, 0x0000804020100800, 0x0000000000010200 }},
-	{{ 0x00000000000000f0, 0x0808080808080800, 0x0000008040201000, 0x0000000001020400 }},
-	{{ 0x00000000000000e0, 0x1010101010101000, 0x0000000080402000, 0x0000000102040800 }},
-	{{ 0x00000000000000c0, 0x2020202020202000, 0x0000000000804000, 0x0000010204081000 }},
-	{{ 0x0000000000000080, 0x4040404040404000, 0x0000000000008000, 0x0001020408102000 }},
-	{{ 0x0000000000000000, 0x8080808080808000, 0x0000000000000000, 0x0102040810204000 }},
-	{{ 0x000000000000fe00, 0x0101010101010000, 0x4020100804020000, 0x0000000000000000 }},
-	{{ 0x000000000000fc00, 0x0202020202020000, 0x8040201008040000, 0x0000000000010000 }},
-	{{ 0x000000000000f800, 0x0404040404040000, 0x0080402010080000, 0x0000000001020000 }},
-	{{ 0x000000000000f000, 0x0808080808080000, 0x0000804020100000, 0x0000000102040000 }},
-	{{ 0x000000000000e000, 0x1010101010100000, 0x0000008040200000, 0x0000010204080000 }},
-	{{ 0x000000000000c000, 0x2020202020200000, 0x0000000080400000, 0x0001020408100000 }},
-	{{ 0x0000000000008000, 0x4040404040400000, 0x0000000000800000, 0x0102040810200000 }},
-	{{ 0x0000000000000000, 0x8080808080800000, 0x0000000000000000, 0x0204081020400000 }},
-	{{ 0x0000000000fe0000, 0x0101010101000000, 0x2010080402000000, 0x0000000000000000 }},
-	{{ 0x0000000000fc0000, 0x0202020202000000, 0x4020100804000000, 0x0000000001000000 }},
-	{{ 0x0000000000f80000, 0x0404040404000000, 0x8040201008000000, 0x0000000102000000 }},
-	{{ 0x0000000000f00000, 0x0808080808000000, 0x0080402010000000, 0x0000010204000000 }},
-	{{ 0x0000000000e00000, 0x1010101010000000, 0x0000804020000000, 0x0001020408000000 }},
-	{{ 0x0000000000c00000, 0x2020202020000000, 0x0000008040000000, 0x0102040810000000 }},
-	{{ 0x0000000000800000, 0x4040404040000000, 0x0000000080000000, 0x0204081020000000 }},
-	{{ 0x0000000000000000, 0x8080808080000000, 0x0000000000000000, 0x0408102040000000 }},
-	{{ 0x00000000fe000000, 0x0101010100000000, 0x1008040200000000, 0x0000000000000000 }},
-	{{ 0x00000000fc000000, 0x0202020200000000, 0x2010080400000000, 0x0000000100000000 }},
-	{{ 0x00000000f8000000, 0x0404040400000000, 0x4020100800000000, 0x0000010200000000 }},
-	{{ 0x00000000f0000000, 0x0808080800000000, 0x8040201000000000, 0x0001020400000000 }},
-	{{ 0x00000000e0000000, 0x1010101000000000, 0x0080402000000000, 0x0102040800000000 }},
-	{{ 0x00000000c0000000, 0x2020202000000000, 0x0000804000000000, 0x0204081000000000 }},
-	{{ 0x0000000080000000, 0x4040404000000000, 0x0000008000000000, 0x0408102000000000 }},
-	{{ 0x0000000000000000, 0x8080808000000000, 0x0000000000000000, 0x0810204000000000 }},
-	{{ 0x000000fe00000000, 0x0101010000000000, 0x0804020000000000, 0x0000000000000000 }},
-	{{ 0x000000fc00000000, 0x0202020000000000, 0x1008040000000000, 0x0000010000000000 }},
-	{{ 0x000000f800000000, 0x0404040000000000, 0x2010080000000000, 0x0001020000000000 }},
-	{{ 0x000000f000000000, 0x0808080000000000, 0x4020100000000000, 0x0102040000000000 }},
-	{{ 0x000000e000000000, 0x1010100000000000, 0x8040200000000000, 0x0204080000000000 }},
-	{{ 0x000000c000000000, 0x2020200000000000, 0x0080400000000000, 0x0408100000000000 }},
-	{{ 0x0000008000000000, 0x4040400000000000, 0x0000800000000000, 0x0810200000000000 }},
-	{{ 0x0000000000000000, 0x8080800000000000, 0x0000000000000000, 0x1020400000000000 }},
-	{{ 0x0000fe0000000000, 0x0101000000000000, 0x0402000000000000, 0x0000000000000000 }},
-	{{ 0x0000fc0000000000, 0x0202000000000000, 0x0804000000000000, 0x0001000000000000 }},
-	{{ 0x0000f80000000000, 0x0404000000000000, 0x1008000000000000, 0x0102000000000000 }},
-	{{ 0x0000f00000000000, 0x0808000000000000, 0x2010000000000000, 0x0204000000000000 }},
-	{{ 0x0000e00000000000, 0x1010000000000000, 0x4020000000000000, 0x0408000000000000 }},
-	{{ 0x0000c00000000000, 0x2020000000000000, 0x8040000000000000, 0x0810000000000000 }},
-	{{ 0x0000800000000000, 0x4040000000000000, 0x0080000000000000, 0x1020000000000000 }},
-	{{ 0x0000000000000000, 0x8080000000000000, 0x0000000000000000, 0x2040000000000000 }},
-	{{ 0x00fe000000000000, 0x0100000000000000, 0x0200000000000000, 0x0000000000000000 }},
-	{{ 0x00fc000000000000, 0x0200000000000000, 0x0400000000000000, 0x0100000000000000 }},
-	{{ 0x00f8000000000000, 0x0400000000000000, 0x0800000000000000, 0x0200000000000000 }},
-	{{ 0x00f0000000000000, 0x0800000000000000, 0x1000000000000000, 0x0400000000000000 }},
-	{{ 0x00e0000000000000, 0x1000000000000000, 0x2000000000000000, 0x0800000000000000 }},
-	{{ 0x00c0000000000000, 0x2000000000000000, 0x4000000000000000, 0x1000000000000000 }},
-	{{ 0x0080000000000000, 0x4000000000000000, 0x8000000000000000, 0x2000000000000000 }},
-	{{ 0x0000000000000000, 0x8000000000000000, 0x0000000000000000, 0x4000000000000000 }},
-	{{ 0xfe00000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xfc00000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf800000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xe000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xc000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x8000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
-};
-
-static const V4DI rmask_v4[66] = {
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000003, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000007, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000000f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000001f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000003f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000007f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000001, 0x0000000000000000, 0x0000000000000002 }},
-	{{ 0x0000000000000100, 0x0000000000000002, 0x0000000000000001, 0x0000000000000004 }},
-	{{ 0x0000000000000300, 0x0000000000000004, 0x0000000000000002, 0x0000000000000008 }},
-	{{ 0x0000000000000700, 0x0000000000000008, 0x0000000000000004, 0x0000000000000010 }},
-	{{ 0x0000000000000f00, 0x0000000000000010, 0x0000000000000008, 0x0000000000000020 }},
-	{{ 0x0000000000001f00, 0x0000000000000020, 0x0000000000000010, 0x0000000000000040 }},
-	{{ 0x0000000000003f00, 0x0000000000000040, 0x0000000000000020, 0x0000000000000080 }},
-	{{ 0x0000000000007f00, 0x0000000000000080, 0x0000000000000040, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000101, 0x0000000000000000, 0x0000000000000204 }},
-	{{ 0x0000000000010000, 0x0000000000000202, 0x0000000000000100, 0x0000000000000408 }},
-	{{ 0x0000000000030000, 0x0000000000000404, 0x0000000000000201, 0x0000000000000810 }},
-	{{ 0x0000000000070000, 0x0000000000000808, 0x0000000000000402, 0x0000000000001020 }},
-	{{ 0x00000000000f0000, 0x0000000000001010, 0x0000000000000804, 0x0000000000002040 }},
-	{{ 0x00000000001f0000, 0x0000000000002020, 0x0000000000001008, 0x0000000000004080 }},
-	{{ 0x00000000003f0000, 0x0000000000004040, 0x0000000000002010, 0x0000000000008000 }},
-	{{ 0x00000000007f0000, 0x0000000000008080, 0x0000000000004020, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000010101, 0x0000000000000000, 0x0000000000020408 }},
-	{{ 0x0000000001000000, 0x0000000000020202, 0x0000000000010000, 0x0000000000040810 }},
-	{{ 0x0000000003000000, 0x0000000000040404, 0x0000000000020100, 0x0000000000081020 }},
-	{{ 0x0000000007000000, 0x0000000000080808, 0x0000000000040201, 0x0000000000102040 }},
-	{{ 0x000000000f000000, 0x0000000000101010, 0x0000000000080402, 0x0000000000204080 }},
-	{{ 0x000000001f000000, 0x0000000000202020, 0x0000000000100804, 0x0000000000408000 }},
-	{{ 0x000000003f000000, 0x0000000000404040, 0x0000000000201008, 0x0000000000800000 }},
-	{{ 0x000000007f000000, 0x0000000000808080, 0x0000000000402010, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000001010101, 0x0000000000000000, 0x0000000002040810 }},
-	{{ 0x0000000100000000, 0x0000000002020202, 0x0000000001000000, 0x0000000004081020 }},
-	{{ 0x0000000300000000, 0x0000000004040404, 0x0000000002010000, 0x0000000008102040 }},
-	{{ 0x0000000700000000, 0x0000000008080808, 0x0000000004020100, 0x0000000010204080 }},
-	{{ 0x0000000f00000000, 0x0000000010101010, 0x0000000008040201, 0x0000000020408000 }},
-	{{ 0x0000001f00000000, 0x0000000020202020, 0x0000000010080402, 0x0000000040800000 }},
-	{{ 0x0000003f00000000, 0x0000000040404040, 0x0000000020100804, 0x0000000080000000 }},
-	{{ 0x0000007f00000000, 0x0000000080808080, 0x0000000040201008, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000101010101, 0x0000000000000000, 0x0000000204081020 }},
-	{{ 0x0000010000000000, 0x0000000202020202, 0x0000000100000000, 0x0000000408102040 }},
-	{{ 0x0000030000000000, 0x0000000404040404, 0x0000000201000000, 0x0000000810204080 }},
-	{{ 0x0000070000000000, 0x0000000808080808, 0x0000000402010000, 0x0000001020408000 }},
-	{{ 0x00000f0000000000, 0x0000001010101010, 0x0000000804020100, 0x0000002040800000 }},
-	{{ 0x00001f0000000000, 0x0000002020202020, 0x0000001008040201, 0x0000004080000000 }},
-	{{ 0x00003f0000000000, 0x0000004040404040, 0x0000002010080402, 0x0000008000000000 }},
-	{{ 0x00007f0000000000, 0x0000008080808080, 0x0000004020100804, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000010101010101, 0x0000000000000000, 0x0000020408102040 }},
-	{{ 0x0001000000000000, 0x0000020202020202, 0x0000010000000000, 0x0000040810204080 }},
-	{{ 0x0003000000000000, 0x0000040404040404, 0x0000020100000000, 0x0000081020408000 }},
-	{{ 0x0007000000000000, 0x0000080808080808, 0x0000040201000000, 0x0000102040800000 }},
-	{{ 0x000f000000000000, 0x0000101010101010, 0x0000080402010000, 0x0000204080000000 }},
-	{{ 0x001f000000000000, 0x0000202020202020, 0x0000100804020100, 0x0000408000000000 }},
-	{{ 0x003f000000000000, 0x0000404040404040, 0x0000201008040201, 0x0000800000000000 }},
-	{{ 0x007f000000000000, 0x0000808080808080, 0x0000402010080402, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0001010101010101, 0x0000000000000000, 0x0002040810204080 }},
-	{{ 0x0100000000000000, 0x0002020202020202, 0x0001000000000000, 0x0004081020408000 }},
-	{{ 0x0300000000000000, 0x0004040404040404, 0x0002010000000000, 0x0008102040800000 }},
-	{{ 0x0700000000000000, 0x0008080808080808, 0x0004020100000000, 0x0010204080000000 }},
-	{{ 0x0f00000000000000, 0x0010101010101010, 0x0008040201000000, 0x0020408000000000 }},
-	{{ 0x1f00000000000000, 0x0020202020202020, 0x0010080402010000, 0x0040800000000000 }},
-	{{ 0x3f00000000000000, 0x0040404040404040, 0x0020100804020100, 0x0080000000000000 }},
-	{{ 0x7f00000000000000, 0x0080808080808080, 0x0040201008040201, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 };
 
 /**
@@ -246,47 +89,19 @@ static const V4DI rmask_v4[66] = {
  * @param pos player's move.
  * @param P player's disc pattern.
  * @param O opponent's disc pattern.
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @return partially reduced flipped disc pattern.
-=======
- * @return flipped disc pattern.
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
- * @return partially reduced flipped disc pattern.
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
  */
 
 __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 {
 	__m256i	PP, OO, flip, outflank, mask;
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	__m128i	flip2;
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
 	const __m256 exp_mask = _mm256_castsi256_ps(_mm256_set1_epi32(0xff800000));
 	const __m256i minusone = _mm256_set1_epi64x(-1);
 
 	PP = _mm256_broadcastq_epi64(OP);
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> b675196 (Replace VPERMQ due to MSVC's code and for Zen)
 	OO = _mm256_broadcastq_epi64(_mm_unpackhi_epi64(OP, OP));
 
 	mask = lrmask[pos].v4[1];
-=======
-	OO = _mm256_permute4x64_epi64(_mm256_castsi128_si256(OP), 0x55);
-
-<<<<<<< HEAD
-	mask = rmask_v4[pos].v4;
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
-	mask = lrmask[pos].v4[1];
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 		// look for non-opponent MS1B
 	outflank = _mm256_andnot_si256(OO, mask);
 		// MS1B_31 - clear mantissa to leave implicit MSB alone
@@ -299,15 +114,7 @@ __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 		// set all bits higher than outflank
 	flip = _mm256_and_si256(mask, _mm256_sub_epi64(_mm256_setzero_si256(), _mm256_add_epi64(outflank, outflank)));
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	mask = lrmask[pos].v4[0];
-=======
-	mask = lmask_v4[pos].v4;
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
-	mask = lrmask[pos].v4[0];
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 		// look for non-opponent LS1B
 	outflank = _mm256_andnot_si256(OO, mask);
 	outflank = _mm256_andnot_si256(_mm256_add_epi64(outflank, minusone), outflank);	// LS1B
@@ -318,21 +125,6 @@ __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 	outflank = _mm256_add_epi64(outflank, _mm256_srli_epi64(outflank, 63));
 	flip = _mm256_or_si256(flip, _mm256_and_si256(outflank, mask));
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	return _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
-=======
-	flip2 = _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
-	flip2 = _mm_or_si128(flip2, _mm_shuffle_epi32(flip2, 0x4e));	// SWAP64
-
-	return flip2;
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
-	return flip;
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
-=======
 	return _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
 }
 
diff --git a/src/flip_avx_lzcnt.c b/src/flip_avx_lzcnt.c
index 487495e..b08b503 100644
--- a/src/flip_avx_lzcnt.c
+++ b/src/flip_avx_lzcnt.c
@@ -8,29 +8,13 @@
  * For horizontal MSB to LSB, LZCNT is used.
  * For the other MSB to LSB directions, byteswap then LS1B.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2024
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 1998 - 2020
- * @author Toshihiko Okuhara
- * @version 4.4
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
- * @date 1998 - 2024
- * @author Toshihiko Okuhara
- * @version 4.5
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
  */
 
 #include "bit.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 const V8DI lrmask[66] = {
 	{{ 0x00000000000000fe, 0x0101010101010100, 0x8040201008040200, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
 	{{ 0x00000000000000fc, 0x0202020202020200, 0x0080402010080400, 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
@@ -98,147 +82,6 @@ const V8DI lrmask[66] = {
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x7f00000000000000, 0x0080808080808080, 0x0040201008040201, 0x0000000000000000 }},
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
-<<<<<<< HEAD
-=======
-static const V4DI lmask_v4[66] = {
-	{{ 0x00000000000000fe, 0x0101010101010100, 0x8040201008040200, 0x0000000000000000 }},
-	{{ 0x00000000000000fc, 0x0202020202020200, 0x0080402010080400, 0x0000000000000100 }},
-	{{ 0x00000000000000f8, 0x0404040404040400, 0x0000804020100800, 0x0000000000010200 }},
-	{{ 0x00000000000000f0, 0x0808080808080800, 0x0000008040201000, 0x0000000001020400 }},
-	{{ 0x00000000000000e0, 0x1010101010101000, 0x0000000080402000, 0x0000000102040800 }},
-	{{ 0x00000000000000c0, 0x2020202020202000, 0x0000000000804000, 0x0000010204081000 }},
-	{{ 0x0000000000000080, 0x4040404040404000, 0x0000000000008000, 0x0001020408102000 }},
-	{{ 0x0000000000000000, 0x8080808080808000, 0x0000000000000000, 0x0102040810204000 }},
-	{{ 0x000000000000fe00, 0x0101010101010000, 0x4020100804020000, 0x0000000000000000 }},
-	{{ 0x000000000000fc00, 0x0202020202020000, 0x8040201008040000, 0x0000000000010000 }},
-	{{ 0x000000000000f800, 0x0404040404040000, 0x0080402010080000, 0x0000000001020000 }},
-	{{ 0x000000000000f000, 0x0808080808080000, 0x0000804020100000, 0x0000000102040000 }},
-	{{ 0x000000000000e000, 0x1010101010100000, 0x0000008040200000, 0x0000010204080000 }},
-	{{ 0x000000000000c000, 0x2020202020200000, 0x0000000080400000, 0x0001020408100000 }},
-	{{ 0x0000000000008000, 0x4040404040400000, 0x0000000000800000, 0x0102040810200000 }},
-	{{ 0x0000000000000000, 0x8080808080800000, 0x0000000000000000, 0x0204081020400000 }},
-	{{ 0x0000000000fe0000, 0x0101010101000000, 0x2010080402000000, 0x0000000000000000 }},
-	{{ 0x0000000000fc0000, 0x0202020202000000, 0x4020100804000000, 0x0000000001000000 }},
-	{{ 0x0000000000f80000, 0x0404040404000000, 0x8040201008000000, 0x0000000102000000 }},
-	{{ 0x0000000000f00000, 0x0808080808000000, 0x0080402010000000, 0x0000010204000000 }},
-	{{ 0x0000000000e00000, 0x1010101010000000, 0x0000804020000000, 0x0001020408000000 }},
-	{{ 0x0000000000c00000, 0x2020202020000000, 0x0000008040000000, 0x0102040810000000 }},
-	{{ 0x0000000000800000, 0x4040404040000000, 0x0000000080000000, 0x0204081020000000 }},
-	{{ 0x0000000000000000, 0x8080808080000000, 0x0000000000000000, 0x0408102040000000 }},
-	{{ 0x00000000fe000000, 0x0101010100000000, 0x1008040200000000, 0x0000000000000000 }},
-	{{ 0x00000000fc000000, 0x0202020200000000, 0x2010080400000000, 0x0000000100000000 }},
-	{{ 0x00000000f8000000, 0x0404040400000000, 0x4020100800000000, 0x0000010200000000 }},
-	{{ 0x00000000f0000000, 0x0808080800000000, 0x8040201000000000, 0x0001020400000000 }},
-	{{ 0x00000000e0000000, 0x1010101000000000, 0x0080402000000000, 0x0102040800000000 }},
-	{{ 0x00000000c0000000, 0x2020202000000000, 0x0000804000000000, 0x0204081000000000 }},
-	{{ 0x0000000080000000, 0x4040404000000000, 0x0000008000000000, 0x0408102000000000 }},
-	{{ 0x0000000000000000, 0x8080808000000000, 0x0000000000000000, 0x0810204000000000 }},
-	{{ 0x000000fe00000000, 0x0101010000000000, 0x0804020000000000, 0x0000000000000000 }},
-	{{ 0x000000fc00000000, 0x0202020000000000, 0x1008040000000000, 0x0000010000000000 }},
-	{{ 0x000000f800000000, 0x0404040000000000, 0x2010080000000000, 0x0001020000000000 }},
-	{{ 0x000000f000000000, 0x0808080000000000, 0x4020100000000000, 0x0102040000000000 }},
-	{{ 0x000000e000000000, 0x1010100000000000, 0x8040200000000000, 0x0204080000000000 }},
-	{{ 0x000000c000000000, 0x2020200000000000, 0x0080400000000000, 0x0408100000000000 }},
-	{{ 0x0000008000000000, 0x4040400000000000, 0x0000800000000000, 0x0810200000000000 }},
-	{{ 0x0000000000000000, 0x8080800000000000, 0x0000000000000000, 0x1020400000000000 }},
-	{{ 0x0000fe0000000000, 0x0101000000000000, 0x0402000000000000, 0x0000000000000000 }},
-	{{ 0x0000fc0000000000, 0x0202000000000000, 0x0804000000000000, 0x0001000000000000 }},
-	{{ 0x0000f80000000000, 0x0404000000000000, 0x1008000000000000, 0x0102000000000000 }},
-	{{ 0x0000f00000000000, 0x0808000000000000, 0x2010000000000000, 0x0204000000000000 }},
-	{{ 0x0000e00000000000, 0x1010000000000000, 0x4020000000000000, 0x0408000000000000 }},
-	{{ 0x0000c00000000000, 0x2020000000000000, 0x8040000000000000, 0x0810000000000000 }},
-	{{ 0x0000800000000000, 0x4040000000000000, 0x0080000000000000, 0x1020000000000000 }},
-	{{ 0x0000000000000000, 0x8080000000000000, 0x0000000000000000, 0x2040000000000000 }},
-	{{ 0x00fe000000000000, 0x0100000000000000, 0x0200000000000000, 0x0000000000000000 }},
-	{{ 0x00fc000000000000, 0x0200000000000000, 0x0400000000000000, 0x0100000000000000 }},
-	{{ 0x00f8000000000000, 0x0400000000000000, 0x0800000000000000, 0x0200000000000000 }},
-	{{ 0x00f0000000000000, 0x0800000000000000, 0x1000000000000000, 0x0400000000000000 }},
-	{{ 0x00e0000000000000, 0x1000000000000000, 0x2000000000000000, 0x0800000000000000 }},
-	{{ 0x00c0000000000000, 0x2000000000000000, 0x4000000000000000, 0x1000000000000000 }},
-	{{ 0x0080000000000000, 0x4000000000000000, 0x8000000000000000, 0x2000000000000000 }},
-	{{ 0x0000000000000000, 0x8000000000000000, 0x0000000000000000, 0x4000000000000000 }},
-	{{ 0xfe00000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xfc00000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf800000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xe000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xc000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x8000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
-};
-
-static const V4DI rmask_v4[66] = {
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000003, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000007, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000000f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000001f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000003f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000007f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000001, 0x0000000000000000, 0x0000000000000002 }},
-	{{ 0x0000000000000100, 0x0000000000000002, 0x0000000000000001, 0x0000000000000004 }},
-	{{ 0x0000000000000300, 0x0000000000000004, 0x0000000000000002, 0x0000000000000008 }},
-	{{ 0x0000000000000700, 0x0000000000000008, 0x0000000000000004, 0x0000000000000010 }},
-	{{ 0x0000000000000f00, 0x0000000000000010, 0x0000000000000008, 0x0000000000000020 }},
-	{{ 0x0000000000001f00, 0x0000000000000020, 0x0000000000000010, 0x0000000000000040 }},
-	{{ 0x0000000000003f00, 0x0000000000000040, 0x0000000000000020, 0x0000000000000080 }},
-	{{ 0x0000000000007f00, 0x0000000000000080, 0x0000000000000040, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000101, 0x0000000000000000, 0x0000000000000204 }},
-	{{ 0x0000000000010000, 0x0000000000000202, 0x0000000000000100, 0x0000000000000408 }},
-	{{ 0x0000000000030000, 0x0000000000000404, 0x0000000000000201, 0x0000000000000810 }},
-	{{ 0x0000000000070000, 0x0000000000000808, 0x0000000000000402, 0x0000000000001020 }},
-	{{ 0x00000000000f0000, 0x0000000000001010, 0x0000000000000804, 0x0000000000002040 }},
-	{{ 0x00000000001f0000, 0x0000000000002020, 0x0000000000001008, 0x0000000000004080 }},
-	{{ 0x00000000003f0000, 0x0000000000004040, 0x0000000000002010, 0x0000000000008000 }},
-	{{ 0x00000000007f0000, 0x0000000000008080, 0x0000000000004020, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000010101, 0x0000000000000000, 0x0000000000020408 }},
-	{{ 0x0000000001000000, 0x0000000000020202, 0x0000000000010000, 0x0000000000040810 }},
-	{{ 0x0000000003000000, 0x0000000000040404, 0x0000000000020100, 0x0000000000081020 }},
-	{{ 0x0000000007000000, 0x0000000000080808, 0x0000000000040201, 0x0000000000102040 }},
-	{{ 0x000000000f000000, 0x0000000000101010, 0x0000000000080402, 0x0000000000204080 }},
-	{{ 0x000000001f000000, 0x0000000000202020, 0x0000000000100804, 0x0000000000408000 }},
-	{{ 0x000000003f000000, 0x0000000000404040, 0x0000000000201008, 0x0000000000800000 }},
-	{{ 0x000000007f000000, 0x0000000000808080, 0x0000000000402010, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000001010101, 0x0000000000000000, 0x0000000002040810 }},
-	{{ 0x0000000100000000, 0x0000000002020202, 0x0000000001000000, 0x0000000004081020 }},
-	{{ 0x0000000300000000, 0x0000000004040404, 0x0000000002010000, 0x0000000008102040 }},
-	{{ 0x0000000700000000, 0x0000000008080808, 0x0000000004020100, 0x0000000010204080 }},
-	{{ 0x0000000f00000000, 0x0000000010101010, 0x0000000008040201, 0x0000000020408000 }},
-	{{ 0x0000001f00000000, 0x0000000020202020, 0x0000000010080402, 0x0000000040800000 }},
-	{{ 0x0000003f00000000, 0x0000000040404040, 0x0000000020100804, 0x0000000080000000 }},
-	{{ 0x0000007f00000000, 0x0000000080808080, 0x0000000040201008, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000101010101, 0x0000000000000000, 0x0000000204081020 }},
-	{{ 0x0000010000000000, 0x0000000202020202, 0x0000000100000000, 0x0000000408102040 }},
-	{{ 0x0000030000000000, 0x0000000404040404, 0x0000000201000000, 0x0000000810204080 }},
-	{{ 0x0000070000000000, 0x0000000808080808, 0x0000000402010000, 0x0000001020408000 }},
-	{{ 0x00000f0000000000, 0x0000001010101010, 0x0000000804020100, 0x0000002040800000 }},
-	{{ 0x00001f0000000000, 0x0000002020202020, 0x0000001008040201, 0x0000004080000000 }},
-	{{ 0x00003f0000000000, 0x0000004040404040, 0x0000002010080402, 0x0000008000000000 }},
-	{{ 0x00007f0000000000, 0x0000008080808080, 0x0000004020100804, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000010101010101, 0x0000000000000000, 0x0000020408102040 }},
-	{{ 0x0001000000000000, 0x0000020202020202, 0x0000010000000000, 0x0000040810204080 }},
-	{{ 0x0003000000000000, 0x0000040404040404, 0x0000020100000000, 0x0000081020408000 }},
-	{{ 0x0007000000000000, 0x0000080808080808, 0x0000040201000000, 0x0000102040800000 }},
-	{{ 0x000f000000000000, 0x0000101010101010, 0x0000080402010000, 0x0000204080000000 }},
-	{{ 0x001f000000000000, 0x0000202020202020, 0x0000100804020100, 0x0000408000000000 }},
-	{{ 0x003f000000000000, 0x0000404040404040, 0x0000201008040201, 0x0000800000000000 }},
-	{{ 0x007f000000000000, 0x0000808080808080, 0x0000402010080402, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0001010101010101, 0x0000000000000000, 0x0002040810204080 }},
-	{{ 0x0100000000000000, 0x0002020202020202, 0x0001000000000000, 0x0004081020408000 }},
-	{{ 0x0300000000000000, 0x0004040404040404, 0x0002010000000000, 0x0008102040800000 }},
-	{{ 0x0700000000000000, 0x0008080808080808, 0x0004020100000000, 0x0010204080000000 }},
-	{{ 0x0f00000000000000, 0x0010101010101010, 0x0008040201000000, 0x0020408000000000 }},
-	{{ 0x1f00000000000000, 0x0020202020202020, 0x0010080402010000, 0x0040800000000000 }},
-	{{ 0x3f00000000000000, 0x0040404040404040, 0x0020100804020100, 0x0080000000000000 }},
-	{{ 0x7f00000000000000, 0x0080808080808080, 0x0040201008040201, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 };
 
 /**
@@ -247,15 +90,7 @@ static const V4DI rmask_v4[66] = {
  * @param pos player's move.
  * @param P player's disc pattern.
  * @param O opponent's disc pattern.
-<<<<<<< HEAD
-<<<<<<< HEAD
- * @return partially reduced flipped disc pattern.
-=======
- * @return flipped disc pattern.
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
  * @return partially reduced flipped disc pattern.
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
  */
 
 #if __GNUC__ == 4 && __GNUC_MINOR__ == 7
@@ -265,36 +100,14 @@ static const V4DI rmask_v4[66] = {
 __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 {
 	__m256i	PP, mOO, flip, outflank, mask, ocontig;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	__m128i	outflank1;
 	const __m256i mbswapll = _mm256_broadcastsi128_si256(_mm_set_epi64x(0x08090a0b0c0d0e0f, 0x0001020304050607));
 
 	PP = _mm256_broadcastq_epi64(OP);
 	mOO = _mm256_and_si256(_mm256_broadcastq_epi64(_mm_unpackhi_epi64(OP, OP)),
-<<<<<<< HEAD
 		_mm256_set_epi64x(0x007e7e7e7e7e7e00, 0x007e7e7e7e7e7e00, 0x00ffffffffffff00, 0x7e7e7e7e7e7e7e7e));	// (sentinel on the edge)
 
 	mask = lrmask[pos].v4[1];
-=======
-	__m128i	flip2, outflank1;
-=======
-	__m128i	outflank1;
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
-	const __m256i mbswapll = _mm256_broadcastsi128_si256(_mm_set_epi64x(0x08090a0b0c0d0e0f, 0x0001020304050607));
-
-	PP = _mm256_broadcastq_epi64(OP);
-	mOO = _mm256_and_si256(_mm256_permute4x64_epi64(_mm256_castsi128_si256(OP), 0x55),
-=======
->>>>>>> b675196 (Replace VPERMQ due to MSVC's code and for Zen)
-		_mm256_set_epi64x(0x007e7e7e7e7e7e00, 0x007e7e7e7e7e7e00, 0x00ffffffffffff00, 0x7e7e7e7e7e7e7e7e));	// (sentinel on the edge)
-
-<<<<<<< HEAD
-	mask = rmask_v4[pos].v4;
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
-	mask = lrmask[pos].v4[1];
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 	ocontig = _mm256_andnot_si256(mOO, mask);
 		// -1 (CPU)
 	outflank1 = _mm_cvtsi64_si128(0x8000000000000000ULL >> lzcnt_u64(_mm_cvtsi128_si64(_mm256_castsi256_si128(ocontig))));
@@ -307,19 +120,7 @@ __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 		// set all bits higher than outflank
 	flip = _mm256_and_si256(_mm256_sub_epi64(_mm256_setzero_si256(), _mm256_add_epi64(outflank, outflank)), mask);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	mask = lrmask[pos].v4[0];
-=======
-	mask = lmask_v4[pos].v4;
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
-	mask = lrmask_v4[pos].v4[0];
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
-=======
 	mask = lrmask[pos].v4[0];
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 		// look for non-opponent (or edge) bit
 	ocontig = _mm256_andnot_si256(mOO, mask);
 	ocontig = _mm256_and_si256(ocontig, _mm256_sub_epi64(_mm256_setzero_si256(), ocontig));	// LS1B
@@ -328,21 +129,6 @@ __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 	outflank = _mm256_add_epi64(outflank, _mm256_cmpeq_epi64(outflank, ocontig));
 	flip = _mm256_or_si256(flip, _mm256_and_si256(outflank, mask));
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	return _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
-=======
-	flip2 = _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
-	flip2 = _mm_or_si128(flip2, _mm_shuffle_epi32(flip2, 0x4e));	// SWAP64
-
-	return flip2;
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
-	return flip;
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
-=======
 	return _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
 }
 
diff --git a/src/flip_avx_ppfill.c b/src/flip_avx_ppfill.c
index c75b611..c5bb105 100644
--- a/src/flip_avx_ppfill.c
+++ b/src/flip_avx_ppfill.c
@@ -8,35 +8,13 @@
  * For MSB to LSB directions, parallel prefix fill is used to isolate
  * MS1B.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
  * @date 1998 - 2024
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 1998 - 2020
-=======
- * @date 1998 - 2023
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
- * @author Toshihiko Okuhara
-<<<<<<< HEAD
- * @version 4.4
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
- * @version 4.5
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
  */
 
 #include "bit.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 const V8DI lrmask[66] = {
 	{{ 0x00000000000000fe, 0x0101010101010100, 0x8040201008040200, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
 	{{ 0x00000000000000fc, 0x0202020202020200, 0x0080402010080400, 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
@@ -104,150 +82,6 @@ const V8DI lrmask[66] = {
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x7f00000000000000, 0x0080808080808080, 0x0040201008040201, 0x0000000000000000 }},
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
-<<<<<<< HEAD
-=======
-static const V4DI lmask_v4[66] = {
-=======
-const V4DI lmask_v4[66] = {
->>>>>>> 17f847d (Experimental BMI2/AVX2/AVX512 lastflip inlined in endgame_sse.c)
-	{{ 0x00000000000000fe, 0x0101010101010100, 0x8040201008040200, 0x0000000000000000 }},
-	{{ 0x00000000000000fc, 0x0202020202020200, 0x0080402010080400, 0x0000000000000100 }},
-	{{ 0x00000000000000f8, 0x0404040404040400, 0x0000804020100800, 0x0000000000010200 }},
-	{{ 0x00000000000000f0, 0x0808080808080800, 0x0000008040201000, 0x0000000001020400 }},
-	{{ 0x00000000000000e0, 0x1010101010101000, 0x0000000080402000, 0x0000000102040800 }},
-	{{ 0x00000000000000c0, 0x2020202020202000, 0x0000000000804000, 0x0000010204081000 }},
-	{{ 0x0000000000000080, 0x4040404040404000, 0x0000000000008000, 0x0001020408102000 }},
-	{{ 0x0000000000000000, 0x8080808080808000, 0x0000000000000000, 0x0102040810204000 }},
-	{{ 0x000000000000fe00, 0x0101010101010000, 0x4020100804020000, 0x0000000000000000 }},
-	{{ 0x000000000000fc00, 0x0202020202020000, 0x8040201008040000, 0x0000000000010000 }},
-	{{ 0x000000000000f800, 0x0404040404040000, 0x0080402010080000, 0x0000000001020000 }},
-	{{ 0x000000000000f000, 0x0808080808080000, 0x0000804020100000, 0x0000000102040000 }},
-	{{ 0x000000000000e000, 0x1010101010100000, 0x0000008040200000, 0x0000010204080000 }},
-	{{ 0x000000000000c000, 0x2020202020200000, 0x0000000080400000, 0x0001020408100000 }},
-	{{ 0x0000000000008000, 0x4040404040400000, 0x0000000000800000, 0x0102040810200000 }},
-	{{ 0x0000000000000000, 0x8080808080800000, 0x0000000000000000, 0x0204081020400000 }},
-	{{ 0x0000000000fe0000, 0x0101010101000000, 0x2010080402000000, 0x0000000000000000 }},
-	{{ 0x0000000000fc0000, 0x0202020202000000, 0x4020100804000000, 0x0000000001000000 }},
-	{{ 0x0000000000f80000, 0x0404040404000000, 0x8040201008000000, 0x0000000102000000 }},
-	{{ 0x0000000000f00000, 0x0808080808000000, 0x0080402010000000, 0x0000010204000000 }},
-	{{ 0x0000000000e00000, 0x1010101010000000, 0x0000804020000000, 0x0001020408000000 }},
-	{{ 0x0000000000c00000, 0x2020202020000000, 0x0000008040000000, 0x0102040810000000 }},
-	{{ 0x0000000000800000, 0x4040404040000000, 0x0000000080000000, 0x0204081020000000 }},
-	{{ 0x0000000000000000, 0x8080808080000000, 0x0000000000000000, 0x0408102040000000 }},
-	{{ 0x00000000fe000000, 0x0101010100000000, 0x1008040200000000, 0x0000000000000000 }},
-	{{ 0x00000000fc000000, 0x0202020200000000, 0x2010080400000000, 0x0000000100000000 }},
-	{{ 0x00000000f8000000, 0x0404040400000000, 0x4020100800000000, 0x0000010200000000 }},
-	{{ 0x00000000f0000000, 0x0808080800000000, 0x8040201000000000, 0x0001020400000000 }},
-	{{ 0x00000000e0000000, 0x1010101000000000, 0x0080402000000000, 0x0102040800000000 }},
-	{{ 0x00000000c0000000, 0x2020202000000000, 0x0000804000000000, 0x0204081000000000 }},
-	{{ 0x0000000080000000, 0x4040404000000000, 0x0000008000000000, 0x0408102000000000 }},
-	{{ 0x0000000000000000, 0x8080808000000000, 0x0000000000000000, 0x0810204000000000 }},
-	{{ 0x000000fe00000000, 0x0101010000000000, 0x0804020000000000, 0x0000000000000000 }},
-	{{ 0x000000fc00000000, 0x0202020000000000, 0x1008040000000000, 0x0000010000000000 }},
-	{{ 0x000000f800000000, 0x0404040000000000, 0x2010080000000000, 0x0001020000000000 }},
-	{{ 0x000000f000000000, 0x0808080000000000, 0x4020100000000000, 0x0102040000000000 }},
-	{{ 0x000000e000000000, 0x1010100000000000, 0x8040200000000000, 0x0204080000000000 }},
-	{{ 0x000000c000000000, 0x2020200000000000, 0x0080400000000000, 0x0408100000000000 }},
-	{{ 0x0000008000000000, 0x4040400000000000, 0x0000800000000000, 0x0810200000000000 }},
-	{{ 0x0000000000000000, 0x8080800000000000, 0x0000000000000000, 0x1020400000000000 }},
-	{{ 0x0000fe0000000000, 0x0101000000000000, 0x0402000000000000, 0x0000000000000000 }},
-	{{ 0x0000fc0000000000, 0x0202000000000000, 0x0804000000000000, 0x0001000000000000 }},
-	{{ 0x0000f80000000000, 0x0404000000000000, 0x1008000000000000, 0x0102000000000000 }},
-	{{ 0x0000f00000000000, 0x0808000000000000, 0x2010000000000000, 0x0204000000000000 }},
-	{{ 0x0000e00000000000, 0x1010000000000000, 0x4020000000000000, 0x0408000000000000 }},
-	{{ 0x0000c00000000000, 0x2020000000000000, 0x8040000000000000, 0x0810000000000000 }},
-	{{ 0x0000800000000000, 0x4040000000000000, 0x0080000000000000, 0x1020000000000000 }},
-	{{ 0x0000000000000000, 0x8080000000000000, 0x0000000000000000, 0x2040000000000000 }},
-	{{ 0x00fe000000000000, 0x0100000000000000, 0x0200000000000000, 0x0000000000000000 }},
-	{{ 0x00fc000000000000, 0x0200000000000000, 0x0400000000000000, 0x0100000000000000 }},
-	{{ 0x00f8000000000000, 0x0400000000000000, 0x0800000000000000, 0x0200000000000000 }},
-	{{ 0x00f0000000000000, 0x0800000000000000, 0x1000000000000000, 0x0400000000000000 }},
-	{{ 0x00e0000000000000, 0x1000000000000000, 0x2000000000000000, 0x0800000000000000 }},
-	{{ 0x00c0000000000000, 0x2000000000000000, 0x4000000000000000, 0x1000000000000000 }},
-	{{ 0x0080000000000000, 0x4000000000000000, 0x8000000000000000, 0x2000000000000000 }},
-	{{ 0x0000000000000000, 0x8000000000000000, 0x0000000000000000, 0x4000000000000000 }},
-	{{ 0xfe00000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xfc00000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf800000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xe000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xc000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x8000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
-};
-
-const V4DI rmask_v4[66] = {
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000003, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000007, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000000f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000001f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000003f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000007f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000001, 0x0000000000000000, 0x0000000000000002 }},
-	{{ 0x0000000000000100, 0x0000000000000002, 0x0000000000000001, 0x0000000000000004 }},
-	{{ 0x0000000000000300, 0x0000000000000004, 0x0000000000000002, 0x0000000000000008 }},
-	{{ 0x0000000000000700, 0x0000000000000008, 0x0000000000000004, 0x0000000000000010 }},
-	{{ 0x0000000000000f00, 0x0000000000000010, 0x0000000000000008, 0x0000000000000020 }},
-	{{ 0x0000000000001f00, 0x0000000000000020, 0x0000000000000010, 0x0000000000000040 }},
-	{{ 0x0000000000003f00, 0x0000000000000040, 0x0000000000000020, 0x0000000000000080 }},
-	{{ 0x0000000000007f00, 0x0000000000000080, 0x0000000000000040, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000101, 0x0000000000000000, 0x0000000000000204 }},
-	{{ 0x0000000000010000, 0x0000000000000202, 0x0000000000000100, 0x0000000000000408 }},
-	{{ 0x0000000000030000, 0x0000000000000404, 0x0000000000000201, 0x0000000000000810 }},
-	{{ 0x0000000000070000, 0x0000000000000808, 0x0000000000000402, 0x0000000000001020 }},
-	{{ 0x00000000000f0000, 0x0000000000001010, 0x0000000000000804, 0x0000000000002040 }},
-	{{ 0x00000000001f0000, 0x0000000000002020, 0x0000000000001008, 0x0000000000004080 }},
-	{{ 0x00000000003f0000, 0x0000000000004040, 0x0000000000002010, 0x0000000000008000 }},
-	{{ 0x00000000007f0000, 0x0000000000008080, 0x0000000000004020, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000010101, 0x0000000000000000, 0x0000000000020408 }},
-	{{ 0x0000000001000000, 0x0000000000020202, 0x0000000000010000, 0x0000000000040810 }},
-	{{ 0x0000000003000000, 0x0000000000040404, 0x0000000000020100, 0x0000000000081020 }},
-	{{ 0x0000000007000000, 0x0000000000080808, 0x0000000000040201, 0x0000000000102040 }},
-	{{ 0x000000000f000000, 0x0000000000101010, 0x0000000000080402, 0x0000000000204080 }},
-	{{ 0x000000001f000000, 0x0000000000202020, 0x0000000000100804, 0x0000000000408000 }},
-	{{ 0x000000003f000000, 0x0000000000404040, 0x0000000000201008, 0x0000000000800000 }},
-	{{ 0x000000007f000000, 0x0000000000808080, 0x0000000000402010, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000001010101, 0x0000000000000000, 0x0000000002040810 }},
-	{{ 0x0000000100000000, 0x0000000002020202, 0x0000000001000000, 0x0000000004081020 }},
-	{{ 0x0000000300000000, 0x0000000004040404, 0x0000000002010000, 0x0000000008102040 }},
-	{{ 0x0000000700000000, 0x0000000008080808, 0x0000000004020100, 0x0000000010204080 }},
-	{{ 0x0000000f00000000, 0x0000000010101010, 0x0000000008040201, 0x0000000020408000 }},
-	{{ 0x0000001f00000000, 0x0000000020202020, 0x0000000010080402, 0x0000000040800000 }},
-	{{ 0x0000003f00000000, 0x0000000040404040, 0x0000000020100804, 0x0000000080000000 }},
-	{{ 0x0000007f00000000, 0x0000000080808080, 0x0000000040201008, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000101010101, 0x0000000000000000, 0x0000000204081020 }},
-	{{ 0x0000010000000000, 0x0000000202020202, 0x0000000100000000, 0x0000000408102040 }},
-	{{ 0x0000030000000000, 0x0000000404040404, 0x0000000201000000, 0x0000000810204080 }},
-	{{ 0x0000070000000000, 0x0000000808080808, 0x0000000402010000, 0x0000001020408000 }},
-	{{ 0x00000f0000000000, 0x0000001010101010, 0x0000000804020100, 0x0000002040800000 }},
-	{{ 0x00001f0000000000, 0x0000002020202020, 0x0000001008040201, 0x0000004080000000 }},
-	{{ 0x00003f0000000000, 0x0000004040404040, 0x0000002010080402, 0x0000008000000000 }},
-	{{ 0x00007f0000000000, 0x0000008080808080, 0x0000004020100804, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000010101010101, 0x0000000000000000, 0x0000020408102040 }},
-	{{ 0x0001000000000000, 0x0000020202020202, 0x0000010000000000, 0x0000040810204080 }},
-	{{ 0x0003000000000000, 0x0000040404040404, 0x0000020100000000, 0x0000081020408000 }},
-	{{ 0x0007000000000000, 0x0000080808080808, 0x0000040201000000, 0x0000102040800000 }},
-	{{ 0x000f000000000000, 0x0000101010101010, 0x0000080402010000, 0x0000204080000000 }},
-	{{ 0x001f000000000000, 0x0000202020202020, 0x0000100804020100, 0x0000408000000000 }},
-	{{ 0x003f000000000000, 0x0000404040404040, 0x0000201008040201, 0x0000800000000000 }},
-	{{ 0x007f000000000000, 0x0000808080808080, 0x0000402010080402, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0001010101010101, 0x0000000000000000, 0x0002040810204080 }},
-	{{ 0x0100000000000000, 0x0002020202020202, 0x0001000000000000, 0x0004081020408000 }},
-	{{ 0x0300000000000000, 0x0004040404040404, 0x0002010000000000, 0x0008102040800000 }},
-	{{ 0x0700000000000000, 0x0008080808080808, 0x0004020100000000, 0x0010204080000000 }},
-	{{ 0x0f00000000000000, 0x0010101010101010, 0x0008040201000000, 0x0020408000000000 }},
-	{{ 0x1f00000000000000, 0x0020202020202020, 0x0010080402010000, 0x0040800000000000 }},
-	{{ 0x3f00000000000000, 0x0040404040404040, 0x0020100804020100, 0x0080000000000000 }},
-	{{ 0x7f00000000000000, 0x0080808080808080, 0x0040201008040201, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 };
 
 /**
@@ -256,52 +90,18 @@ const V4DI rmask_v4[66] = {
  * @param pos player's move.
  * @param P player's disc pattern.
  * @param O opponent's disc pattern.
-<<<<<<< HEAD
-<<<<<<< HEAD
- * @return partially reduced flipped disc pattern.
-=======
- * @return flipped disc pattern.
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
  * @return partially reduced flipped disc pattern.
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
  */
 
 __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	__m256i	PP, OO, flip, outflank, eraser, mask;
-<<<<<<< HEAD
 
 	PP = _mm256_broadcastq_epi64(OP);
 	OO = _mm256_broadcastq_epi64(_mm_unpackhi_epi64(OP, OP));
 
 	mask = lrmask[pos].v4[1];
 		// isolate non-opponent MS1B by clearing lower shadow bits
-=======
-	__m256i	PP, OO, flip, outflank, eraser, mask, minusone;
-=======
-	__m256i	PP, OO, flip, outflank, eraser, mask;
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
-	__m128i	flip2;
-=======
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
-
-	PP = _mm256_broadcastq_epi64(OP);
-	OO = _mm256_broadcastq_epi64(_mm_unpackhi_epi64(OP, OP));
-
-<<<<<<< HEAD
-	mask = rmask_v4[pos].v4;
-<<<<<<< HEAD
-		// isolate non-opponent MS1B by clearing lower bits
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
-=======
-	mask = lrmask[pos].v4[1];
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
-		// isolate non-opponent MS1B by clearing lower shadow bits
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
 	eraser = _mm256_andnot_si256(OO, mask);
 #if 0 // blute force parallel prefix fill
 	outflank = _mm256_and_si256(PP, mask);
@@ -310,8 +110,6 @@ __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 	eraser = _mm256_or_si256(eraser, _mm256_srli_epi64(eraser, 4));
 	eraser = _mm256_or_si256(eraser, _mm256_srli_epi64(eraser, 8));
 	eraser = _mm256_or_si256(eraser, _mm256_srli_epi64(eraser, 16));
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank = _mm256_andnot_si256(eraser, _mm256_add_epi64(outflank, outflank));
 	outflank = _mm256_andnot_si256(_mm256_srli_epi64(eraser, 32), outflank);
 #else // clear valid bits only using variable shift
@@ -336,82 +134,3 @@ __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 
 	return _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
 }
-=======
-	eraser = _mm256_or_si256(eraser, _mm256_srli_epi64(eraser, 32));
-=======
->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines)
-	outflank = _mm256_andnot_si256(eraser, _mm256_add_epi64(outflank, outflank));
-	outflank = _mm256_andnot_si256(_mm256_srli_epi64(eraser, 32), outflank);
-#else // clear valid bits only using variable shift
-	outflank = _mm256_sllv_epi64(_mm256_and_si256(PP, mask), _mm256_set_epi64x(7, 9, 8, 1));
-	eraser = _mm256_or_si256(eraser, _mm256_srlv_epi64(eraser, _mm256_set_epi64x(7, 9, 8, 1)));
-	outflank = _mm256_andnot_si256(eraser, outflank);
-	eraser = _mm256_srlv_epi64(eraser, _mm256_set_epi64x(14, 18, 16, 2));
-	outflank = _mm256_andnot_si256(eraser, outflank);
-	outflank = _mm256_andnot_si256(_mm256_srlv_epi64(eraser, _mm256_set_epi64x(14, 18, 16, 2)), outflank);
-#endif
-		// set mask bits higher than outflank
-	flip = _mm256_and_si256(mask, _mm256_sub_epi64(_mm256_setzero_si256(), outflank));
-
-	mask = lrmask[pos].v4[0];
-		// look for non-opponent LS1B
-	outflank = _mm256_andnot_si256(OO, mask);
-	outflank = _mm256_and_si256(outflank, _mm256_sub_epi64(_mm256_setzero_si256(), outflank));	// LS1B
-	outflank = _mm256_and_si256(outflank, PP);
-		// set all bits if outflank = 0, otherwise higher bits than outflank
-	eraser = _mm256_sub_epi64(_mm256_cmpeq_epi64(outflank, _mm256_setzero_si256()), outflank);
-	flip = _mm256_or_si256(flip, _mm256_andnot_si256(eraser, mask));
-
-	return _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
-}
-<<<<<<< HEAD
-
-<<<<<<< HEAD
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
-#if 0 // experimental AVX2 lastflip version in endgame_sse.c
-__m128i vectorcall mm_LastFlip(const __m256i PP, int pos)
-{
-	__m256i	flip, outflank, eraser, mask;
-	__m128i	flip2;
-
-	mask = rmask_v4[pos].v4;
-		// isolate non-opponent MS1B by clearing lower shadow bits
-	eraser = _mm256_and_si256(PP, mask);
-#if 0 // blute force parallel prefix fill
-	outflank = _mm256_add_epi64(eraser, eraser);
-	eraser = _mm256_or_si256(eraser, _mm256_srli_epi64(eraser, 1));
-	eraser = _mm256_or_si256(eraser, _mm256_srli_epi64(eraser, 2));
-	eraser = _mm256_or_si256(eraser, _mm256_srli_epi64(eraser, 4));
-	eraser = _mm256_or_si256(eraser, _mm256_srli_epi64(eraser, 8));
-	eraser = _mm256_or_si256(eraser, _mm256_srli_epi64(eraser, 16));
-	outflank = _mm256_andnot_si256(eraser, outflank);
-	outflank = _mm256_andnot_si256(_mm256_srli_epi64(eraser, 32), outflank);
-#else // clear valid bits only using variable shift
-	outflank = _mm256_sllv_epi64(eraser, _mm256_set_epi64x(7, 9, 8, 1));
-	eraser = _mm256_or_si256(eraser, _mm256_srlv_epi64(eraser, _mm256_set_epi64x(7, 9, 8, 1)));
-	outflank = _mm256_andnot_si256(eraser, outflank);
-	outflank = _mm256_andnot_si256(_mm256_srlv_epi64(eraser, _mm256_set_epi64x(14, 18, 16, 2)), outflank);
-	outflank = _mm256_andnot_si256(_mm256_srlv_epi64(eraser, _mm256_set_epi64x(28, 36, 32, 4)), outflank);
-#endif
-		// set mask bits higher than outflank
-	flip = _mm256_and_si256(mask, _mm256_sub_epi64(_mm256_setzero_si256(), outflank));
-
-	mask = lmask_v4[pos].v4;
-		// look for non-opponent LS1B
-	outflank = _mm256_and_si256(PP, mask);
-	outflank = _mm256_and_si256(outflank, _mm256_sub_epi64(_mm256_setzero_si256(), outflank));	// LS1B
-	outflank = _mm256_and_si256(outflank, PP);
-		// set all bits if outflank = 0, otherwise higher bits than outflank
-	eraser = _mm256_sub_epi64(_mm256_cmpeq_epi64(outflank, _mm256_setzero_si256()), outflank);
-	flip = _mm256_or_si256(flip, _mm256_andnot_si256(eraser, mask));
-
-	flip2 = _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
-	flip2 = _mm_or_si128(flip2, _mm_shuffle_epi32(flip2, 0x4e));	// SWAP64
-
-	return flip2;
-}
-#endif
->>>>>>> 9ea5b5e (BMI2 and mm_LastFlip version of board_score_sse_1 added (but not enabled))
-=======
->>>>>>> 17f847d (Experimental BMI2/AVX2/AVX512 lastflip inlined in endgame_sse.c)
diff --git a/src/flip_avx_ppseq.c b/src/flip_avx_ppseq.c
index c327d68..0033117 100644
--- a/src/flip_avx_ppseq.c
+++ b/src/flip_avx_ppseq.c
@@ -8,21 +8,9 @@
  * For MSB to LSB directions, sequencial search with parallel prefix
  * is used.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2024
  * @author Toshihiko Okuhara
  * @version 4.5
-=======
- * @date 1998 - 2020
- * @author Toshihiko Okuhara
- * @version 4.4
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
- * @date 1998 - 2024
- * @author Toshihiko Okuhara
- * @version 4.5
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
  */
 
 #include "bit.h"
@@ -102,45 +90,19 @@ const V4DI lmask_v4[66] = {
  * @param pos player's move.
  * @param P player's disc pattern.
  * @param O opponent's disc pattern.
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @return partially reduced flipped disc pattern.
-=======
- * @return flipped disc pattern.
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
- * @return partially reduced flipped disc pattern.
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
  */
 
 __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 {
 	__m256i	PP, mOO, flip, shift2, pre, outflank, mask, ocontig;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1);
-
-	PP = _mm256_broadcastq_epi64(OP);
-	mOO = _mm256_and_si256(_mm256_broadcastq_epi64(_mm_unpackhi_epi64(OP, OP)),
-		_mm256_set_epi64x(0x007e7e7e7e7e7e00, 0x007e7e7e7e7e7e00, 0x00ffffffffffff00, 0x7e7e7e7e7e7e7e7e));	// (sentinel on the edge)
-
-	ocontig = _mm256_set1_epi64x(X_TO_BIT[pos]);
-=======
-	__m128i	flip2;
-=======
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
 	const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1);
 
 	PP = _mm256_broadcastq_epi64(OP);
 	mOO = _mm256_and_si256(_mm256_broadcastq_epi64(_mm_unpackhi_epi64(OP, OP)),
 		_mm256_set_epi64x(0x007e7e7e7e7e7e00, 0x007e7e7e7e7e7e00, 0x00ffffffffffff00, 0x7e7e7e7e7e7e7e7e));	// (sentinel on the edge)
 
-<<<<<<< HEAD
-	ocontig = _mm256_broadcastq_epi64(*(__m128i *) &X_TO_BIT[pos]);
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
 	ocontig = _mm256_set1_epi64x(X_TO_BIT[pos]);
->>>>>>> 88b2057 (Replace broadcast from memory with set1)
 	ocontig = _mm256_and_si256(mOO, _mm256_srlv_epi64(ocontig, shift1897));
 	ocontig = _mm256_or_si256(ocontig, _mm256_and_si256(mOO, _mm256_srlv_epi64(ocontig, shift1897)));
 	pre = _mm256_and_si256(mOO, _mm256_srlv_epi64(mOO, shift1897));	// parallel prefix
@@ -159,21 +121,6 @@ __m128i vectorcall mm_Flip(const __m128i OP, int pos)
 	outflank = _mm256_add_epi64(outflank, _mm256_cmpeq_epi64(outflank, ocontig));
 	flip = _mm256_or_si256(flip, _mm256_and_si256(outflank, mask));
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	return _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
-=======
-	flip2 = _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
-	flip2 = _mm_or_si128(flip2, _mm_shuffle_epi32(flip2, 0x4e));	// SWAP64
-
-	return flip2;
->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added)
-=======
-	return flip;
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
-=======
 	return _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially)
 }
 
diff --git a/src/flip_avx_shuf_max.c b/src/flip_avx_shuf_max.c
index 2057daf..83976f2 100644
--- a/src/flip_avx_shuf_max.c
+++ b/src/flip_avx_shuf_max.c
@@ -14,10 +14,6 @@
 
 #include "bit.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 const V8DI lrmask[66] = {
 	{{ 0x00000000000000fe, 0x0101010101010100, 0x8040201008040200, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
 	{{ 0x00000000000000fc, 0x0202020202020200, 0x0080402010080400, 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
@@ -85,147 +81,6 @@ const V8DI lrmask[66] = {
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x7f00000000000000, 0x0080808080808080, 0x0040201008040201, 0x0000000000000000 }},
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
 	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
-<<<<<<< HEAD
-=======
-static const V4DI lmask_v4[66] = {
-	{{ 0x00000000000000fe, 0x0101010101010100, 0x8040201008040200, 0x0000000000000000 }},
-	{{ 0x00000000000000fc, 0x0202020202020200, 0x0080402010080400, 0x0000000000000100 }},
-	{{ 0x00000000000000f8, 0x0404040404040400, 0x0000804020100800, 0x0000000000010200 }},
-	{{ 0x00000000000000f0, 0x0808080808080800, 0x0000008040201000, 0x0000000001020400 }},
-	{{ 0x00000000000000e0, 0x1010101010101000, 0x0000000080402000, 0x0000000102040800 }},
-	{{ 0x00000000000000c0, 0x2020202020202000, 0x0000000000804000, 0x0000010204081000 }},
-	{{ 0x0000000000000080, 0x4040404040404000, 0x0000000000008000, 0x0001020408102000 }},
-	{{ 0x0000000000000000, 0x8080808080808000, 0x0000000000000000, 0x0102040810204000 }},
-	{{ 0x000000000000fe00, 0x0101010101010000, 0x4020100804020000, 0x0000000000000000 }},
-	{{ 0x000000000000fc00, 0x0202020202020000, 0x8040201008040000, 0x0000000000010000 }},
-	{{ 0x000000000000f800, 0x0404040404040000, 0x0080402010080000, 0x0000000001020000 }},
-	{{ 0x000000000000f000, 0x0808080808080000, 0x0000804020100000, 0x0000000102040000 }},
-	{{ 0x000000000000e000, 0x1010101010100000, 0x0000008040200000, 0x0000010204080000 }},
-	{{ 0x000000000000c000, 0x2020202020200000, 0x0000000080400000, 0x0001020408100000 }},
-	{{ 0x0000000000008000, 0x4040404040400000, 0x0000000000800000, 0x0102040810200000 }},
-	{{ 0x0000000000000000, 0x8080808080800000, 0x0000000000000000, 0x0204081020400000 }},
-	{{ 0x0000000000fe0000, 0x0101010101000000, 0x2010080402000000, 0x0000000000000000 }},
-	{{ 0x0000000000fc0000, 0x0202020202000000, 0x4020100804000000, 0x0000000001000000 }},
-	{{ 0x0000000000f80000, 0x0404040404000000, 0x8040201008000000, 0x0000000102000000 }},
-	{{ 0x0000000000f00000, 0x0808080808000000, 0x0080402010000000, 0x0000010204000000 }},
-	{{ 0x0000000000e00000, 0x1010101010000000, 0x0000804020000000, 0x0001020408000000 }},
-	{{ 0x0000000000c00000, 0x2020202020000000, 0x0000008040000000, 0x0102040810000000 }},
-	{{ 0x0000000000800000, 0x4040404040000000, 0x0000000080000000, 0x0204081020000000 }},
-	{{ 0x0000000000000000, 0x8080808080000000, 0x0000000000000000, 0x0408102040000000 }},
-	{{ 0x00000000fe000000, 0x0101010100000000, 0x1008040200000000, 0x0000000000000000 }},
-	{{ 0x00000000fc000000, 0x0202020200000000, 0x2010080400000000, 0x0000000100000000 }},
-	{{ 0x00000000f8000000, 0x0404040400000000, 0x4020100800000000, 0x0000010200000000 }},
-	{{ 0x00000000f0000000, 0x0808080800000000, 0x8040201000000000, 0x0001020400000000 }},
-	{{ 0x00000000e0000000, 0x1010101000000000, 0x0080402000000000, 0x0102040800000000 }},
-	{{ 0x00000000c0000000, 0x2020202000000000, 0x0000804000000000, 0x0204081000000000 }},
-	{{ 0x0000000080000000, 0x4040404000000000, 0x0000008000000000, 0x0408102000000000 }},
-	{{ 0x0000000000000000, 0x8080808000000000, 0x0000000000000000, 0x0810204000000000 }},
-	{{ 0x000000fe00000000, 0x0101010000000000, 0x0804020000000000, 0x0000000000000000 }},
-	{{ 0x000000fc00000000, 0x0202020000000000, 0x1008040000000000, 0x0000010000000000 }},
-	{{ 0x000000f800000000, 0x0404040000000000, 0x2010080000000000, 0x0001020000000000 }},
-	{{ 0x000000f000000000, 0x0808080000000000, 0x4020100000000000, 0x0102040000000000 }},
-	{{ 0x000000e000000000, 0x1010100000000000, 0x8040200000000000, 0x0204080000000000 }},
-	{{ 0x000000c000000000, 0x2020200000000000, 0x0080400000000000, 0x0408100000000000 }},
-	{{ 0x0000008000000000, 0x4040400000000000, 0x0000800000000000, 0x0810200000000000 }},
-	{{ 0x0000000000000000, 0x8080800000000000, 0x0000000000000000, 0x1020400000000000 }},
-	{{ 0x0000fe0000000000, 0x0101000000000000, 0x0402000000000000, 0x0000000000000000 }},
-	{{ 0x0000fc0000000000, 0x0202000000000000, 0x0804000000000000, 0x0001000000000000 }},
-	{{ 0x0000f80000000000, 0x0404000000000000, 0x1008000000000000, 0x0102000000000000 }},
-	{{ 0x0000f00000000000, 0x0808000000000000, 0x2010000000000000, 0x0204000000000000 }},
-	{{ 0x0000e00000000000, 0x1010000000000000, 0x4020000000000000, 0x0408000000000000 }},
-	{{ 0x0000c00000000000, 0x2020000000000000, 0x8040000000000000, 0x0810000000000000 }},
-	{{ 0x0000800000000000, 0x4040000000000000, 0x0080000000000000, 0x1020000000000000 }},
-	{{ 0x0000000000000000, 0x8080000000000000, 0x0000000000000000, 0x2040000000000000 }},
-	{{ 0x00fe000000000000, 0x0100000000000000, 0x0200000000000000, 0x0000000000000000 }},
-	{{ 0x00fc000000000000, 0x0200000000000000, 0x0400000000000000, 0x0100000000000000 }},
-	{{ 0x00f8000000000000, 0x0400000000000000, 0x0800000000000000, 0x0200000000000000 }},
-	{{ 0x00f0000000000000, 0x0800000000000000, 0x1000000000000000, 0x0400000000000000 }},
-	{{ 0x00e0000000000000, 0x1000000000000000, 0x2000000000000000, 0x0800000000000000 }},
-	{{ 0x00c0000000000000, 0x2000000000000000, 0x4000000000000000, 0x1000000000000000 }},
-	{{ 0x0080000000000000, 0x4000000000000000, 0x8000000000000000, 0x2000000000000000 }},
-	{{ 0x0000000000000000, 0x8000000000000000, 0x0000000000000000, 0x4000000000000000 }},
-	{{ 0xfe00000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xfc00000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf800000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xe000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xc000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x8000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
-};
-
-static const V4DI rmask_v4[66] = {
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000003, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000007, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000000f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000001f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000003f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000007f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000001, 0x0000000000000000, 0x0000000000000002 }},
-	{{ 0x0000000000000100, 0x0000000000000002, 0x0000000000000001, 0x0000000000000004 }},
-	{{ 0x0000000000000300, 0x0000000000000004, 0x0000000000000002, 0x0000000000000008 }},
-	{{ 0x0000000000000700, 0x0000000000000008, 0x0000000000000004, 0x0000000000000010 }},
-	{{ 0x0000000000000f00, 0x0000000000000010, 0x0000000000000008, 0x0000000000000020 }},
-	{{ 0x0000000000001f00, 0x0000000000000020, 0x0000000000000010, 0x0000000000000040 }},
-	{{ 0x0000000000003f00, 0x0000000000000040, 0x0000000000000020, 0x0000000000000080 }},
-	{{ 0x0000000000007f00, 0x0000000000000080, 0x0000000000000040, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000101, 0x0000000000000000, 0x0000000000000204 }},
-	{{ 0x0000000000010000, 0x0000000000000202, 0x0000000000000100, 0x0000000000000408 }},
-	{{ 0x0000000000030000, 0x0000000000000404, 0x0000000000000201, 0x0000000000000810 }},
-	{{ 0x0000000000070000, 0x0000000000000808, 0x0000000000000402, 0x0000000000001020 }},
-	{{ 0x00000000000f0000, 0x0000000000001010, 0x0000000000000804, 0x0000000000002040 }},
-	{{ 0x00000000001f0000, 0x0000000000002020, 0x0000000000001008, 0x0000000000004080 }},
-	{{ 0x00000000003f0000, 0x0000000000004040, 0x0000000000002010, 0x0000000000008000 }},
-	{{ 0x00000000007f0000, 0x0000000000008080, 0x0000000000004020, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000010101, 0x0000000000000000, 0x0000000000020408 }},
-	{{ 0x0000000001000000, 0x0000000000020202, 0x0000000000010000, 0x0000000000040810 }},
-	{{ 0x0000000003000000, 0x0000000000040404, 0x0000000000020100, 0x0000000000081020 }},
-	{{ 0x0000000007000000, 0x0000000000080808, 0x0000000000040201, 0x0000000000102040 }},
-	{{ 0x000000000f000000, 0x0000000000101010, 0x0000000000080402, 0x0000000000204080 }},
-	{{ 0x000000001f000000, 0x0000000000202020, 0x0000000000100804, 0x0000000000408000 }},
-	{{ 0x000000003f000000, 0x0000000000404040, 0x0000000000201008, 0x0000000000800000 }},
-	{{ 0x000000007f000000, 0x0000000000808080, 0x0000000000402010, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000001010101, 0x0000000000000000, 0x0000000002040810 }},
-	{{ 0x0000000100000000, 0x0000000002020202, 0x0000000001000000, 0x0000000004081020 }},
-	{{ 0x0000000300000000, 0x0000000004040404, 0x0000000002010000, 0x0000000008102040 }},
-	{{ 0x0000000700000000, 0x0000000008080808, 0x0000000004020100, 0x0000000010204080 }},
-	{{ 0x0000000f00000000, 0x0000000010101010, 0x0000000008040201, 0x0000000020408000 }},
-	{{ 0x0000001f00000000, 0x0000000020202020, 0x0000000010080402, 0x0000000040800000 }},
-	{{ 0x0000003f00000000, 0x0000000040404040, 0x0000000020100804, 0x0000000080000000 }},
-	{{ 0x0000007f00000000, 0x0000000080808080, 0x0000000040201008, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000101010101, 0x0000000000000000, 0x0000000204081020 }},
-	{{ 0x0000010000000000, 0x0000000202020202, 0x0000000100000000, 0x0000000408102040 }},
-	{{ 0x0000030000000000, 0x0000000404040404, 0x0000000201000000, 0x0000000810204080 }},
-	{{ 0x0000070000000000, 0x0000000808080808, 0x0000000402010000, 0x0000001020408000 }},
-	{{ 0x00000f0000000000, 0x0000001010101010, 0x0000000804020100, 0x0000002040800000 }},
-	{{ 0x00001f0000000000, 0x0000002020202020, 0x0000001008040201, 0x0000004080000000 }},
-	{{ 0x00003f0000000000, 0x0000004040404040, 0x0000002010080402, 0x0000008000000000 }},
-	{{ 0x00007f0000000000, 0x0000008080808080, 0x0000004020100804, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000010101010101, 0x0000000000000000, 0x0000020408102040 }},
-	{{ 0x0001000000000000, 0x0000020202020202, 0x0000010000000000, 0x0000040810204080 }},
-	{{ 0x0003000000000000, 0x0000040404040404, 0x0000020100000000, 0x0000081020408000 }},
-	{{ 0x0007000000000000, 0x0000080808080808, 0x0000040201000000, 0x0000102040800000 }},
-	{{ 0x000f000000000000, 0x0000101010101010, 0x0000080402010000, 0x0000204080000000 }},
-	{{ 0x001f000000000000, 0x0000202020202020, 0x0000100804020100, 0x0000408000000000 }},
-	{{ 0x003f000000000000, 0x0000404040404040, 0x0000201008040201, 0x0000800000000000 }},
-	{{ 0x007f000000000000, 0x0000808080808080, 0x0000402010080402, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0001010101010101, 0x0000000000000000, 0x0002040810204080 }},
-	{{ 0x0100000000000000, 0x0002020202020202, 0x0001000000000000, 0x0004081020408000 }},
-	{{ 0x0300000000000000, 0x0004040404040404, 0x0002010000000000, 0x0008102040800000 }},
-	{{ 0x0700000000000000, 0x0008080808080808, 0x0004020100000000, 0x0010204080000000 }},
-	{{ 0x0f00000000000000, 0x0010101010101010, 0x0008040201000000, 0x0020408000000000 }},
-	{{ 0x1f00000000000000, 0x0020202020202020, 0x0010080402010000, 0x0040800000000000 }},
-	{{ 0x3f00000000000000, 0x0040404040404040, 0x0020100804020100, 0x0080000000000000 }},
-	{{ 0x7f00000000000000, 0x0080808080808080, 0x0040201008040201, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }}
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
-=======
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 };
 
 /**
@@ -237,43 +92,16 @@ static const V4DI rmask_v4[66] = {
  * @return flipped disc pattern.
  */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-__m256i vectorcall mm_Flip(const __m128i OP, int pos)
-{
-	__m256i	PP, OO, flip, outflank, eraser, mask;
-=======
-__m128i vectorcall mm_Flip(const __m128i OP, int pos)
-=======
 __m256i vectorcall mm_Flip(const __m128i OP, int pos)
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 {
 	__m256i	PP, OO, flip, outflank, eraser, mask;
-<<<<<<< HEAD
-	__m128i	flip2;
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
-=======
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
 	const __m256i mask0F0F = _mm256_set1_epi16(0x0F0F);
 	const __m256i ms1bL = _mm256_broadcastsi128_si256(_mm_set_epi64x(0x0808080808080808, 0x0404040402020100));
 
 	PP = _mm256_broadcastq_epi64(OP);
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> b675196 (Replace VPERMQ due to MSVC's code and for Zen)
 	OO = _mm256_broadcastq_epi64(_mm_unpackhi_epi64(OP, OP));
 
 	mask = lrmask[pos].v4[1];
-=======
-	OO = _mm256_permute4x64_epi64(_mm256_castsi128_si256(OP), 0x55);
-
-<<<<<<< HEAD
-	mask = rmask_v4[pos].v4;
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
-=======
-	mask = lrmask[pos].v4[1];
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 		// look for non-opponent MS1B
 	outflank = _mm256_andnot_si256(OO, mask);
 		// mask to clear low half if high half != 0 in word/dword/qword
@@ -288,15 +116,7 @@ __m256i vectorcall mm_Flip(const __m128i OP, int pos)
 		// set all bits higher than outflank
 	flip = _mm256_and_si256(_mm256_sub_epi64(_mm256_setzero_si256(), _mm256_add_epi64(outflank, outflank)), mask);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	mask = lrmask[pos].v4[0];
-=======
-	mask = lmask_v4[pos].v4;
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
-=======
 	mask = lrmask[pos].v4[0];
->>>>>>> 85955bf (lazy high cut version of board_score_sse_1)
 		// look for non-opponent LS1B
 	outflank = _mm256_andnot_si256(OO, mask);
 	outflank = _mm256_and_si256(outflank, _mm256_sub_epi64(_mm256_setzero_si256(), outflank));	// LS1B
@@ -305,17 +125,6 @@ __m256i vectorcall mm_Flip(const __m128i OP, int pos)
 	eraser = _mm256_sub_epi64(_mm256_cmpeq_epi64(outflank, _mm256_setzero_si256()), outflank);
 	flip = _mm256_or_si256(flip, _mm256_andnot_si256(eraser, mask));
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	return flip;
-=======
-	flip2 = _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1));
-	flip2 = _mm_or_si128(flip2, _mm_shuffle_epi32(flip2, 0x4e));	// SWAP64
-
-	return flip2;
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
-=======
 	return flip;
->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c)
 }
 
diff --git a/src/flip_bitscan.c b/src/flip_bitscan.c
index e7f725a..26827ef 100644
--- a/src/flip_bitscan.c
+++ b/src/flip_bitscan.c
@@ -38,54 +38,17 @@
  * If the OUTFLANK search is in MSB to LSB direction, lzcnt64 is used if 
  * available, or __builtin_bswap is used to use carry propagation backwards.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2020
-=======
- * @date 1998 - 2017
->>>>>>> b3f048d (copyright changes)
-=======
- * @date 1998 - 2018
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
- * @date 1998 - 2020
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
  * @author Richard Delorme
  * @author Toshihiko Okuhara
  * @version 4.4
  */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#include "bit_intrinsics.h"
-=======
-#include "bit.h"
-=======
 #include "bit_intrinsics.h"
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-/** outflank array (indexed with inner 6 bits) */
-/* static const unsigned char OUTFLANK_0[64] = {
-	0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x20,
-	0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x40,
-	0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x20,
-	0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x80
-}; */
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-
-#define LODWORD(l) ((unsigned int)(l))
-#define HIDWORD(l) ((unsigned int)((l)>>32))
 
-=======
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 #define LODWORD(l) ((unsigned int)(l))
 #define HIDWORD(l) ((unsigned int)((l)>>32))
 
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 /** rotated outflank array (indexed with inner 6 bits) */
 static const unsigned char OUTFLANK_2[64] = {	// ...ahgfe
 	0x00, 0x10, 0x00, 0x00, 0x01, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x02, 0x12, 0x00, 0x00,
@@ -115,10 +78,6 @@ static const unsigned char OUTFLANK_5[64] = {	// ...dcbah
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 /** flip array (indexed with rotated outflank) */
 static const unsigned long long FLIPPED_2_H[25] = {	// ...ahgfe
 	0x0000000000000000, 0x0808080808080808, 0x1818181818181818, 0x0000000000000000,
@@ -128,29 +87,8 @@ static const unsigned long long FLIPPED_2_H[25] = {	// ...ahgfe
 	0x0202020202020202, 0x0a0a0a0a0a0a0a0a, 0x1a1a1a1a1a1a1a1a, 0x0000000000000000,
 	0x3a3a3a3a3a3a3a3a, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x7a7a7a7a7a7a7a7a
-<<<<<<< HEAD
-};
-=======
-/* static const unsigned char OUTFLANK_6[64] = {
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x08, 0x08, 0x08, 0x08, 0x04, 0x04, 0x02, 0x01,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 };
 
-static const unsigned char OUTFLANK_7[64] = {
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x08, 0x08, 0x08, 0x08, 0x04, 0x04, 0x02, 0x01
-}; */
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-
-<<<<<<< HEAD
-=======
-};
-
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 static const unsigned long long FLIPPED_2_V[25] = {
 	0x0000000000000000, 0x00000000ff000000, 0x000000ffff000000, 0x0000000000000000,
 	0x0000ffffff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
@@ -159,7 +97,6 @@ static const unsigned long long FLIPPED_2_V[25] = {
 	0x000000000000ff00, 0x00000000ff00ff00, 0x000000ffff00ff00, 0x0000000000000000,
 	0x0000ffffff00ff00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x00ffffffff00ff00
-<<<<<<< HEAD
 };
 
 static const unsigned long long FLIPPED_3_H[21] = {	// ...bahgf
@@ -212,10 +149,7 @@ static const unsigned long long FLIPPED_5_V[18] = {
 	0x000000ff00000000, 0x00ff00ff00000000
 };
 
-<<<<<<< HEAD
-#include "bit.h"
 
-<<<<<<< HEAD
 /*
  * Set all bits below the sole outflank bit if outfrank != 0
  */
@@ -226,103 +160,6 @@ static inline unsigned long long OutflankToFlipmask(unsigned long long outflank)
 	return __builtin_addcll(flipmask, 0, cy, &cy);
 }
 #elif (defined(_M_X64) && (_MSC_VER >= 1800)) || (defined(__x86_64__) && defined(__GNUC__) && (__GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)))
-=======
-/** flip array (indexed with outflank) */
-static const unsigned long long FLIPPED_2_H[130] = {
-	0x0000000000000000, 0x0202020202020202, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0808080808080808, 0x0a0a0a0a0a0a0a0a, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x1818181818181818, 0x1a1a1a1a1a1a1a1a, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x3838383838383838, 0x3a3a3a3a3a3a3a3a, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x7878787878787878, 0x7a7a7a7a7a7a7a7a
-=======
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-};
-
-static const unsigned long long FLIPPED_3_H[21] = {	// ...bahgf
-	0x0000000000000000, 0x1010101010101010, 0x3030303030303030, 0x0000000000000000,
-	0x7070707070707070, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0606060606060606, 0x1616161616161616, 0x3636363636363636, 0x0000000000000000,
-	0x7676767676767676, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0404040404040404, 0x1414141414141414, 0x3434343434343434, 0x0000000000000000,
-	0x7474747474747474
-};
-
-static const unsigned long long FLIPPED_3_V[21] = {
-	0x0000000000000000, 0x000000ff00000000, 0x0000ffff00000000, 0x0000000000000000,
-	0x00ffffff00000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000ffff00, 0x000000ff00ffff00, 0x0000ffff00ffff00, 0x0000000000000000,
-	0x00ffffff00ffff00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000ff0000, 0x000000ff00ff0000, 0x0000ffff00ff0000, 0x0000000000000000, 
-	0x00ffffff00ff0000
-};
-
-static const unsigned long long FLIPPED_4_H[19] = {	// ...cbahg
-	0x0000000000000000, 0x2020202020202020, 0x6060606060606060, 0x0000000000000000,
-	0x0e0e0e0e0e0e0e0e, 0x2e2e2e2e2e2e2e2e, 0x6e6e6e6e6e6e6e6e, 0x0000000000000000,
-	0x0c0c0c0c0c0c0c0c, 0x2c2c2c2c2c2c2c2c, 0x6c6c6c6c6c6c6c6c, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0808080808080808, 0x2828282828282828, 0x6868686868686868
-};
-
-static const unsigned long long FLIPPED_4_V[19] = {
-	0x0000000000000000, 0x0000ff0000000000, 0x00ffff0000000000, 0x0000000000000000,
-	0x00000000ffffff00, 0x0000ff00ffffff00, 0x00ffff00ffffff00, 0x0000000000000000,
-	0x00000000ffff0000, 0x0000ff00ffff0000, 0x00ffff00ffff0000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00000000ff000000, 0x0000ff00ff000000, 0x00ffff00ff000000
-};
-
-static const unsigned long long FLIPPED_5_H[18] = {	// ...dcbah
-	0x0000000000000000, 0x4040404040404040, 0x1e1e1e1e1e1e1e1e, 0x5e5e5e5e5e5e5e5e,
-	0x1c1c1c1c1c1c1c1c, 0x5c5c5c5c5c5c5c5c, 0x0000000000000000, 0x0000000000000000,
-	0x1818181818181818, 0x5858585858585858, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x1010101010101010, 0x5050505050505050
-};
-
-static const unsigned long long FLIPPED_5_V[18] = {
-	0x0000000000000000, 0x00ff000000000000, 0x000000ffffffff00, 0x00ff00ffffffff00,
-	0x000000ffffff0000, 0x00ff00ffffff0000, 0x0000000000000000, 0x0000000000000000,
-	0x000000ffff000000, 0x00ff00ffff000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000ff00000000, 0x00ff00ff00000000
-};
-
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(_M_X64) && (_MSC_VER >= 1800)
->>>>>>> 6506166 (More SSE optimizations)
-=======
-=======
-/*
- * Set all bits below the sole outflank bit if outfrank != 0
- */
->>>>>>> 1525ec4 (Use same OutflankToFlip as flip_bitscan, and fix typo bug)
-#if __has_builtin(__builtin_subcll)
-static inline unsigned long long OutflankToFlipmask(unsigned long long outflank) {
-	unsigned long long flipmask, cy;
-	flipmask = __builtin_subcll(outflank, 1, 0, &cy);
-	return __builtin_addcll(flipmask, 0, cy, &cy);
-}
-<<<<<<< HEAD
-#elif (defined(_M_X64) && (_MSC_VER >= 1800)) // || (defined(__GNUC__) && (__GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2))) // not tested
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-#elif (defined(_M_X64) && (_MSC_VER >= 1800)) || (defined(__x86_64__) && defined(__GNUC__) && (__GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)))
->>>>>>> 1525ec4 (Use same OutflankToFlip as flip_bitscan, and fix typo bug)
 static inline unsigned long long OutflankToFlipmask(unsigned long long outflank) {
 	unsigned long long flipmask;
 	unsigned char cy = _subborrow_u64(0, outflank, 1, &flipmask);
@@ -330,8 +167,6 @@ static inline unsigned long long OutflankToFlipmask(unsigned long long outflank)
 	return flipmask;
 }
 #else
-<<<<<<< HEAD
-<<<<<<< HEAD
 	#define OutflankToFlipmask(outflank)	((outflank) - (unsigned int) ((outflank) != 0))
 #endif
 
@@ -345,120 +180,17 @@ static inline unsigned long long OutflankToFlipmask(unsigned long long outflank)
 	//	return vertical_mirror(rOM & (-rOM));
 	// }
 	#define	outflank_right(O,maskr)	(vertical_mirror(vertical_mirror((O) | ~(maskr)) + 1) & (maskr))
-=======
-#if defined(_M_X64) && !defined(__AVX2__)
-static inline int __builtin_clzll(unsigned long long n) {	// n != 0
-=======
-=======
-	#define OutflankToFlipmask(x)	((x) - (unsigned int) ((x) != 0))
-#endif
-
-<<<<<<< HEAD
->>>>>>> 6506166 (More SSE optimizations)
-#if 0 // defined(_MSC_VER) && defined(_M_X64) && !defined(__AVX2__)
-static inline int _lzcnt_u64(unsigned long long n) {
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-#if defined(_MSC_VER) && !(defined(__AVX2__) || defined(__LZCNT__))
-static inline int lzcnt_u32(unsigned long n) {
-	unsigned long i;
-	if (!_BitScanReverse(&i, n))
-		i = -1;
-	return 31 - i;
-}
-
-#ifdef _M_X64
-static inline int lzcnt_u64(unsigned long long n) {
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-	unsigned long i;
-	if (!_BitScanReverse64(&i, n))
-		i = -1;
-	return 63 - i;
-}
-#else
-static inline int lzcnt_u64(unsigned long long n) {
-	unsigned long i;
-	if (_BitScanReverse(&i, n >> 32))
-		return 31 - i;
-	if (_BitScanReverse(&i, (unsigned int) n))
-		return 63 - i;
-	return -1;
-}
-#endif
-#else
-#define	lzcnt_u32(x)	_lzcnt_u32(x)
-#define	lzcnt_u64(x)	_lzcnt_u64(x)
-#endif
-
-#if (defined(__x86_64__) && (defined(__AVX2__) || defined(__LZCNT__))) || defined(_MSC_VER)
-	// Strictly, (long long) >> 64 is undefined in C, but either 0 bit (no change)
-	// or 64 bit (zero out) shift will lead valid result (i.e. flipped == 0).
-<<<<<<< HEAD
-	#define	outflank_right(O,maskr)	(0x8000000000000000ULL >> _lzcnt_u64(~(O) & (maskr)))
-#elif defined(vertical_mirror)	// bswap to use carry propagation backwards
-<<<<<<< HEAD
-	#define	outflank_right(O,maskr,masko)	(vertical_mirror(vertical_mirror((O) | ~(maskr)) + 1) & (maskr))
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-=======
-	#define	outflank_right_H(O,maskr)	(0x8000000000000000ULL >> lzcnt_u64(~(O) & (maskr)))
-#else	// with guardian bit to avoid __builtin_clz(0)
-	#define	outflank_right_H(O,maskr)	(0x8000000000000000ULL >> __builtin_clzll(((O) & (((maskr) & ((maskr) - 1)))) ^ (maskr)))
-#endif
-
-#if ((defined(__x86_64__) || defined(_M_X64)) && (defined(__AVX2__) || defined(__LZCNT__))) || !defined(vertical_mirror)
-	#define outflank_right(O,maskr)	outflank_right_H((O),(maskr))
-#else	// bswap to use carry propagation backwards - cannot be used for horizontal right
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	#define OutflankToFlipmask(outflank)	((outflank) - (unsigned int) ((outflank) != 0))
-#endif
-
-#if ((defined(__x86_64__) || defined(USE_GAS_X86)) && defined(__LZCNT__)) || defined(_MSC_VER)
-	// Strictly, (long long) >> 64 is undefined in C, but either 0 bit (no change)
-	// or 64 bit (zero out) shift will lead valid result (i.e. flipped == 0).
-	#define	outflank_right(O,maskr)	(0x8000000000000000ULL >> lzcnt_u64(~(O) & (maskr)))
-#elif defined(vertical_mirror)	// bswap to use carry propagation backwards - cannot be used for horizontal right
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	// static inline unsigned long long outflank_right(unsigned long long O, unsigned long long maskr) {
-	//	unsigned long long rOM = vertical_mirror(~(O) & maskr);
-	//	return vertical_mirror(rOM & (-rOM));
-	// }
-	#define	outflank_right(O,maskr)	(vertical_mirror(vertical_mirror((O) | ~(maskr)) + 1) & (maskr))
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 6506166 (More SSE optimizations)
-#else	// with guardian bit to avoid __builtin_clz(0)
-	#define	outflank_right(O,maskr)	(0x8000000000000000ULL >> __builtin_clzll(((O) & (((maskr) & ((maskr) - 1)))) ^ (maskr)))
-=======
-=======
 #else	// with guardian bit to avoid __builtin_clz(0)
 	#define	outflank_right(O,maskr)	(0x8000000000000000ULL >> __builtin_clzll(((O) & (((maskr) & ((maskr) - 1)))) ^ (maskr)))
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 #endif
 
 // in case continuous from MSB
 #if defined(__AVX2__) || defined(__LZCNT__) || defined(_MSC_VER)
 	#define	outflank_right_H(O)	(0x80000000u >> lzcnt_u32(~(O)))
 #else
-<<<<<<< HEAD
-	#define	outflank_right_32(O,maskr)	(0x80000000u >> __builtin_clz(((O) & (((maskr) & ((maskr) - 1)))) ^ (maskr)))
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
 	#define	outflank_right_H(O)	(0x80000000u >> __builtin_clz(~(O)))
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 #endif
 
-<<<<<<< HEAD
-// in case continuous from MSB
-#if defined(__AVX2__) || defined(__LZCNT__) || defined(_MSC_VER)
-	#define	outflank_right_H(O)	(0x80000000u >> lzcnt_u32(~(O)))
-#else
-	#define	outflank_right_H(O)	(0x80000000u >> __builtin_clz(~(O)))
-#endif
-
-=======
->>>>>>> 6506166 (More SSE optimizations)
 
 /**
  * Compute flipped discs when playing on square A1.
@@ -478,18 +210,8 @@ static unsigned long long flip_A1(const unsigned long long P, const unsigned lon
 	outflank_d9 = ((O | ~0x8040201008040200) + 1) & P & 0x8040201008040200;
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040200;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_h = (unsigned char) (O + 0x02) & P;
-	flipped |= outflank_h - ((unsigned int) (outflank_h != 0) << 1);
-=======
-	outflank_h = (O + 0x02) & P;
-	flipped += ((outflank_h * 0x7f) >> 8) << 1;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = (unsigned char) (O + 0x02) & P;
 	flipped |= outflank_h - ((unsigned int) (outflank_h != 0) << 1);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return flipped;
 }
@@ -512,18 +234,8 @@ static unsigned long long flip_B1(const unsigned long long P, const unsigned lon
 	outflank_d9 = ((O | ~0x0080402010080400) + 1) & P & 0x0080402010080400;
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0080402010080400;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = (unsigned char) (O + 0x04) & P;
 	flipped |= outflank_h - ((unsigned int) (outflank_h != 0) << 2);
-=======
-	outflank_h = (O + 0x04) & P;
-	flipped += ((outflank_h * 0x3f) >> 8) << 2;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_h = (unsigned char) (O + 0x04) & P;
-	flipped |= outflank_h - ((unsigned int) (outflank_h != 0) << 2);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return flipped;
 }
@@ -542,21 +254,11 @@ static unsigned long long flip_C1(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x0404040404040400) + 1) & P & 0x0404040404040400;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040400;
-<<<<<<< HEAD
-=======
-
-	outflank_d = OUTFLANK_2[(((HIDWORD(O) & 0x00000040) + (LODWORD(O) & 0x20100a04)) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000804020110a04) * 0x0101010101010101) >> 52;	// hgfedcb[ahgfe]...
-	flipped |= FLIPPED_2_H[outflank_d] & 0x0000004020100a04;	// A3C1H6
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
 	outflank_d = OUTFLANK_2[(((HIDWORD(O) & 0x00000040) + (LODWORD(O) & 0x20100a04)) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000804020110a04) * 0x0101010101010101) >> 52;	// hgfedcb[ahgfe]...
 	flipped |= FLIPPED_2_H[outflank_d] & 0x0000004020100a04;	// A3C1H6
 
-=======
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 	outflank_h = OUTFLANK_2[(O >> 1) & 0x3f] & rotl8(P, 4);
 	flipped |= (unsigned char) FLIPPED_2_H[outflank_h];
 
@@ -577,21 +279,11 @@ static unsigned long long flip_D1(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x0808080808080800) + 1) & P & 0x0808080808080800;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080800;
-<<<<<<< HEAD
-=======
-
-	outflank_d = OUTFLANK_3[((LODWORD(O) & 0x40221408) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000008041221408) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-	flipped |= FLIPPED_3_H[outflank_d] & 0x0000000040221408;	// A4D1H5
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
 	outflank_d = OUTFLANK_3[((LODWORD(O) & 0x40221408) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000008041221408) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0000000040221408;	// A4D1H5
 
-=======
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 	outflank_h = OUTFLANK_3[(O >> 1) & 0x3f] & rotl8(P, 3);
 	flipped |= (unsigned char) FLIPPED_3_H[outflank_h];
 
@@ -612,21 +304,11 @@ static unsigned long long flip_E1(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x1010101010101000) + 1) & P & 0x1010101010101000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010101000;
-<<<<<<< HEAD
-=======
 
 	outflank_d = OUTFLANK_4[((LODWORD(O) & 0x02442810) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000000182442810) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
 	flipped |= FLIPPED_4_H[outflank_d] & 0x0000000002442810;	// A5E1H4
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_4[((LODWORD(O) & 0x02442810) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000000182442810) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-	flipped |= FLIPPED_4_H[outflank_d] & 0x0000000002442810;	// A5E1H4
-
-=======
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 	outflank_h = OUTFLANK_4[(O >> 1) & 0x3f] & rotl8(P, 2);
 	flipped |= (unsigned char) FLIPPED_4_H[outflank_h];
 
@@ -647,21 +329,11 @@ static unsigned long long flip_F1(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x2020202020202000) + 1) & P & 0x2020202020202000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020202000;
-<<<<<<< HEAD
-=======
 
 	outflank_d = OUTFLANK_5[(((HIDWORD(O) & 0x00000002) + (LODWORD(O) & 0x04085020)) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000010204885020) * 0x0101010101010101) >> 55;	// hgfe[dcbah]...
 	flipped |= FLIPPED_5_H[outflank_d] & 0x0000000204085020;	// A6F1H3
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_5[(((HIDWORD(O) & 0x00000002) + (LODWORD(O) & 0x04085020)) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000010204885020) * 0x0101010101010101) >> 55;	// hgfe[dcbah]...
-	flipped |= FLIPPED_5_H[outflank_d] & 0x0000000204085020;	// A6F1H3
-
-=======
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 	outflank_h = OUTFLANK_5[(O >> 1) & 0x3f] & rotl8(P, 1);
 	flipped |= (unsigned char) FLIPPED_5_H[outflank_h];
 
@@ -682,32 +354,13 @@ static unsigned long long flip_G1(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x4040404040404000) + 1) & P & 0x4040404040404000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040404000;
-<<<<<<< HEAD
-=======
 
 	outflank_d7 = ((O | ~0x0001020408102000) + 1) & P & 0x0001020408102000;
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0001020408102000;
->>>>>>> 6506166 (More SSE optimizations)
 
-	outflank_d7 = ((O | ~0x0001020408102000) + 1) & P & 0x0001020408102000;
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0001020408102000;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = outflank_right_H((unsigned int) O << 26) & ((unsigned int) P << 26);
 	flipped |= (outflank_h * (unsigned int) -2) >> 26;
-=======
-	outflank_h = outflank_right_32((unsigned int) O, 0x0000003f) & (unsigned int) P;
-	flipped |= (outflank_h * -2) & 0x0000003f;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H((unsigned int) O << 26) & ((unsigned int) P << 26);
-	flipped |= (outflank_h * (unsigned int) -2) >> 26;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
-=======
->>>>>>> 6506166 (More SSE optimizations)
 	return flipped;
 }
 
@@ -725,32 +378,13 @@ static unsigned long long flip_H1(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x8080808080808000) + 1) & P & 0x8080808080808000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080808000;
-<<<<<<< HEAD
-=======
 
 	outflank_d7 = ((O | ~0x0102040810204000) + 1) & P & 0x0102040810204000;
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810204000;
->>>>>>> 6506166 (More SSE optimizations)
 
-	outflank_d7 = ((O | ~0x0102040810204000) + 1) & P & 0x0102040810204000;
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810204000;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_h = outflank_right_H((unsigned int) O << 25) & ((unsigned int) P << 25);
-	flipped |= (outflank_h * (unsigned int) -2) >> 25;
-=======
-	outflank_h = outflank_right_32((unsigned int) O, 0x0000007f) & (unsigned int) P;
-	flipped |= (outflank_h * -2) & 0x0000007f;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
 	outflank_h = outflank_right_H((unsigned int) O << 25) & ((unsigned int) P << 25);
 	flipped |= (outflank_h * (unsigned int) -2) >> 25;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
-=======
->>>>>>> 6506166 (More SSE optimizations)
 	return flipped;
 }
 
@@ -772,18 +406,8 @@ static unsigned long long flip_A2(const unsigned long long P, const unsigned lon
 	outflank_d9 = ((O | ~0x4020100804020000) + 1) & P & 0x4020100804020000;
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x4020100804020000;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = (unsigned short) (O + 0x0200) & P;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7e00;
-=======
-	outflank_h = (O + 0x0200) & P;
-	flipped |= ((outflank_h * 0x7f) >> 16) << 9;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_h = (unsigned short) (O + 0x0200) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7e00;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return flipped;
 }
@@ -806,18 +430,8 @@ static unsigned long long flip_B2(const unsigned long long P, const unsigned lon
 	outflank_d9 = ((O | ~0x8040201008040000) + 1) & P & 0x8040201008040000;
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040000;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = (unsigned short) (O + 0x0400) & P;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7c00;
-=======
-	outflank_h = (O + 0x0400) & P;
-	flipped |= ((outflank_h * 0x3f) >> 16) << 10;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_h = (unsigned short) (O + 0x0400) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7c00;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return flipped;
 }
@@ -836,28 +450,13 @@ static unsigned long long flip_C2(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x0404040404040000) + 1) & P & 0x0404040404040000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040000;
-<<<<<<< HEAD
-
-	outflank_d = OUTFLANK_2[(((HIDWORD(O) & 0x00004020) + (LODWORD(O) & 0x100a0400)) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x00804020110a0400) * 0x0101010101010101) >> 52;	// hgfedcb[ahgfe]...
-	flipped |= FLIPPED_2_H[outflank_d] & 0x00004020100a0400;	// A4C2H7
-
-	outflank_h = OUTFLANK_2[(O >> 9) & 0x3f] & rotl8(P >> 8, 4);
-	flipped |= (unsigned char) FLIPPED_2_H[outflank_h] << 8;
-=======
 
 	outflank_d = OUTFLANK_2[(((HIDWORD(O) & 0x00004020) + (LODWORD(O) & 0x100a0400)) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x00804020110a0400) * 0x0101010101010101) >> 52;	// hgfedcb[ahgfe]...
 	flipped |= FLIPPED_2_H[outflank_d] & 0x00004020100a0400;	// A4C2H7
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_2[(O >> 9) & 0x3f] & (P >> 8);
-	flipped |= FLIPPED_2_H[outflank_h] & 0x000000000000ff00;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_2[(O >> 9) & 0x3f] & rotl8(P >> 8, 4);
 	flipped |= (unsigned char) FLIPPED_2_H[outflank_h] << 8;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -876,28 +475,13 @@ static unsigned long long flip_D2(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x0808080808080000) + 1) & P & 0x0808080808080000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080000;
-<<<<<<< HEAD
-
-	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 8) & 0x40221408) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000804122140800) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-	flipped |= FLIPPED_3_H[outflank_d] & 0x0000004022140800;	// A5D2H6
-
-	outflank_h = OUTFLANK_3[(O >> 9) & 0x3f] & rotl8(P >> 8, 3);
-	flipped |= (unsigned char) FLIPPED_3_H[outflank_h] << 8;
-=======
 
 	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 8) & 0x40221408) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000804122140800) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0000004022140800;	// A5D2H6
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_3[(O >> 9) & 0x3f] & (P >> 8);
-	flipped |= FLIPPED_3_H[outflank_h] & 0x000000000000ff00;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_3[(O >> 9) & 0x3f] & rotl8(P >> 8, 3);
 	flipped |= (unsigned char) FLIPPED_3_H[outflank_h] << 8;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -916,7 +500,6 @@ static unsigned long long flip_E2(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x1010101010100000) + 1) & P & 0x1010101010100000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010100000;
-<<<<<<< HEAD
 
 	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 8) & 0x02442810) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000018244281000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
@@ -924,20 +507,6 @@ static unsigned long long flip_E2(const unsigned long long P, const unsigned lon
 
 	outflank_h = OUTFLANK_4[(O >> 9) & 0x3f] & rotl8(P >> 8, 2);
 	flipped |= (unsigned char) FLIPPED_4_H[outflank_h] << 8;
-=======
-
-	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 8) & 0x02442810) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000018244281000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-	flipped |= FLIPPED_4_H[outflank_d] & 0x0000000244281000;	// A6E2H5
-
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_4[(O >> 9) & 0x3f] & (P >> 8);
-	flipped |= FLIPPED_4_H[outflank_h] & 0x000000000000ff00;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_h = OUTFLANK_4[(O >> 9) & 0x3f] & rotl8(P >> 8, 2);
-	flipped |= (unsigned char) FLIPPED_4_H[outflank_h] << 8;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -956,28 +525,13 @@ static unsigned long long flip_F2(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x2020202020200000) + 1) & P & 0x2020202020200000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020200000;
-<<<<<<< HEAD
-
-	outflank_d = OUTFLANK_5[(((HIDWORD(O) & 0x00000204) + (LODWORD(O) & 0x08502000)) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0001020488502000) * 0x0101010101010101) >> 55;	// hgfe[dcbah]...
-	flipped |= FLIPPED_5_H[outflank_d] & 0x0000020408502000;	// A7F2H4
-
-	outflank_h = OUTFLANK_5[(O >> 9) & 0x3f] & rotl8(P >> 8, 1);
-	flipped |= (unsigned char) FLIPPED_5_H[outflank_h] << 8;
-=======
 
 	outflank_d = OUTFLANK_5[(((HIDWORD(O) & 0x00000204) + (LODWORD(O) & 0x08502000)) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0001020488502000) * 0x0101010101010101) >> 55;	// hgfe[dcbah]...
 	flipped |= FLIPPED_5_H[outflank_d] & 0x0000020408502000;	// A7F2H4
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_5[(O >> 9) & 0x3f] & (P >> 8);
-	flipped |= FLIPPED_5_H[outflank_h] & 0x000000000000ff00;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_5[(O >> 9) & 0x3f] & rotl8(P >> 8, 1);
 	flipped |= (unsigned char) FLIPPED_5_H[outflank_h] << 8;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -996,32 +550,13 @@ static unsigned long long flip_G2(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x4040404040400000) + 1) & P & 0x4040404040400000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040400000;
-<<<<<<< HEAD
-=======
-
-	outflank_d7 = ((O | ~0x0102040810200000) + 1) & P & 0x0102040810200000;
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0102040810200000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_d7 = ((O | ~0x0102040810200000) + 1) & P & 0x0102040810200000;
 	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0102040810200000;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = outflank_right_H(((unsigned int) O >> 9) << 27) & ((unsigned int) P << 18);
 	flipped |= (outflank_h * (unsigned int) -2) >> 18;
-=======
-	outflank_h = outflank_right_32((unsigned int) O, 0x00003f00) & (unsigned int) P;
-	flipped |= (outflank_h * -2) & 0x00003f00;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H(((unsigned int) O >> 9) << 27) & ((unsigned int) P << 18);
-	flipped |= (outflank_h * (unsigned int) -2) >> 18;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
-=======
->>>>>>> 6506166 (More SSE optimizations)
 	return flipped;
 }
 
@@ -1039,32 +574,13 @@ static unsigned long long flip_H2(const unsigned long long P, const unsigned lon
 
 	outflank_v = ((O | ~0x8080808080800000) + 1) & P & 0x8080808080800000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080800000;
-<<<<<<< HEAD
-=======
 
 	outflank_d7 = ((O | ~0x0204081020400000) + 1) & P & 0x0204081020400000;
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0204081020400000;
->>>>>>> 6506166 (More SSE optimizations)
 
-	outflank_d7 = ((O | ~0x0204081020400000) + 1) & P & 0x0204081020400000;
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0204081020400000;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = outflank_right_H(((unsigned int) O >> 9) << 26) & ((unsigned int) P << 17);
 	flipped |= (outflank_h * (unsigned int) -2) >> 17;
-=======
-	outflank_h = outflank_right_32((unsigned int) O, 0x00007f00) & (unsigned int) P;
-	flipped |= (outflank_h * -2) & 0x00007f00;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H(((unsigned int) O >> 9) << 26) & ((unsigned int) P << 17);
-	flipped |= (outflank_h * (unsigned int) -2) >> 17;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
-=======
->>>>>>> 6506166 (More SSE optimizations)
 	return flipped;
 }
 
@@ -1080,50 +596,16 @@ static unsigned long long flip_A3(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_a1a3f8, outflank_a8a3c1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_a1a3f8 = OUTFLANK_2[((O & 0x0010080402010100) * 0x0102040404040404) >> 57];
-	outflank_a1a3f8 &= ((P & 0x2010080402010101) * 0x8000000002020202) >> 59;	// 18765
-	flipped = FLIPPED_2_V[outflank_a1a3f8] & 0x0010080402010100;
-
-<<<<<<< HEAD
-	outflank_a8a3c1 = OUTFLANK_5[((O & 0x0001010101010200) * 0x2020201008040201) >> 57];
-	outflank_a8a3c1 &= ((P & 0x0101010101010204) * 0x0200000080402010) >> 59;	// 56781
-	flipped |= vertical_mirror(FLIPPED_5_V[outflank_a8a3c1]) & 0x0001010101010200;
-=======
-	outflank_a8a3c1 = OUTFLANK_5[((O & 0x0001010101010200ULL) * 0x2020201008040201ULL) >> 57]
-		& (((P & 0x0101010101010204ULL) * 0x2020201008040201ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_5_V[outflank_a8a3c1]) & 0x0001010101010200ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-
-	outflank_h = ((O & 0x007e0000) + 0x00020000) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x007e0000;
-=======
-	outflank_a1a3f8 = OUTFLANK_2[((O & 0x0010080402010100) * 0x0102040404040404) >> 57]
-		& (((P & 0x2010080402010101) * 0x0102040404040404) >> 56);
-=======
-	outflank_a1a3f8 = ((P & 0x2010080402010101) * 0x8000000002020202) >> 59;	// 18765
-	outflank_a1a3f8 = OUTFLANK_2[((O & 0x0010080402010100) * 0x0102040404040404) >> 57] & outflank_a1a3f8;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_a1a3f8 = OUTFLANK_2[((O & 0x0010080402010100) * 0x0102040404040404) >> 57];
 	outflank_a1a3f8 &= ((P & 0x2010080402010101) * 0x8000000002020202) >> 59;	// 18765
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_2_V[outflank_a1a3f8] & 0x0010080402010100;
 
 	outflank_a8a3c1 = OUTFLANK_5[((O & 0x0001010101010200) * 0x2020201008040201) >> 57];
 	outflank_a8a3c1 &= ((P & 0x0101010101010204) * 0x0200000080402010) >> 59;	// 56781
 	flipped |= vertical_mirror(FLIPPED_5_V[outflank_a8a3c1]) & 0x0001010101010200;
 
-<<<<<<< HEAD
-	outflank_h = ((O & 0x007e0000u) + 0x00020000u) & P;
-	flipped |= ((outflank_h * 0x7f) >> 24) << 17;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = ((O & 0x007e0000) + 0x00020000) & P;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x007e0000;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return flipped;
 }
@@ -1140,50 +622,16 @@ static unsigned long long flip_B3(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_b1b3g8, outflank_b8b3d1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_b1b3g8 = OUTFLANK_2[((O & 0x0020100804020200) * 0x0081020202020202) >> 57];
 	outflank_b1b3g8 &= ((P & 0x4020100804020202) * 0x4000000001010101) >> 59;	// 18765
 	flipped = FLIPPED_2_V[outflank_b1b3g8] & 0x0020100804020200;
 
-<<<<<<< HEAD
 	outflank_b8b3d1 = OUTFLANK_5[((O & 0x0002020202020400) * 0x0010100804020100) >> 57];
 	outflank_b8b3d1 &= ((P & 0x0202020202020408) * 0x0100000040201008) >> 59;	// 56781
 	flipped |= vertical_mirror(FLIPPED_5_V[outflank_b8b3d1]) & 0x0002020202020400;
-=======
-	outflank_b8b3d1 = OUTFLANK_5[((O & 0x0002020202020400ULL) * 0x0010100804020100ULL) >> 57]
-		& ((((P & 0x0202020202020408ULL) >> 1) * 0x2020201008040201ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_5_V[outflank_b8b3d1]) & 0x0002020202020400ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
 
 	outflank_h = ((O & 0x007c0000) + 0x00040000) & P;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x007c0000;
-=======
-	outflank_b1b3g8 = OUTFLANK_2[((O & 0x0020100804020200) * 0x0081020202020202) >> 57]
-		& (((P & 0x4020100804020202) * 0x0081020202020202) >> 56);
-=======
-	outflank_b1b3g8 = ((P & 0x4020100804020202) * 0x4000000001010101) >> 59;	// 18765
-	outflank_b1b3g8 = OUTFLANK_2[((O & 0x0020100804020200) * 0x0081020202020202) >> 57] & outflank_b1b3g8;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-	outflank_b1b3g8 = OUTFLANK_2[((O & 0x0020100804020200) * 0x0081020202020202) >> 57];
-	outflank_b1b3g8 &= ((P & 0x4020100804020202) * 0x4000000001010101) >> 59;	// 18765
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	flipped = FLIPPED_2_V[outflank_b1b3g8] & 0x0020100804020200;
-
-	outflank_b8b3d1 = OUTFLANK_5[((O & 0x0002020202020400) * 0x0010100804020100) >> 57];
-	outflank_b8b3d1 &= ((P & 0x0202020202020408) * 0x0100000040201008) >> 59;	// 56781
-	flipped |= vertical_mirror(FLIPPED_5_V[outflank_b8b3d1]) & 0x0002020202020400;
-
-<<<<<<< HEAD
-	outflank_h = ((O & 0x007c0000u) + 0x00040000u) & P;
-	flipped |= ((outflank_h * 0x3f) >> 24) << 18;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_h = ((O & 0x007c0000) + 0x00040000) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x007c0000;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return flipped;
 }
@@ -1200,11 +648,6 @@ static unsigned long long flip_C3(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_v, outflank_d9;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	outflank_v = OUTFLANK_2[((O & 0x0004040404040400) * 0x0040810204081020) >> 57];
 	outflank_v &= ((P & 0x0404040404040404) * 0x2000000002040810) >> 59;	// 18765
 	flipped = FLIPPED_2_V[outflank_v] & 0x0004040404040400;
@@ -1216,31 +659,6 @@ static unsigned long long flip_C3(const unsigned long long P, const unsigned lon
 
 	outflank_d9 = OUTFLANK_2[(((HIDWORD(O) & 0x00402010) + (LODWORD(O) & 0x08040200)) * 0x01010101) >> 25];
 	outflank_d9 &= rotl8((((HIDWORD(P) & 0x80402010) + (LODWORD(P) & 0x08040201)) * 0x01010101) >> 24, 4);	// (h8)
-<<<<<<< HEAD
-=======
-	outflank_v = OUTFLANK_2[((O & 0x0004040404040400) * 0x0040810204081020) >> 57]
-		& (((P & 0x0404040404040404) * 0x0040810204081020) >> 56);
-=======
-	outflank_v = ((P & 0x0404040404040404) * 0x2000000002040810) >> 59;	// 18765
-	outflank_v = OUTFLANK_2[((O & 0x0004040404040400) * 0x0040810204081020) >> 57] & outflank_v;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	flipped = FLIPPED_2_V[outflank_v] & 0x0004040404040400;
-
-	outflank_h = OUTFLANK_2[(O >> 17) & 0x3f] & rotl8(P >> 16, 4);
-	flipped |= (unsigned char) FLIPPED_2_H[outflank_h] << 16;
-
-	flipped |= (((P >> 7) | (P << 7)) & 0x000000002000800) & O;
-
-<<<<<<< HEAD
-	outflank_d9 = OUTFLANK_2[((O & 0x0040201008040200) * 0x0101010101010101) >> 57]
-		& (((P & 0x8040201008040201) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d9 = ((P & 0x8040201008040201) * 0x0101010101010101) >> 56;	// (h8)
-	outflank_d9 = OUTFLANK_2[((O & 0x0040201008040200) * 0x0101010101010101) >> 57] & rotl8(outflank_d9, 4);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_2_H[outflank_d9] & 0x0040201008040200;
 
 	return flipped;
@@ -1258,11 +676,6 @@ static unsigned long long flip_D3(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_v, outflank_d;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	outflank_v = OUTFLANK_2[((O & 0x0008080808080800) * 0x0020408102040810) >> 57];
 	outflank_v &= ((P & 0x0808080808080808) * 0x1020408001020408) >> 59;	// 18765
 	flipped = FLIPPED_2_V[outflank_v] & 0x0008080808080800;
@@ -1272,29 +685,6 @@ static unsigned long long flip_D3(const unsigned long long P, const unsigned lon
 
 	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 16) & 0x40221408) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0080412214080000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-<<<<<<< HEAD
-=======
-	outflank_v = OUTFLANK_2[((O & 0x0008080808080800) * 0x0020408102040810) >> 57]
-		& (((P & 0x0808080808080808) * 0x0020408102040810) >> 56);
-=======
-	outflank_v = ((P & 0x0808080808080808) * 0x1020408001020408) >> 59;	// 18765
-	outflank_v = OUTFLANK_2[((O & 0x0008080808080800) * 0x0020408102040810) >> 57] & outflank_v;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	flipped = FLIPPED_2_V[outflank_v] & 0x0008080808080800;
-
-	outflank_h = OUTFLANK_3[(O >> 17) & 0x3f] & rotl8(P >> 16, 3);
-	flipped |= (unsigned char) FLIPPED_3_H[outflank_h] << 16;
-
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_3[((O & 0x0000402214080000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0080412214080000) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d = ((P & 0x0080412214080000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-	outflank_d = OUTFLANK_3[((O & 0x0000402214080000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0000402214080000;	// A6D3H7
 
 	flipped |= (((P << 7) & 0x0000000000001000) | ((P << 9) & 0x000000000000400)) & O;
@@ -1314,11 +704,6 @@ static unsigned long long flip_E3(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_v, outflank_d;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	outflank_v = OUTFLANK_2[((O & 0x0010101010101000) * 0x0010204081020408) >> 57];
 	outflank_v &= ((P & 0x1010101010101010) * 0x0810204000810204) >> 59;	// 18765
 	flipped = FLIPPED_2_V[outflank_v] & 0x0010101010101000;
@@ -1328,29 +713,6 @@ static unsigned long long flip_E3(const unsigned long long P, const unsigned lon
 
 	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 16) & 0x02442810) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0001824428100000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-<<<<<<< HEAD
-=======
-	outflank_v = OUTFLANK_2[((O & 0x0010101010101000) * 0x0010204081020408) >> 57]
-		& (((P & 0x1010101010101010) * 0x0010204081020408) >> 56);
-=======
-	outflank_v = ((P & 0x1010101010101010) * 0x0810204000810204) >> 59;	// 18765
-	outflank_v = OUTFLANK_2[((O & 0x0010101010101000) * 0x0010204081020408) >> 57] & outflank_v;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	flipped = FLIPPED_2_V[outflank_v] & 0x0010101010101000;
-
-	outflank_h = OUTFLANK_4[(O >> 17) & 0x3f] & rotl8(P >> 16, 2);
-	flipped |= (unsigned char) FLIPPED_4_H[outflank_h] << 16;
-
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_4[((O & 0x0000024428100000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0001824428100000) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d = ((P & 0x0001824428100000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-	outflank_d = OUTFLANK_4[((O & 0x0000024428100000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_4_H[outflank_d] & 0x0000024428100000;	// A7E3H6
 
 	flipped |= (((P << 7) & 0x0000000000002000) | ((P << 9) & 0x000000000000800)) & O;
@@ -1370,11 +732,6 @@ static unsigned long long flip_F3(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_v, outflank_d7;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	outflank_v = OUTFLANK_2[((O & 0x0020202020202000) * 0x0008102040810204) >> 57];
 	outflank_v &= ((P & 0x2020202020202020) * 0x0408102000408102) >> 59;	// 18765
 	flipped = FLIPPED_2_V[outflank_v] & 0x0020202020202000;
@@ -1384,29 +741,6 @@ static unsigned long long flip_F3(const unsigned long long P, const unsigned lon
 
 	outflank_d7 = OUTFLANK_5[(((HIDWORD(O) & 0x00020408) + (LODWORD(O) & 0x10204000)) * 0x01010101) >> 25];
 	outflank_d7 &= ((P & 0x0102040810204080) * 0x0010000010101010) >> 59;	// dcbah
-<<<<<<< HEAD
-=======
-	outflank_v = OUTFLANK_2[((O & 0x0020202020202000) * 0x0008102040810204) >> 57]
-		& (((P & 0x2020202020202020) * 0x0008102040810204) >> 56);
-=======
-	outflank_v = ((P & 0x2020202020202020) * 0x0408102000408102) >> 59;	// 18765
-	outflank_v = OUTFLANK_2[((O & 0x0020202020202000) * 0x0008102040810204) >> 57] & outflank_v;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	flipped = FLIPPED_2_V[outflank_v] & 0x0020202020202000;
-
-	outflank_h = OUTFLANK_5[(O >> 17) & 0x3f] & rotl8(P >> 16, 1);
-	flipped |= (unsigned char) FLIPPED_5_H[outflank_h] << 16;
-
-<<<<<<< HEAD
-	outflank_d7 = OUTFLANK_5[((O & 0x0002040810204000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0102040810204080) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d7 = ((P & 0x0102040810204080) * 0x0010000010101010) >> 59;	// dcbah
-	outflank_d7 = OUTFLANK_5[((O & 0x0002040810204000) * 0x0101010101010101) >> 57] & outflank_d7;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_5_H[outflank_d7] & 0x0002040810204000;
 
 	flipped |= (((P >> 9) | (P << 9)) & 0x0000000040001000) & O;
@@ -1426,56 +760,16 @@ static unsigned long long flip_G3(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_e1g3g8, outflank_b8g3g1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_e1g3g8 = OUTFLANK_2[((O & 0x0040404040402000) * 0x0010101020408102) >> 57];
 	outflank_e1g3g8 &= ((P & 0x4040404040402010) * 0x0800000000204081) >> 59;	// 18765
 	flipped = FLIPPED_2_V[outflank_e1g3g8] & 0x0040404040402000;
 
-<<<<<<< HEAD
 	outflank_b8g3g1 = OUTFLANK_5[((O & 0x0004081020404000) * 0x0402010101010101) >> 58];
 	outflank_b8g3g1 &= ((P & 0x0204081020404040) * 0x0020000008080808) >> 59;	// 43218
 	flipped |= vertical_mirror(FLIPPED_5_V[outflank_b8g3g1]) & 0x0004081020404000;
-=======
-	outflank_b8g3g1 = OUTFLANK_5[((O & 0x0004081020404000ULL) * 0x0402010101010101ULL) >> 58]
-		& ((((P & 0x0204081020404040ULL) >> 1) * 0x0402010101010101ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_5_V[outflank_b8g3g1]) & 0x0004081020404000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	outflank_e1g3g8 = OUTFLANK_2[((O & 0x0040404040402000) * 0x0010101020408102) >> 57]
-		& (((P & 0x4040404040402010) * 0x0010101020408102) >> 56);
-=======
-	outflank_e1g3g8 = ((P & 0x4040404040402010) * 0x0800000000204081) >> 59;	// 18765
-	outflank_e1g3g8 = OUTFLANK_2[((O & 0x0040404040402000) * 0x0010101020408102) >> 57] & outflank_e1g3g8;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-	outflank_e1g3g8 = OUTFLANK_2[((O & 0x0040404040402000) * 0x0010101020408102) >> 57];
-	outflank_e1g3g8 &= ((P & 0x4040404040402010) * 0x0800000000204081) >> 59;	// 18765
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	flipped = FLIPPED_2_V[outflank_e1g3g8] & 0x0040404040402000;
-
-	outflank_b8g3g1 = OUTFLANK_5[((O & 0x0004081020404000) * 0x0402010101010101) >> 58];
-	outflank_b8g3g1 &= ((P & 0x0204081020404040) * 0x0020000008080808) >> 59;	// 43218
-	flipped |= vertical_mirror(FLIPPED_5_V[outflank_b8g3g1]) & 0x0004081020404000;
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_h = outflank_right_H(((unsigned int) O >> 17) << 27) & (unsigned int) (P << 10);
-	flipped |= (outflank_h * (unsigned int) -2) >> 10;
-=======
-	outflank_h = outflank_right_32((unsigned int) O, 0x003f0000) & (unsigned int) P;
-	flipped |= (outflank_h * -2) & 0x003f0000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H(((unsigned int) O >> 17) << 27) & (unsigned int)(P << 10);
-=======
 	outflank_h = outflank_right_H(((unsigned int) O >> 17) << 27) & (unsigned int) (P << 10);
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= (outflank_h * (unsigned int) -2) >> 10;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -1492,56 +786,16 @@ static unsigned long long flip_H3(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_f1h3h8, outflank_c8h3h1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_f1h3h8 = OUTFLANK_2[((O & 0x0080808080804000) * 0x0008080810204081) >> 57];
 	outflank_f1h3h8 &= rotl8(((P & 0x8080808080804020) * 0x0008080810204081) >> 56, 4);	// (h8)
 	flipped = FLIPPED_2_V[outflank_f1h3h8] & 0x0080808080804000;
 
-<<<<<<< HEAD
 	outflank_c8h3h1 = OUTFLANK_5[((O & 0x0008102040808000) * 0x0000804040404040) >> 57];
 	outflank_c8h3h1 &= ((P & 0x0408102040808080) * 0x0010000004040404) >> 59;	// 43218
 	flipped |= vertical_mirror(FLIPPED_5_V[outflank_c8h3h1]) & 0x0008102040808000;
-=======
-	outflank_c8h3h1 = OUTFLANK_5[((O & 0x0008102040808000ULL) * 0x0000804040404040ULL) >> 57]
-		& ((((P & 0x0408102040808080ULL) >> 2) * 0x0402010101010101ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_5_V[outflank_c8h3h1]) & 0x0008102040808000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	outflank_f1h3h8 = OUTFLANK_2[((O & 0x0080808080804000) * 0x0008080810204081) >> 57]
-		& (((P & 0x8080808080804020) * 0x0008080810204081) >> 56);
-=======
-	outflank_f1h3h8 = ((P & 0x8080808080804020) * 0x0008080810204081) >> 56;	// (h8)
-	outflank_f1h3h8 = OUTFLANK_2[((O & 0x0080808080804000) * 0x0008080810204081) >> 57] & rotl8(outflank_f1h3h8, 4);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-	outflank_f1h3h8 = OUTFLANK_2[((O & 0x0080808080804000) * 0x0008080810204081) >> 57];
-	outflank_f1h3h8 &= rotl8(((P & 0x8080808080804020) * 0x0008080810204081) >> 56, 4);	// (h8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	flipped = FLIPPED_2_V[outflank_f1h3h8] & 0x0080808080804000;
-
-	outflank_c8h3h1 = OUTFLANK_5[((O & 0x0008102040808000) * 0x0000804040404040) >> 57];
-	outflank_c8h3h1 &= ((P & 0x0408102040808080) * 0x0010000004040404) >> 59;	// 43218
-	flipped |= vertical_mirror(FLIPPED_5_V[outflank_c8h3h1]) & 0x0008102040808000;
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = outflank_right_H(((unsigned int) O >> 17) << 26) & (unsigned int) (P << 9);
 	flipped |= (outflank_h * (unsigned int) -2) >> 9;
-=======
-	outflank_h = outflank_right_32((unsigned int) O, 0x007f0000) & (unsigned int) P;
-	flipped |= (outflank_h * -2) & 0x007f0000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H(((unsigned int) O >> 17) << 26) & (unsigned int)(P << 9);
-=======
-	outflank_h = outflank_right_H(((unsigned int) O >> 17) << 26) & (unsigned int) (P << 9);
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	flipped |= (outflank_h * (unsigned int) -2) >> 9;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -1558,50 +812,16 @@ static unsigned long long flip_A4(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_a1a4e8, outflank_a8a4d1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_a1a4e8 = OUTFLANK_3[((O & 0x0008040201010100) * 0x0102040808080808) >> 57];
-	outflank_a1a4e8 &= ((P & 0x1008040201010101) * 0x4080000000020202) >> 59;	// 21876
-	flipped = FLIPPED_3_V[outflank_a1a4e8] & 0x0008040201010100;
-
-<<<<<<< HEAD
-	outflank_a8a4d1 = OUTFLANK_4[((O & 0x0001010101020400) * 0x1010101008040201) >> 57];
-	outflank_a8a4d1 &= ((P & 0x0101010101020408) * 0x0202000000804020) >> 59;	// 67812
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_a8a4d1]) & 0x0001010101020400;
-=======
-	outflank_a8a4d1 = OUTFLANK_4[((O & 0x0001010101020400ULL) * 0x1010101008040201ULL) >> 57]
-		& (((P & 0x0101010101020408ULL) * 0x1010101008040201ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_a8a4d1]) & 0x0001010101020400ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-
-	outflank_h = ((unsigned int) O + 0x02000000) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7e000000;
-=======
-	outflank_a1a4e8 = OUTFLANK_3[((O & 0x0008040201010100) * 0x0102040808080808) >> 57]
-		& (((P & 0x1008040201010101) * 0x0102040808080808) >> 56);
-=======
-	outflank_a1a4e8 = ((P & 0x1008040201010101) * 0x4080000000020202) >> 59;	// 21876
-	outflank_a1a4e8 = OUTFLANK_3[((O & 0x0008040201010100) * 0x0102040808080808) >> 57] & outflank_a1a4e8;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_a1a4e8 = OUTFLANK_3[((O & 0x0008040201010100) * 0x0102040808080808) >> 57];
 	outflank_a1a4e8 &= ((P & 0x1008040201010101) * 0x4080000000020202) >> 59;	// 21876
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_3_V[outflank_a1a4e8] & 0x0008040201010100;
 
 	outflank_a8a4d1 = OUTFLANK_4[((O & 0x0001010101020400) * 0x1010101008040201) >> 57];
 	outflank_a8a4d1 &= ((P & 0x0101010101020408) * 0x0202000000804020) >> 59;	// 67812
 	flipped |= vertical_mirror(FLIPPED_4_V[outflank_a8a4d1]) & 0x0001010101020400;
 
-<<<<<<< HEAD
-	outflank_h = ((unsigned int) O + 0x02000000u) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7e000000u;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = ((unsigned int) O + 0x02000000) & P;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7e000000;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return flipped;
 }
@@ -1618,50 +838,16 @@ static unsigned long long flip_B4(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_b1b4f8, outflank_b8b4e1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_b1b4f8 = OUTFLANK_3[((O & 0x0010080402020200) * 0x0081020404040404) >> 57];
 	outflank_b1b4f8 &= ((P & 0x2010080402020202) * 0x2040000000010101) >> 59;	// 21876
 	flipped = FLIPPED_3_V[outflank_b1b4f8] & 0x0010080402020200;
 
-<<<<<<< HEAD
 	outflank_b8b4e1 = OUTFLANK_4[((O & 0x0002020202040800) * 0x1010101008040201) >> 58];
 	outflank_b8b4e1 &= ((P & 0x0202020202040810) * 0x0101000000402010) >> 59;	// 67812
 	flipped |= vertical_mirror(FLIPPED_4_V[outflank_b8b4e1]) & 0x0002020202040800;
-=======
-	outflank_b8b4e1 = OUTFLANK_4[((O & 0x0002020202040800ULL) * 0x1010101008040201ULL) >> 58]
-		& ((((P & 0x0202020202040810ULL) >> 1) * 0x1010101008040201ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_b8b4e1]) & 0x0002020202040800ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
 
 	outflank_h = ((unsigned int) O + 0x04000000) & P;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7c000000;
-=======
-	outflank_b1b4f8 = OUTFLANK_3[((O & 0x0010080402020200) * 0x0081020404040404) >> 57]
-		& (((P & 0x2010080402020202) * 0x0081020404040404) >> 56);
-=======
-	outflank_b1b4f8 = ((P & 0x2010080402020202) * 0x2040000000010101) >> 59;	// 21876
-	outflank_b1b4f8 = OUTFLANK_3[((O & 0x0010080402020200) * 0x0081020404040404) >> 57] & outflank_b1b4f8;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-	outflank_b1b4f8 = OUTFLANK_3[((O & 0x0010080402020200) * 0x0081020404040404) >> 57];
-	outflank_b1b4f8 &= ((P & 0x2010080402020202) * 0x2040000000010101) >> 59;	// 21876
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	flipped = FLIPPED_3_V[outflank_b1b4f8] & 0x0010080402020200;
-
-	outflank_b8b4e1 = OUTFLANK_4[((O & 0x0002020202040800) * 0x1010101008040201) >> 58];
-	outflank_b8b4e1 &= ((P & 0x0202020202040810) * 0x0101000000402010) >> 59;	// 67812
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_b8b4e1]) & 0x0002020202040800;
-
-<<<<<<< HEAD
-	outflank_h = ((unsigned int) O + 0x04000000u) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7c000000u;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_h = ((unsigned int) O + 0x04000000) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7c000000;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 
 	return flipped;
 }
@@ -1678,50 +864,16 @@ static unsigned long long flip_C4(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_c1c4g8, outflank_c8c4f1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_c1c4g8 = OUTFLANK_3[((O & 0x0020100804040400) * 0x0040810202020202) >> 57];
 	outflank_c1c4g8 &= rotl8(((P & 0x4020100804040404) * 0x0040810202020202) >> 56, 3);	// (g8)
 	flipped = FLIPPED_3_V[outflank_c1c4g8] & 0x0020100804040400;
 
-<<<<<<< HEAD
 	outflank_c8c4f1 = OUTFLANK_4[((O & 0x0004040404081000) * 0x0404040402010080) >> 57];
 	outflank_c8c4f1 &= ((P & 0x0404040404081020) * 0x0080800000201008) >> 59;	// 67812
 	flipped |= vertical_mirror(FLIPPED_4_V[outflank_c8c4f1]) & 0x0004040404081000;
-=======
-	outflank_c8c4f1 = OUTFLANK_4[((O & 0x0004040404081000ULL) * 0x0404040402010080ULL) >> 57]
-		& ((((P & 0x0404040404081020ULL) >> 2) * 0x1010101008040201ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_c8c4f1]) & 0x0004040404081000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
 
 	outflank_h = OUTFLANK_2[(O >> 25) & 0x3f] & rotl8(P >> 24, 4);
 	flipped |= (unsigned char) FLIPPED_2_H[outflank_h] << 24;
-=======
-	outflank_c1c4g8 = OUTFLANK_3[((O & 0x0020100804040400) * 0x0040810202020202) >> 57]
-		& (((P & 0x4020100804040404) * 0x0040810202020202) >> 56);
-=======
-	outflank_c1c4g8 = ((P & 0x4020100804040404) * 0x0040810202020202) >> 56;	// (g8)
-	outflank_c1c4g8 = OUTFLANK_3[((O & 0x0020100804040400) * 0x0040810202020202) >> 57] & rotl8(outflank_c1c4g8, 3);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-	outflank_c1c4g8 = OUTFLANK_3[((O & 0x0020100804040400) * 0x0040810202020202) >> 57];
-	outflank_c1c4g8 &= rotl8(((P & 0x4020100804040404) * 0x0040810202020202) >> 56, 3);	// (g8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	flipped = FLIPPED_3_V[outflank_c1c4g8] & 0x0020100804040400;
-
-	outflank_c8c4f1 = OUTFLANK_4[((O & 0x0004040404081000) * 0x0404040402010080) >> 57];
-	outflank_c8c4f1 &= ((P & 0x0404040404081020) * 0x0080800000201008) >> 59;	// 67812
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_c8c4f1]) & 0x0004040404081000;
-
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_2[(O >> 25) & 0x3f] & (P >> 24);
-	flipped |= FLIPPED_2_H[outflank_h] & 0x00000000ff000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_h = OUTFLANK_2[(O >> 25) & 0x3f] & rotl8(P >> 24, 4);
-	flipped |= (unsigned char) FLIPPED_2_H[outflank_h] << 24;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	flipped |= (((P << 9) & 0x00000000000020000) | ((P >> 7) & 0x00000000200000000)) & O;
 
@@ -1740,9 +892,6 @@ static unsigned long long flip_D4(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_v = OUTFLANK_3[((O & 0x0008080808080800) * 0x0020408102040810) >> 57];
 	outflank_v &= ((P & 0x0808080808080808) * 0x0810000000010204) >> 59;	// 21876
 	flipped = FLIPPED_3_V[outflank_v] & 0x0008080808080800;
@@ -1756,39 +905,6 @@ static unsigned long long flip_D4(const unsigned long long P, const unsigned lon
 
 	outflank_d9 = OUTFLANK_3[(((HIDWORD(O) & 0x00402010) + (LODWORD(O) & 0x08040200)) * 0x01010101) >> 25];
 	outflank_d9 &= rotl8((((HIDWORD(P) & 0x80402010) + (LODWORD(P) & 0x08040201)) * 0x01010101) >> 24, 3);	// (h8)
-=======
-	outflank_v = OUTFLANK_3[((O & 0x0008080808080800) * 0x0020408102040810) >> 57]
-		& (((P & 0x0808080808080808) * 0x0020408102040810) >> 56);
-=======
-	outflank_v = ((P & 0x0808080808080808) * 0x0810000000010204) >> 59;	// 21876
-	outflank_v = OUTFLANK_3[((O & 0x0008080808080800) * 0x0020408102040810) >> 57] & outflank_v;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-	outflank_v = OUTFLANK_3[((O & 0x0008080808080800) * 0x0020408102040810) >> 57];
-	outflank_v &= ((P & 0x0808080808080808) * 0x0810000000010204) >> 59;	// 21876
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	flipped = FLIPPED_3_V[outflank_v] & 0x0008080808080800;
-
-	outflank_h = OUTFLANK_3[(O >> 25) & 0x3f] & rotl8(P >> 24, 3);
-	flipped |= (unsigned char) FLIPPED_3_H[outflank_h] << 24;
-
-	outflank_d7 = OUTFLANK_3[(((HIDWORD(O) & 0x00000204) + (LODWORD(O) & 0x08102000)) * 0x01010101) >> 25];
-	outflank_d7 &= ((P & 0x0001020408102040) * 0x0040400000404000) >> 59;	// ba0gf
-	flipped |= FLIPPED_3_H[outflank_d7] & 0x0000020408102000;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d9 = OUTFLANK_3[((O & 0x0040201008040200) * 0x0101010101010101) >> 57]
-		& (((P & 0x8040201008040201) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d9 = ((P & 0x8040201008040201) * 0x0101010101010101) >> 56;	// (h8)
-	outflank_d9 = OUTFLANK_3[((O & 0x0040201008040200) * 0x0101010101010101) >> 57] & rotl8(outflank_d9, 3);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-	outflank_d9 = OUTFLANK_3[(((HIDWORD(O) & 0x00402010) + (LODWORD(O) & 0x08040200)) * 0x01010101) >> 25];
-	outflank_d9 &= rotl8((((HIDWORD(P) & 0x80402010) + (LODWORD(P) & 0x08040201)) * 0x01010101) >> 24, 3);	// (h8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_3_H[outflank_d9] & 0x0040201008040200;
 
 	return flipped;
@@ -1801,38 +917,13 @@ static unsigned long long flip_D4(const unsigned long long P, const unsigned lon
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-static unsigned long long flip_E4(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
-	unsigned long long flipped;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_v = OUTFLANK_3[((O & 0x0010101010101000) * 0x0010204081020408) >> 57];
-	outflank_v &= ((P & 0x1010101010101010) * 0x0408000000008102) >> 59;	// 21876
-	flipped = FLIPPED_3_V[outflank_v] & 0x0010101010101000;
-
-	outflank_h = OUTFLANK_4[(O >> 25) & 0x3f] & rotl8(P >> 24, 2);
-	flipped |= (unsigned char) FLIPPED_4_H[outflank_h] << 24;
-
-	outflank_d7 = OUTFLANK_4[(((HIDWORD(O) & 0x00020408) + (LODWORD(O) & 0x10204000)) * 0x01010101) >> 25];
-	outflank_d7 &= ((P & 0x0102040810204080) * 0x0020200000202020) >> 59;	// cbahg
-	flipped |= FLIPPED_4_H[outflank_d7] & 0x0002040810204000;
+static unsigned long long flip_E4(const unsigned long long P, const unsigned long long O)
+{
+	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
+	unsigned long long flipped;
 
-	outflank_d9 = OUTFLANK_4[(((HIDWORD(O) & 0x00004020) + (LODWORD(O) & 0x10080400)) * 0x01010101) >> 25];
-	outflank_d9 &= ((P & 0x0080402010080402) * 0x0404000000040404) >> 56;	// cbahg
-=======
-	outflank_v = OUTFLANK_3[((O & 0x0010101010101000) * 0x0010204081020408) >> 57]
-		& (((P & 0x1010101010101010) * 0x0010204081020408) >> 56);
-=======
-	outflank_v = ((P & 0x1010101010101010) * 0x0408000000008102) >> 59;	// 21876
-	outflank_v = OUTFLANK_3[((O & 0x0010101010101000) * 0x0010204081020408) >> 57] & outflank_v;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_v = OUTFLANK_3[((O & 0x0010101010101000) * 0x0010204081020408) >> 57];
 	outflank_v &= ((P & 0x1010101010101010) * 0x0408000000008102) >> 59;	// 21876
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_3_V[outflank_v] & 0x0010101010101000;
 
 	outflank_h = OUTFLANK_4[(O >> 25) & 0x3f] & rotl8(P >> 24, 2);
@@ -1842,19 +933,8 @@ static unsigned long long flip_E4(const unsigned long long P, const unsigned lon
 	outflank_d7 &= ((P & 0x0102040810204080) * 0x0020200000202020) >> 59;	// cbahg
 	flipped |= FLIPPED_4_H[outflank_d7] & 0x0002040810204000;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d9 = OUTFLANK_4[((O & 0x0000402010080400) * 0x0101010101010101) >> 57]
-		& (((P & 0x0080402010080402) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d9 = ((P & 0x0080402010080402) * 0x0404000000040404) >> 56;	// cbahg
-	outflank_d9 = OUTFLANK_4[((O & 0x0000402010080400) * 0x0101010101010101) >> 57] & outflank_d9;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_d9 = OUTFLANK_4[(((HIDWORD(O) & 0x00004020) + (LODWORD(O) & 0x10080400)) * 0x01010101) >> 25];
 	outflank_d9 &= ((P & 0x0080402010080402) * 0x0404000000040404) >> 56;	// cbahg
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_4_H[outflank_d9] & 0x0000402010080400;
 
 	return flipped;
@@ -1872,50 +952,16 @@ static unsigned long long flip_F4(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_c1f4f8, outflank_b8f4f1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_c1f4f8 = OUTFLANK_3[((O & 0x0020202020100800) * 0x0040404040810204) >> 57];
-	outflank_c1f4f8 &= ((P & 0x2020202020100804) * 0x1010000000004081) >> 59;	// 21876
-	flipped = FLIPPED_3_V[outflank_c1f4f8] & 0x0020202020100800;
-
-<<<<<<< HEAD
-	outflank_b8f4f1 = OUTFLANK_4[((O & 0x0004081020202000) * 0x0804020101010101) >> 58];
-	outflank_b8f4f1 &= ((P & 0x0204081020202020) * 0x0080400000101010) >> 59;	// 67812
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_b8f4f1]) & 0x0004081020202000;
-=======
-	outflank_b8f4f1 = OUTFLANK_4[((O & 0x0004081020202000ULL) * 0x0804020101010101ULL) >> 58]
-		& ((((P & 0x0204081020202020ULL) >> 1) * 0x0804020101010101ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_b8f4f1]) & 0x0004081020202000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-
-	outflank_h = OUTFLANK_5[(O >> 25) & 0x3f] & rotl8(P >> 24, 1);
-	flipped |= (unsigned char) FLIPPED_5_H[outflank_h] << 24;
-=======
-	outflank_c1f4f8 = OUTFLANK_3[((O & 0x0020202020100800) * 0x0040404040810204) >> 57]
-		& (((P & 0x2020202020100804) * 0x0040404040810204) >> 56);
-=======
-	outflank_c1f4f8 = ((P & 0x2020202020100804) * 0x1010000000004081) >> 59;	// 21876
-	outflank_c1f4f8 = OUTFLANK_3[((O & 0x0020202020100800) * 0x0040404040810204) >> 57] & outflank_c1f4f8;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_c1f4f8 = OUTFLANK_3[((O & 0x0020202020100800) * 0x0040404040810204) >> 57];
 	outflank_c1f4f8 &= ((P & 0x2020202020100804) * 0x1010000000004081) >> 59;	// 21876
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_3_V[outflank_c1f4f8] & 0x0020202020100800;
 
 	outflank_b8f4f1 = OUTFLANK_4[((O & 0x0004081020202000) * 0x0804020101010101) >> 58];
 	outflank_b8f4f1 &= ((P & 0x0204081020202020) * 0x0080400000101010) >> 59;	// 67812
 	flipped |= vertical_mirror(FLIPPED_4_V[outflank_b8f4f1]) & 0x0004081020202000;
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_5[(O >> 25) & 0x3f] & (P >> 24);
-	flipped |= FLIPPED_5_H[outflank_h] & 0x00000000ff000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_5[(O >> 25) & 0x3f] & rotl8(P >> 24, 1);
 	flipped |= (unsigned char) FLIPPED_5_H[outflank_h] << 24;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	flipped |= (((P << 7) & 0x0000000000400000) | ((P >> 9) & 0x0000004000000000)) & O;
 
@@ -1934,56 +980,16 @@ static unsigned long long flip_G4(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_d1g4g8, outflank_c8g4g1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_d1g4g8 = OUTFLANK_3[((O & 0x0040404040201000) * 0x0020202020408102) >> 57];
 	outflank_d1g4g8 &= rotl8(((P & 0x4040404040201008) * 0x0020202020408102) >> 56, 3);	// (g8)
 	flipped = FLIPPED_3_V[outflank_d1g4g8] & 0x0040404040201000;
 
-<<<<<<< HEAD
 	outflank_c8g4g1 = OUTFLANK_4[((O & 0x0008102040404000) * 0x0001008040404040) >> 57];
 	outflank_c8g4g1 &= ((P & 0x0408102040404040) * 0x0040200000080808) >> 59;	// 67812
 	flipped |= vertical_mirror(FLIPPED_4_V[outflank_c8g4g1]) & 0x0008102040404000;
-=======
-	outflank_c8g4g1 = OUTFLANK_4[((O & 0x0008102040404000ULL) * 0x0001008040404040ULL) >> 57]
-		& ((((P & 0x0408102040404040ULL) >> 2) * 0x0804020101010101ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_c8g4g1]) & 0x0008102040404000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	outflank_d1g4g8 = OUTFLANK_3[((O & 0x0040404040201000) * 0x0020202020408102) >> 57]
-		& (((P & 0x4040404040201008) * 0x0020202020408102) >> 56);
-=======
-	outflank_d1g4g8 = ((P & 0x4040404040201008) * 0x0020202020408102) >> 56;	// (g8)
-	outflank_d1g4g8 = OUTFLANK_3[((O & 0x0040404040201000) * 0x0020202020408102) >> 57] & rotl8(outflank_d1g4g8, 3);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-	outflank_d1g4g8 = OUTFLANK_3[((O & 0x0040404040201000) * 0x0020202020408102) >> 57];
-	outflank_d1g4g8 &= rotl8(((P & 0x4040404040201008) * 0x0020202020408102) >> 56, 3);	// (g8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	flipped = FLIPPED_3_V[outflank_d1g4g8] & 0x0040404040201000;
-
-	outflank_c8g4g1 = OUTFLANK_4[((O & 0x0008102040404000) * 0x0001008040404040) >> 57];
-	outflank_c8g4g1 &= ((P & 0x0408102040404040) * 0x0040200000080808) >> 59;	// 67812
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_c8g4g1]) & 0x0008102040404000;
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_h = outflank_right_H(((unsigned int) O >> 25) << 27) & (unsigned int) (P << 2);
-	flipped |= (outflank_h * (unsigned int) -2) >> 2;
-=======
-	outflank_h = outflank_right_32((unsigned int) O, 0x3f000000) & (unsigned int) P;
-	flipped |= (outflank_h * -2) & 0x3f000000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H(((unsigned int) O >> 25) << 27) & (unsigned int)(P << 2);
-=======
 	outflank_h = outflank_right_H(((unsigned int) O >> 25) << 27) & (unsigned int) (P << 2);
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= (outflank_h * (unsigned int) -2) >> 2;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -2000,56 +1006,16 @@ static unsigned long long flip_H4(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_e1h4h8, outflank_d8h4h1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_e1h4h8 = OUTFLANK_3[((O & 0x0080808080402000) * 0x0010101010204081) >> 57];
-	outflank_e1h4h8 &= rotl8(((P & 0x8080808080402010) * 0x0010101010204081) >> 56, 3);	// (h8)
-	flipped = FLIPPED_3_V[outflank_e1h4h8] & 0x0080808080402000;
-
-<<<<<<< HEAD
-	outflank_d8h4h1 = OUTFLANK_4[((O & 0x0010204080808000) * 0x0000804020202020) >> 57];
-	outflank_d8h4h1 &= ((P & 0x0810204080808080) * 0x0020100000040404) >> 59;	// 67812
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_d8h4h1]) & 0x0010204080808000;
-=======
-	outflank_d8h4h1 = OUTFLANK_4[((O & 0x0010204080808000ULL) * 0x0000804020202020ULL) >> 57]
-		& ((((P & 0x0810204080808080ULL) >> 3) * 0x0804020101010101ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_4_V[outflank_d8h4h1]) & 0x0010204080808000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	outflank_e1h4h8 = OUTFLANK_3[((O & 0x0080808080402000) * 0x0010101010204081) >> 57]
-		& (((P & 0x8080808080402010) * 0x0010101010204081) >> 56);
-=======
-	outflank_e1h4h8 = ((P & 0x8080808080402010) * 0x0010101010204081) >> 56;	// (h8)
-	outflank_e1h4h8 = OUTFLANK_3[((O & 0x0080808080402000) * 0x0010101010204081) >> 57] & rotl8(outflank_e1h4h8, 3);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_e1h4h8 = OUTFLANK_3[((O & 0x0080808080402000) * 0x0010101010204081) >> 57];
 	outflank_e1h4h8 &= rotl8(((P & 0x8080808080402010) * 0x0010101010204081) >> 56, 3);	// (h8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_3_V[outflank_e1h4h8] & 0x0080808080402000;
 
 	outflank_d8h4h1 = OUTFLANK_4[((O & 0x0010204080808000) * 0x0000804020202020) >> 57];
 	outflank_d8h4h1 &= ((P & 0x0810204080808080) * 0x0020100000040404) >> 59;	// 67812
 	flipped |= vertical_mirror(FLIPPED_4_V[outflank_d8h4h1]) & 0x0010204080808000;
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = outflank_right_H(((unsigned int) O >> 25) << 26) & (unsigned int) (P << 1);
 	flipped |= (outflank_h * (unsigned int) -2) >> 1;
-=======
-	outflank_h = outflank_right_32((unsigned int) O, 0x7f000000) & (unsigned int) P;
-	flipped |= (outflank_h * -2) & 0x7f000000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H(((unsigned int) O >> 25) << 26) & (unsigned int)(P << 1);
-=======
-	outflank_h = outflank_right_H(((unsigned int) O >> 25) << 26) & (unsigned int) (P << 1);
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	flipped |= (outflank_h * (unsigned int) -2) >> 1;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -2063,42 +1029,16 @@ static unsigned long long flip_H4(const unsigned long long P, const unsigned lon
  */
 static unsigned long long flip_A5(const unsigned long long P, const unsigned long long O)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	unsigned int outflank_h, outflank_a1a5d8, outflank_a8a5e1;
 	unsigned long long flipped;
 
 	outflank_a1a5d8 = OUTFLANK_4[((O & 0x0004020101010100) * 0x0102040810101010) >> 57];
 	outflank_a1a5d8 &= ((P & 0x0804020101010101) * 0x2040800000000202) >> 59;	// 32187
-<<<<<<< HEAD
-	flipped = FLIPPED_4_V[outflank_a1a5d8] & 0x0004020101010100;
-
-<<<<<<< HEAD
-	outflank_a8a5e1 = OUTFLANK_3[((O & 0x0001010102040800) * 0x0808080808040201) >> 57];
-	outflank_a8a5e1 &= ((P & 0x0101010102040810) * 0x0202020000008040) >> 59;	// 78123
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_a8a5e1]) & 0x0001010102040800;
-=======
-	outflank_a8a5e1 = OUTFLANK_3[((O & 0x0001010102040800ULL) * 0x0808080808040201ULL) >> 57]
-		& (((P & 0x0101010102040810ULL) * 0x0808080808040201ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_a8a5e1]) & 0x0001010102040800ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	unsigned int outflank_a1a5d8, outflank_a8a5e1, outflank_h;
-=======
-	unsigned int outflank_h, outflank_a1a5d8, outflank_a8a5e1;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	unsigned long long flipped;
-
-	outflank_a1a5d8 = ((P & 0x0804020101010101) * 0x2040800000000202) >> 59;	// 32187
-	outflank_a1a5d8 = OUTFLANK_4[((O & 0x0004020101010100) * 0x0102040810101010) >> 57] & outflank_a1a5d8;
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_4_V[outflank_a1a5d8] & 0x0004020101010100;
 
 	outflank_a8a5e1 = OUTFLANK_3[((O & 0x0001010102040800) * 0x0808080808040201) >> 57];
 	outflank_a8a5e1 &= ((P & 0x0101010102040810) * 0x0202020000008040) >> 59;	// 78123
 	flipped |= vertical_mirror(FLIPPED_3_V[outflank_a8a5e1]) & 0x0001010102040800;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = ((unsigned int) (O >> 8) + 0x02000000) & (unsigned int) (P >> 8);
 	flipped |= (((unsigned long long) outflank_h << 8) - outflank_h) & 0x0000007e00000000;
@@ -2115,42 +1055,16 @@ static unsigned long long flip_A5(const unsigned long long P, const unsigned lon
  */
 static unsigned long long flip_B5(const unsigned long long P, const unsigned long long O)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	unsigned int outflank_h, outflank_b1b5e8, outflank_b8b5f1;
 	unsigned long long flipped;
 
 	outflank_b1b5e8 = OUTFLANK_4[((O & 0x0008040202020200) * 0x0081020408080808) >> 57];
 	outflank_b1b5e8 &= ((P & 0x1008040202020202) * 0x1020400000000101) >> 59;	// 32187
-<<<<<<< HEAD
-	flipped = FLIPPED_4_V[outflank_b1b5e8] & 0x0008040202020200;
-
-<<<<<<< HEAD
-	outflank_b8b5f1 = OUTFLANK_3[((O & 0x0002020204081000) * 0x0808080808040201) >> 58];
-	outflank_b8b5f1 &= ((P & 0x0202020204081020) * 0x0101010000004020) >> 59;	// 78123
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_b8b5f1]) & 0x0002020204081000;
-=======
-	outflank_b8b5f1 = OUTFLANK_3[((O & 0x0002020204081000ULL) * 0x0808080808040201ULL) >> 58]
-		& ((((P & 0x0202020204081020ULL) >> 1) * 0x0808080808040201ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_b8b5f1]) & 0x0002020204081000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	unsigned int outflank_b1b5e8, outflank_b8b5f1, outflank_h;
-=======
-	unsigned int outflank_h, outflank_b1b5e8, outflank_b8b5f1;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	unsigned long long flipped;
-
-	outflank_b1b5e8 = ((P & 0x1008040202020202) * 0x1020400000000101) >> 59;	// 32187
-	outflank_b1b5e8 = OUTFLANK_4[((O & 0x0008040202020200) * 0x0081020408080808) >> 57] & outflank_b1b5e8;
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_4_V[outflank_b1b5e8] & 0x0008040202020200;
 
 	outflank_b8b5f1 = OUTFLANK_3[((O & 0x0002020204081000) * 0x0808080808040201) >> 58];
 	outflank_b8b5f1 &= ((P & 0x0202020204081020) * 0x0101010000004020) >> 59;	// 78123
 	flipped |= vertical_mirror(FLIPPED_3_V[outflank_b8b5f1]) & 0x0002020204081000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = ((unsigned int) (O >> 8) + 0x04000000) & (unsigned int) (P >> 8);
 	flipped |= (((unsigned long long) outflank_h << 8) - outflank_h) & 0x0000007c00000000;
@@ -2170,50 +1084,16 @@ static unsigned long long flip_C5(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_c1c5f8, outflank_c8c5g1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_c1c5f8 = OUTFLANK_4[((O & 0x0010080404040400) * 0x0040810204040404) >> 57];
-	outflank_c1c5f8 &= rotl8(((P & 0x2010080404040404) * 0x0040810204040404) >> 56, 2);	// (f8)
-	flipped = FLIPPED_4_V[outflank_c1c5f8] & 0x0010080404040400;
-
-<<<<<<< HEAD
-	outflank_c8c5g1 = OUTFLANK_3[((O & 0x0004040408102000) * 0x0002020202010080) >> 57];
-	outflank_c8c5g1 &= ((P & 0x0404040408102040) * 0x0080808000002010) >> 59;	// 78123
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_c8c5g1]) & 0x0004040408102000;
-=======
-	outflank_c8c5g1 = OUTFLANK_3[((O & 0x0004040408102000ULL) * 0x0002020202010080ULL) >> 57]
-		& ((((P & 0x0404040408102040ULL) >> 2) * 0x0808080808040201ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_c8c5g1]) & 0x0004040408102000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-
-	outflank_h = OUTFLANK_2[(O >> 33) & 0x3f] & rotl8(P >> 32, 4);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_2_H[outflank_h] << 32;
-=======
-	outflank_c1c5f8 = OUTFLANK_4[((O & 0x0010080404040400) * 0x0040810204040404) >> 57]
-		& (((P & 0x2010080404040404) * 0x0040810204040404) >> 56);
-=======
-	outflank_c1c5f8 = ((P & 0x2010080404040404) * 0x0040810204040404) >> 56;	// (f8)
-	outflank_c1c5f8 = OUTFLANK_4[((O & 0x0010080404040400) * 0x0040810204040404) >> 57] & rotl8(outflank_c1c5f8, 2);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_c1c5f8 = OUTFLANK_4[((O & 0x0010080404040400) * 0x0040810204040404) >> 57];
 	outflank_c1c5f8 &= rotl8(((P & 0x2010080404040404) * 0x0040810204040404) >> 56, 2);	// (f8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_4_V[outflank_c1c5f8] & 0x0010080404040400;
 
 	outflank_c8c5g1 = OUTFLANK_3[((O & 0x0004040408102000) * 0x0002020202010080) >> 57];
 	outflank_c8c5g1 &= ((P & 0x0404040408102040) * 0x0080808000002010) >> 59;	// 78123
 	flipped |= vertical_mirror(FLIPPED_3_V[outflank_c8c5g1]) & 0x0004040408102000;
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_2[(O >> 33) & 0x3f] & (P >> 32);
-	flipped |= FLIPPED_2_H[outflank_h] & 0x000000ff00000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_2[(O >> 33) & 0x3f] & rotl8(P >> 32, 4);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_2_H[outflank_h] << 32;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	flipped |= (((P << 9) & 0x0000000002000000) | ((P >> 7) & 0x0000020000000000)) & O;
 
@@ -2232,33 +1112,8 @@ static unsigned long long flip_D5(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_v = OUTFLANK_4[((O & 0x0008080808080800) * 0x0020408102040810) >> 57];
-	outflank_v &= ((P & 0x0808080808080808) * 0x0408100000000102) >> 59;	// 32187
-	flipped = FLIPPED_4_V[outflank_v] & 0x0008080808080800;
-
-	outflank_h = OUTFLANK_3[(O >> 33) & 0x3f] & rotl8(P >> 32, 3);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_3_H[outflank_h] << 32;
-
-	outflank_d7 = OUTFLANK_3[(((HIDWORD(O) & 0x00020408) + (LODWORD(O) & 0x10204000)) * 0x01010101) >> 25];
-	outflank_d7 &= ((P & 0x0102040810204080) * 0x0040404000004040) >> 59;	// bahgf
-	flipped |= FLIPPED_3_H[outflank_d7] & 0x0002040810204000;
-
-	outflank_d9 = OUTFLANK_3[(((HIDWORD(O) & 0x00201008) + (LODWORD(O) & 0x04020000)) * 0x01010101) >> 25];
-	outflank_d9 &= rotl8((((HIDWORD(P) & 0x40201008) + (LODWORD(P) & 0x04020100)) * 0x01010101) >> 24, 3);	// (g8)
-=======
-	outflank_v = OUTFLANK_4[((O & 0x0008080808080800) * 0x0020408102040810) >> 57]
-		& (((P & 0x0808080808080808) * 0x0020408102040810) >> 56);
-=======
-	outflank_v = ((P & 0x0808080808080808) * 0x0408100000000102) >> 59;	// 32187
-	outflank_v = OUTFLANK_4[((O & 0x0008080808080800) * 0x0020408102040810) >> 57] & outflank_v;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_v = OUTFLANK_4[((O & 0x0008080808080800) * 0x0020408102040810) >> 57];
 	outflank_v &= ((P & 0x0808080808080808) * 0x0408100000000102) >> 59;	// 32187
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_4_V[outflank_v] & 0x0008080808080800;
 
 	outflank_h = OUTFLANK_3[(O >> 33) & 0x3f] & rotl8(P >> 32, 3);
@@ -2268,19 +1123,8 @@ static unsigned long long flip_D5(const unsigned long long P, const unsigned lon
 	outflank_d7 &= ((P & 0x0102040810204080) * 0x0040404000004040) >> 59;	// bahgf
 	flipped |= FLIPPED_3_H[outflank_d7] & 0x0002040810204000;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d9 = OUTFLANK_3[((O & 0x0020100804020000) * 0x0101010101010101) >> 57]
-		& (((P & 0x4020100804020100) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d9 = ((P & 0x4020100804020100) * 0x0101010101010101) >> 56;	// (g8)
-	outflank_d9 = OUTFLANK_3[((O & 0x0020100804020000) * 0x0101010101010101) >> 57] & rotl8(outflank_d9, 3);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_d9 = OUTFLANK_3[(((HIDWORD(O) & 0x00201008) + (LODWORD(O) & 0x04020000)) * 0x01010101) >> 25];
 	outflank_d9 &= rotl8((((HIDWORD(P) & 0x40201008) + (LODWORD(P) & 0x04020100)) * 0x01010101) >> 24, 3);	// (g8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_3_H[outflank_d9] & 0x0020100804020000;
 
 	return flipped;
@@ -2298,33 +1142,8 @@ static unsigned long long flip_E5(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_v = OUTFLANK_4[((O & 0x0010101010101000) * 0x0010204081020408) >> 57];
-	outflank_v &= ((P & 0x1010101010101010) * 0x0204080000000081) >> 59;	// 32187
-	flipped = FLIPPED_4_V[outflank_v] & 0x0010101010101000;
-
-	outflank_h = OUTFLANK_4[(O >> 33) & 0x3f] & rotl8(P >> 32, 2);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_4_H[outflank_h] << 32;
-
-	outflank_d7 = OUTFLANK_4[(((HIDWORD(O) & 0x00040810) + (LODWORD(O) & 0x20400000)) * 0x01010101) >> 25];
-	outflank_d7 &= ((P & 0x0204081020408000) * 0x0000202000002020) >> 59;	// cb0hg
-	flipped |= FLIPPED_4_H[outflank_d7] & 0x0004081020400000;
-
-	outflank_d9 = OUTFLANK_4[(((HIDWORD(O) & 0x00402010) + (LODWORD(O) & 0x08040200)) * 0x01010101) >> 25];
-	outflank_d9 &= rotl8((((HIDWORD(P) & 0x80402010) + (LODWORD(P) & 0x08040201)) * 0x01010101) >> 24, 2);	// (h8)
-=======
-	outflank_v = OUTFLANK_4[((O & 0x0010101010101000) * 0x0010204081020408) >> 57]
-		& (((P & 0x1010101010101010) * 0x0010204081020408) >> 56);
-=======
-	outflank_v = ((P & 0x1010101010101010) * 0x0204080000000081) >> 59;	// 32187
-	outflank_v = OUTFLANK_4[((O & 0x0010101010101000) * 0x0010204081020408) >> 57] & outflank_v;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_v = OUTFLANK_4[((O & 0x0010101010101000) * 0x0010204081020408) >> 57];
 	outflank_v &= ((P & 0x1010101010101010) * 0x0204080000000081) >> 59;	// 32187
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_4_V[outflank_v] & 0x0010101010101000;
 
 	outflank_h = OUTFLANK_4[(O >> 33) & 0x3f] & rotl8(P >> 32, 2);
@@ -2334,19 +1153,8 @@ static unsigned long long flip_E5(const unsigned long long P, const unsigned lon
 	outflank_d7 &= ((P & 0x0204081020408000) * 0x0000202000002020) >> 59;	// cb0hg
 	flipped |= FLIPPED_4_H[outflank_d7] & 0x0004081020400000;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d9 = OUTFLANK_4[((O & 0x0040201008040200) * 0x0101010101010101) >> 57]
-		& (((P & 0x8040201008040201) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d9 = ((P & 0x8040201008040201) * 0x0101010101010101) >> 56;	// (h8)
-	outflank_d9 = OUTFLANK_4[((O & 0x0040201008040200) * 0x0101010101010101) >> 57] & rotl8(outflank_d9, 2);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_d9 = OUTFLANK_4[(((HIDWORD(O) & 0x00402010) + (LODWORD(O) & 0x08040200)) * 0x01010101) >> 25];
 	outflank_d9 &= rotl8((((HIDWORD(P) & 0x80402010) + (LODWORD(P) & 0x08040201)) * 0x01010101) >> 24, 2);	// (h8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_4_H[outflank_d9] & 0x0040201008040200;
 
 	return flipped;
@@ -2364,50 +1172,16 @@ static unsigned long long flip_F5(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_b1f5f8, outflank_c8f5f1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_b1f5f8 = OUTFLANK_4[((O & 0x0020202010080400) * 0x0080808080810204) >> 57];
-	outflank_b1f5f8 &= rotl8(((P & 0x2020202010080402) * 0x0080808080810204) >> 56, 2);	// (f8)
-	flipped = FLIPPED_4_V[outflank_b1f5f8] & 0x0020202010080400;
-
-<<<<<<< HEAD
-	outflank_c8f5f1 = OUTFLANK_3[((O & 0x0008102020202000) * 0x0002010080404040) >> 57];
-	outflank_c8f5f1 &= ((P & 0x0408102020202020) * 0x0100804000001010) >> 59;	// 78123
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_c8f5f1]) & 0x0008102020202000;
-=======
-	outflank_c8f5f1 = OUTFLANK_3[((O & 0x0008102020202000ULL) * 0x0002010080404040ULL) >> 57]
-		& ((((P & 0x0408102020202020ULL) >> 2) * 0x1008040201010101ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_c8f5f1]) & 0x0008102020202000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-
-	outflank_h = OUTFLANK_5[(O >> 33) & 0x3f] & rotl8(P >> 32, 1);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_5_H[outflank_h] << 32;
-=======
-	outflank_b1f5f8 = OUTFLANK_4[((O & 0x0020202010080400) * 0x0080808080810204) >> 57]
-		& (((P & 0x2020202010080402) * 0x0080808080810204) >> 56);
-=======
-	outflank_b1f5f8 = ((P & 0x2020202010080402) * 0x0080808080810204) >> 56;	// (f8)
-	outflank_b1f5f8 = OUTFLANK_4[((O & 0x0020202010080400) * 0x0080808080810204) >> 57] & rotl8(outflank_b1f5f8, 2);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_b1f5f8 = OUTFLANK_4[((O & 0x0020202010080400) * 0x0080808080810204) >> 57];
 	outflank_b1f5f8 &= rotl8(((P & 0x2020202010080402) * 0x0080808080810204) >> 56, 2);	// (f8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_4_V[outflank_b1f5f8] & 0x0020202010080400;
 
 	outflank_c8f5f1 = OUTFLANK_3[((O & 0x0008102020202000) * 0x0002010080404040) >> 57];
 	outflank_c8f5f1 &= ((P & 0x0408102020202020) * 0x0100804000001010) >> 59;	// 78123
 	flipped |= vertical_mirror(FLIPPED_3_V[outflank_c8f5f1]) & 0x0008102020202000;
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_5[(O >> 33) & 0x3f] & (P >> 32);
-	flipped |= FLIPPED_5_H[outflank_h] & 0x000000ff00000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_5[(O >> 33) & 0x3f] & rotl8(P >> 32, 1);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_5_H[outflank_h] << 32;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	flipped |= (((P << 7) & 0x0000000040000000) | ((P >> 9) & 0x0000400000000000)) & O;
 
@@ -2426,57 +1200,16 @@ static unsigned long long flip_G5(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_c1g5g8, outflank_d8g5g1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_c1g5g8 = OUTFLANK_4[((O & 0x0040404020100800) * 0x0040404040408102) >> 57];
 	outflank_c1g5g8 &= rotl8(((P & 0x4040404020100804) * 0x0040404040408102) >> 56, 2);	// (g8)
 	flipped = FLIPPED_4_V[outflank_c1g5g8] & 0x0040404020100800;
 
-<<<<<<< HEAD
 	outflank_d8g5g1 = OUTFLANK_3[((O & 0x0010204040404000) * 0x0001008040202020) >> 57];
 	outflank_d8g5g1 &= ((P & 0x0810204040404040) * 0x0080402000000808) >> 59;	// 78123
 	flipped |= vertical_mirror(FLIPPED_3_V[outflank_d8g5g1]) & 0x0010204040404000;
-=======
-	outflank_d8g5g1 = OUTFLANK_3[((O & 0x0010204040404000ULL) * 0x0001008040202020ULL) >> 57]
-		& ((((P & 0x0810204040404040ULL) >> 3) * 0x1008040201010101ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_d8g5g1]) & 0x0010204040404000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	outflank_c1g5g8 = OUTFLANK_4[((O & 0x0040404020100800) * 0x0040404040408102) >> 57]
-		& (((P & 0x4040404020100804) * 0x0040404040408102) >> 56);
-=======
-	outflank_c1g5g8 = ((P & 0x4040404020100804) * 0x0040404040408102) >> 56;	// (g8)
-	outflank_c1g5g8 = OUTFLANK_4[((O & 0x0040404020100800) * 0x0040404040408102) >> 57] & rotl8(outflank_c1g5g8, 2);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-	outflank_c1g5g8 = OUTFLANK_4[((O & 0x0040404020100800) * 0x0040404040408102) >> 57];
-	outflank_c1g5g8 &= rotl8(((P & 0x4040404020100804) * 0x0040404040408102) >> 56, 2);	// (g8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	flipped = FLIPPED_4_V[outflank_c1g5g8] & 0x0040404020100800;
-
-	outflank_d8g5g1 = OUTFLANK_3[((O & 0x0010204040404000) * 0x0001008040202020) >> 57];
-	outflank_d8g5g1 &= ((P & 0x0810204040404040) * 0x0080402000000808) >> 59;	// 78123
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_d8g5g1]) & 0x0010204040404000;
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_h = outflank_right_H((unsigned int) (O >> 33) << 27) & (unsigned int) (P >> 6);
-	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 6;
-=======
-	outflank_h = outflank_right_H(O, 0x0000003f00000000) & P;
-	flipped |= (outflank_h * -2) & 0x0000003f00000000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H((unsigned int)(O >> 33) << 27) & (unsigned int)(P >> 6);
-	flipped |= (unsigned long long)(outflank_h * (unsigned int) -2) << 6;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_h = outflank_right_H((unsigned int) (O >> 33) << 27) & (unsigned int) (P >> 6);
 	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 6;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
 	return flipped;
 }
@@ -2493,57 +1226,16 @@ static unsigned long long flip_H5(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_d1h5h8, outflank_e8h5h1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d1h5h8 = OUTFLANK_4[((O & 0x0080808040201000) * 0x0020202020204081) >> 57];
-	outflank_d1h5h8 &= rotl8(((P & 0x8080808040201008) * 0x0020202020204081) >> 56, 2);	// (h8)
-	flipped = FLIPPED_4_V[outflank_d1h5h8] & 0x0080808040201000;
-
-<<<<<<< HEAD
-	outflank_e8h5h1 = OUTFLANK_3[((O & 0x0020408080808000) * 0x0000804020101010) >> 57];
-	outflank_e8h5h1 &= ((P & 0x1020408080808080) * 0x0040201000000404) >> 59;	// 78123
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_e8h5h1]) & 0x0020408080808000;
-=======
-	outflank_e8h5h1 = OUTFLANK_3[((O & 0x0020408080808000ULL) * 0x0000804020101010ULL) >> 57]
-		& ((((P & 0x1020408080808080ULL) >> 4) * 0x1008040201010101ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_3_V[outflank_e8h5h1]) & 0x0020408080808000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	outflank_d1h5h8 = OUTFLANK_4[((O & 0x0080808040201000) * 0x0020202020204081) >> 57]
-		& (((P & 0x8080808040201008) * 0x0020202020204081) >> 56);
-=======
-	outflank_d1h5h8 = ((P & 0x8080808040201008) * 0x0020202020204081) >> 56;	// (h8)
-	outflank_d1h5h8 = OUTFLANK_4[((O & 0x0080808040201000) * 0x0020202020204081) >> 57] & rotl8(outflank_d1h5h8, 2);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_d1h5h8 = OUTFLANK_4[((O & 0x0080808040201000) * 0x0020202020204081) >> 57];
 	outflank_d1h5h8 &= rotl8(((P & 0x8080808040201008) * 0x0020202020204081) >> 56, 2);	// (h8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_4_V[outflank_d1h5h8] & 0x0080808040201000;
 
 	outflank_e8h5h1 = OUTFLANK_3[((O & 0x0020408080808000) * 0x0000804020101010) >> 57];
 	outflank_e8h5h1 &= ((P & 0x1020408080808080) * 0x0040201000000404) >> 59;	// 78123
 	flipped |= vertical_mirror(FLIPPED_3_V[outflank_e8h5h1]) & 0x0020408080808000;
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = outflank_right_H((unsigned int) (O >> 33) << 26) & (unsigned int) (P >> 7);
 	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 7;
-=======
-	outflank_h = outflank_right_H(O, 0x0000007f00000000) & P;
-	flipped |= (outflank_h * -2) & 0x0000007f00000000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H((unsigned int)(O >> 33) << 26) & (unsigned int)(P >> 7);
-	flipped |= (unsigned long long)(outflank_h * (unsigned int) -2) << 7;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-	outflank_h = outflank_right_H((unsigned int) (O >> 33) << 26) & (unsigned int) (P >> 7);
-	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 7;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
 	return flipped;
 }
@@ -2557,42 +1249,16 @@ static unsigned long long flip_H5(const unsigned long long P, const unsigned lon
  */
 static unsigned long long flip_A6(const unsigned long long P, const unsigned long long O)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	unsigned int outflank_h, outflank_a1a6c8, outflank_a8a6f1;
 	unsigned long long flipped;
 
 	outflank_a1a6c8 = OUTFLANK_5[((O & 0x0002010101010100) * 0x0102040810202020) >> 57];
 	outflank_a1a6c8 &= ((P & 0x0402010101010101) * 0x1020408000000002) >> 59;	// 43218
-<<<<<<< HEAD
-	flipped = FLIPPED_5_V[outflank_a1a6c8] & 0x0002010101010100;
-
-<<<<<<< HEAD
-	outflank_a8a6f1 = OUTFLANK_2[((O & 0x0001010204081000) * 0x0404040404040201) >> 57];
-	outflank_a8a6f1 &= ((P & 0x0101010204081020) * 0x0202020200000080) >> 59;	// 81234
-	flipped |= vertical_mirror(FLIPPED_2_V[outflank_a8a6f1]) & 0x0001010204081000;
-=======
-	outflank_a8a6f1 = OUTFLANK_2[((O & 0x0001010204081000ULL) * 0x0404040404040201ULL) >> 57]
-		& (((P & 0x0101010204081020ULL) * 0x0404040404040201ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_2_V[outflank_a8a6f1]) & 0x0001010204081000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	unsigned int outflank_a1a6c8, outflank_a8a6f1, outflank_h;
-=======
-	unsigned int outflank_h, outflank_a1a6c8, outflank_a8a6f1;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	unsigned long long flipped;
-
-	outflank_a1a6c8 = ((P & 0x0402010101010101) * 0x1020408000000002) >> 59;	// 43218
-	outflank_a1a6c8 = OUTFLANK_5[((O & 0x0002010101010100) * 0x0102040810202020) >> 57] & outflank_a1a6c8;
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_5_V[outflank_a1a6c8] & 0x0002010101010100;
 
 	outflank_a8a6f1 = OUTFLANK_2[((O & 0x0001010204081000) * 0x0404040404040201) >> 57];
 	outflank_a8a6f1 &= ((P & 0x0101010204081020) * 0x0202020200000080) >> 59;	// 81234
 	flipped |= vertical_mirror(FLIPPED_2_V[outflank_a8a6f1]) & 0x0001010204081000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = ((unsigned int) (O >> 16) + 0x02000000) & (unsigned int) (P >> 16);
 	flipped |= (((unsigned long long) outflank_h << 16) - outflank_h) & 0x00007e0000000000;
@@ -2609,52 +1275,16 @@ static unsigned long long flip_A6(const unsigned long long P, const unsigned lon
  */
 static unsigned long long flip_B6(const unsigned long long P, const unsigned long long O)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	unsigned int outflank_h, outflank_b1b6d8, outflank_b8b6g1;
 	unsigned long long flipped;
 
 	outflank_b1b6d8 = OUTFLANK_5[((O & 0x0004020202020200) * 0x0081020408101010) >> 57];
 	outflank_b1b6d8 &= ((P & 0x0804020202020202) * 0x0810204000000001) >> 59;	// 43218
-<<<<<<< HEAD
-	flipped = FLIPPED_5_V[outflank_b1b6d8] & 0x0004020202020200;
-
-<<<<<<< HEAD
-	outflank_b8b6g1 = OUTFLANK_2[((O & 0x0002020408102000) * 0x0404040404040201) >> 58];
-	outflank_b8b6g1 &= ((P & 0x0202020408102040) * 0x0101010100000040) >> 59;	// 81234
-	flipped |= vertical_mirror(FLIPPED_2_V[outflank_b8b6g1]) & 0x0002020408102000;
-=======
-	outflank_a8a6f1 = OUTFLANK_2[((O & 0x0002020408102000ULL) * 0x0404040404040201ULL) >> 58]
-		& ((((P & 0x0202020408102040ULL) >> 1) * 0x0404040404040201ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_2_V[outflank_a8a6f1]) & 0x0002020408102000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	unsigned int outflank_a1a6c8, outflank_a8a6f1, outflank_h;
-=======
-	unsigned int outflank_h, outflank_b1b6d8, outflank_b8b6g1;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	unsigned long long flipped;
-
-	outflank_b1b6d8 = ((P & 0x0804020202020202) * 0x0810204000000001) >> 59;	// 43218
-	outflank_b1b6d8 = OUTFLANK_5[((O & 0x0004020202020200) * 0x0081020408101010) >> 57] & outflank_b1b6d8;
-	flipped = FLIPPED_5_V[outflank_b1b6d8] & 0x0004020202020200;
-
-<<<<<<< HEAD
-	outflank_a8a6f1 = OUTFLANK_2[((O & 0x0002020408102000) * 0x0404040404040201) >> 58]
-		& ((((P & 0x0202020408102040) >> 1) * 0x0404040404040201) >> 56);
-	flipped |= vertical_mirror(FLIPPED_2_V[outflank_a8a6f1]) & 0x0002020408102000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_b8b6g1 = ((P & 0x0202020408102040) * 0x0101010100000040) >> 59;	// 81234
-	outflank_b8b6g1 = OUTFLANK_2[((O & 0x0002020408102000) * 0x0404040404040201) >> 58] & outflank_b8b6g1;
-=======
 	flipped = FLIPPED_5_V[outflank_b1b6d8] & 0x0004020202020200;
 
 	outflank_b8b6g1 = OUTFLANK_2[((O & 0x0002020408102000) * 0x0404040404040201) >> 58];
 	outflank_b8b6g1 &= ((P & 0x0202020408102040) * 0x0101010100000040) >> 59;	// 81234
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= vertical_mirror(FLIPPED_2_V[outflank_b8b6g1]) & 0x0002020408102000;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	outflank_h = ((unsigned int) (O >> 16) + 0x04000000) & (unsigned int) (P >> 16);
 	flipped |= (((unsigned long long) outflank_h << 16) - outflank_h) & 0x00007c0000000000;
@@ -2674,11 +1304,6 @@ static unsigned long long flip_C6(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_v, outflank_d7;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	outflank_v = OUTFLANK_5[((O & 0x0004040404040400) * 0x0040810204081020) >> 57];
 	outflank_v &= ((P & 0x0404040404040404) * 0x0408102000000002) >> 59;	// 43218
 	flipped = FLIPPED_5_V[outflank_v] & 0x0004040404040400;
@@ -2688,29 +1313,6 @@ static unsigned long long flip_C6(const unsigned long long P, const unsigned lon
 
 	outflank_d7 = OUTFLANK_2[(((HIDWORD(O) & 0x00020408) + (LODWORD(O) & 0x10204000)) * 0x01010101) >> 25];
 	outflank_d7 &= ((P & 0x0102040810204080) * 0x0080808080000080) >> 59;	// ahgfe
-<<<<<<< HEAD
-=======
-	outflank_v = OUTFLANK_5[((O & 0x0004040404040400) * 0x0040810204081020) >> 57]
-		& (((P & 0x0404040404040404) * 0x0040810204081020) >> 56);
-=======
-	outflank_v = ((P & 0x0404040404040404) * 0x0408102000000002) >> 59;	// 43218
-	outflank_v = OUTFLANK_5[((O & 0x0004040404040400) * 0x0040810204081020) >> 57] & outflank_v;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	flipped = FLIPPED_5_V[outflank_v] & 0x0004040404040400;
-
-	outflank_h = OUTFLANK_2[(O >> 41) & 0x3f] & rotl8(P >> 40, 4);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_2_H[outflank_h] << 40;
-
-<<<<<<< HEAD
-	outflank_d7 = OUTFLANK_2[((O & 0x0002040810204000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0102040810204080) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d7 = ((P & 0x0102040810204080) * 0x0080808080000080) >> 59;	// ahgfe
-	outflank_d7 = OUTFLANK_2[((O & 0x0002040810204000) * 0x0101010101010101) >> 57] & outflank_d7;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_2_H[outflank_d7] & 0x0002040810204000;
 
 	flipped |= ((P >> 9) | (P << 9)) & 0x0008000200000000 & O;
@@ -2730,11 +1332,6 @@ static unsigned long long flip_D6(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_v, outflank_d;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	outflank_v = OUTFLANK_5[((O & 0x0008080808080800) * 0x0020408102040810) >> 57];
 	outflank_v &= ((P & 0x0808080808080808) * 0x0204081020408001) >> 59;	// 43218
 	flipped = FLIPPED_5_V[outflank_v] & 0x0008080808080800;
@@ -2744,29 +1341,6 @@ static unsigned long long flip_D6(const unsigned long long P, const unsigned lon
 
 	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 16) & 0x08142240) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000081422418000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-<<<<<<< HEAD
-=======
-	outflank_v = OUTFLANK_5[((O & 0x0008080808080800) * 0x0020408102040810) >> 57]
-		& (((P & 0x0808080808080808) * 0x0020408102040810) >> 56);
-=======
-	outflank_v = ((P & 0x0808080808080808) * 0x0204081020408001) >> 59;	// 43218
-	outflank_v = OUTFLANK_5[((O & 0x0008080808080800) * 0x0020408102040810) >> 57] & outflank_v;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	flipped = FLIPPED_5_V[outflank_v] & 0x0008080808080800;
-
-	outflank_h = OUTFLANK_3[(O >> 41) & 0x3f] & rotl8(P >> 40, 3);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_3_H[outflank_h] << 40;
-
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_3[((O & 0x0000081422400000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0000081422418000) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d = ((P & 0x0000081422418000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-	outflank_d = OUTFLANK_3[((O & 0x0000081422400000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0000081422400000;	// A3D6H2
 
 	flipped |= (((P >> 9) & 0x0010000000000000) | ((P >> 7) & 0x0004000000000000)) & O;
@@ -2786,11 +1360,6 @@ static unsigned long long flip_E6(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_v, outflank_d;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	outflank_v = OUTFLANK_5[((O & 0x0010101010101000) * 0x0010204081020408) >> 57];
 	outflank_v &= rotl8(((P & 0x1010101010101010) * 0x0010204081020408) >> 56, 1);	// (e8)
 	flipped = FLIPPED_5_V[outflank_v] & 0x0010101010101000;
@@ -2800,29 +1369,6 @@ static unsigned long long flip_E6(const unsigned long long P, const unsigned lon
 
 	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 16) & 0x10284402) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000102844820100) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-<<<<<<< HEAD
-=======
-	outflank_v = OUTFLANK_5[((O & 0x0010101010101000) * 0x0010204081020408) >> 57]
-		& (((P & 0x1010101010101010) * 0x0010204081020408) >> 56);
-=======
-	outflank_v = ((P & 0x1010101010101010) * 0x0010204081020408) >> 56;	// (e8)
-	outflank_v = OUTFLANK_5[((O & 0x0010101010101000) * 0x0010204081020408) >> 57] & rotl8(outflank_v, 1);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	flipped = FLIPPED_5_V[outflank_v] & 0x0010101010101000;
-
-	outflank_h = OUTFLANK_4[(O >> 41) & 0x3f] & rotl8(P >> 40, 2);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_4_H[outflank_h] << 40;
-
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_4[((O & 0x0000102844020000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0000102844820100) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d = ((P & 0x0000102844820100) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-	outflank_d = OUTFLANK_4[((O & 0x0000102844020000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_4_H[outflank_d] & 0x0000102844020000;	// A2E6H3
 
 	flipped |= (((P >> 9) & 0x0020000000000000) | ((P >> 7) & 0x0008000000000000)) & O;
@@ -2842,11 +1388,6 @@ static unsigned long long flip_F6(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_v, outflank_d9;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	outflank_v = OUTFLANK_5[((O & 0x0020202020202000) * 0x0008102040810204) >> 57];
 	outflank_v &= rotl8(((P & 0x2020202020202020) * 0x0008102040810204) >> 56, 1);	// (f8)
 	flipped = FLIPPED_5_V[outflank_v] & 0x0020202020202000;
@@ -2858,31 +1399,6 @@ static unsigned long long flip_F6(const unsigned long long P, const unsigned lon
 
 	outflank_d9 = OUTFLANK_5[(((HIDWORD(O) & 0x00402010) + (LODWORD(O) & 0x08040200)) * 0x01010101) >> 25];
 	outflank_d9 &= rotl8((((HIDWORD(P) & 0x80402010) + (LODWORD(P) & 0x08040201)) * 0x01010101) >> 24, 1);	// (h8)
-<<<<<<< HEAD
-=======
-	outflank_v = OUTFLANK_5[((O & 0x0020202020202000) * 0x0008102040810204) >> 57]
-		& (((P & 0x2020202020202020) * 0x0008102040810204) >> 56);
-=======
-	outflank_v = ((P & 0x2020202020202020) * 0x0008102040810204) >> 56;	// (f8)
-	outflank_v = OUTFLANK_5[((O & 0x0020202020202000) * 0x0008102040810204) >> 57] & rotl8(outflank_v, 1);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-	flipped = FLIPPED_5_V[outflank_v] & 0x0020202020202000;
-
-	outflank_h = OUTFLANK_5[(O >> 41) & 0x3f] & rotl8(P >> 40, 1);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_5_H[outflank_h] << 40;
-
-	flipped |= ((P >> 7) | (P << 7)) & 0x0010004000000000 & O;
-
-<<<<<<< HEAD
-	outflank_d9 = OUTFLANK_5[((O & 0x0040201008040200) * 0x0101010101010101) >> 57]
-		& (((P & 0x8040201008040201) * 0x0101010101010101) >> 56);
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_d9 = ((P & 0x8040201008040201) * 0x0101010101010101) >> 56;	// (h8)
-	outflank_d9 = OUTFLANK_5[((O & 0x0040201008040200) * 0x0101010101010101) >> 57] & rotl8(outflank_d9, 1);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_5_H[outflank_d9] & 0x0040201008040200;
 
 	return flipped;
@@ -2900,57 +1416,16 @@ static unsigned long long flip_G6(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_b1g6g8, outflank_e8g6g1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_b1g6g8 = OUTFLANK_5[((O & 0x0040402010080400) * 0x0080808080808102) >> 57];
-	outflank_b1g6g8 &= rotl8(((P & 0x4040402010080402) * 0x0080808080808102) >> 56, 1);	// (g8)
-	flipped = FLIPPED_5_V[outflank_b1g6g8] & 0x0040402010080400;
-
-<<<<<<< HEAD
-	outflank_e8g6g1 = OUTFLANK_2[((O & 0x0020404040404000) * 0x0001008040201010) >> 57];
-	outflank_e8g6g1 &= ((P & 0x1020404040404040) * 0x0100804020000008) >> 59;	// 81234
-	flipped |= vertical_mirror(FLIPPED_2_V[outflank_e8g6g1]) & 0x0020404040404000;
-=======
-	outflank_e8g6g1 = OUTFLANK_2[((O & 0x0020404040404000ULL) * 0x0001008040201010ULL) >> 57]
-		& ((((P & 0x1020404040404040ULL) >> 4) * 0x2010080402010101ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_2_V[outflank_e8g6g1]) & 0x0020404040404000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	outflank_b1g6g8 = OUTFLANK_5[((O & 0x0040402010080400) * 0x0080808080808102) >> 57]
-		& (((P & 0x4040402010080402) * 0x0080808080808102) >> 56);
-=======
-	outflank_b1g6g8 = ((P & 0x4040402010080402) * 0x0080808080808102) >> 56;	// (g8)
-	outflank_b1g6g8 = OUTFLANK_5[((O & 0x0040402010080400) * 0x0080808080808102) >> 57] & rotl8(outflank_b1g6g8, 1);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_b1g6g8 = OUTFLANK_5[((O & 0x0040402010080400) * 0x0080808080808102) >> 57];
 	outflank_b1g6g8 &= rotl8(((P & 0x4040402010080402) * 0x0080808080808102) >> 56, 1);	// (g8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_5_V[outflank_b1g6g8] & 0x0040402010080400;
 
 	outflank_e8g6g1 = OUTFLANK_2[((O & 0x0020404040404000) * 0x0001008040201010) >> 57];
 	outflank_e8g6g1 &= ((P & 0x1020404040404040) * 0x0100804020000008) >> 59;	// 81234
 	flipped |= vertical_mirror(FLIPPED_2_V[outflank_e8g6g1]) & 0x0020404040404000;
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_h = outflank_right_H((unsigned int) (O >> 41) << 27) & (unsigned int) (P >> 14);
-	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 14;
-=======
-	outflank_h = outflank_right_H(O, 0x00003f0000000000) & P;
-	flipped |= (outflank_h * -2) & 0x00003f0000000000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H((unsigned int)(O >> 41) << 27) & (unsigned int)(P >> 14);
-	flipped |= (unsigned long long)(outflank_h * (unsigned int) -2) << 14;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_h = outflank_right_H((unsigned int) (O >> 41) << 27) & (unsigned int) (P >> 14);
 	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 14;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
 	return flipped;
 }
@@ -2967,57 +1442,16 @@ static unsigned long long flip_H6(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h, outflank_c1h6h8, outflank_f8h6h1;
 	unsigned long long flipped;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_c1h6h8 = OUTFLANK_5[((O & 0x0080804020100800) * 0x0040404040404081) >> 57];
-	outflank_c1h6h8 &= rotl8(((P & 0x8080804020100804) * 0x0040404040404081) >> 56, 1);	// (h8)
-	flipped = FLIPPED_5_V[outflank_c1h6h8] & 0x0080804020100800;
-
-<<<<<<< HEAD
-	outflank_f8h6h1 = OUTFLANK_2[((O & 0x0040808080808000) * 0x0000804020100808) >> 57];
-	outflank_f8h6h1 &= ((P & 0x2040808080808080) * 0x0080402010000004) >> 59;	// 81234
-	flipped |= vertical_mirror(FLIPPED_2_V[outflank_f8h6h1]) & 0x0040808080808000;
-=======
-	outflank_f8h6h1 = OUTFLANK_2[((O & 0x0040808080808000ULL) * 0x0000804020100808ULL) >> 57]
-		& ((((P & 0x2040808080808080ULL) >> 5) * 0x2010080402010101ULL) >> 56);
-	flipped |= vertical_mirror(FLIPPED_2_V[outflank_f8h6h1]) & 0x0040808080808000ULL;
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	outflank_c1h6h8 = OUTFLANK_5[((O & 0x0080804020100800) * 0x0040404040404081) >> 57]
-		& (((P & 0x8080804020100804) * 0x0040404040404081) >> 56);
-=======
-	outflank_c1h6h8 = ((P & 0x8080804020100804) * 0x0040404040404081) >> 56;	// (h8)
-	outflank_c1h6h8 = OUTFLANK_5[((O & 0x0080804020100800) * 0x0040404040404081) >> 57] & rotl8(outflank_c1h6h8, 1);
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_c1h6h8 = OUTFLANK_5[((O & 0x0080804020100800) * 0x0040404040404081) >> 57];
 	outflank_c1h6h8 &= rotl8(((P & 0x8080804020100804) * 0x0040404040404081) >> 56, 1);	// (h8)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = FLIPPED_5_V[outflank_c1h6h8] & 0x0080804020100800;
 
 	outflank_f8h6h1 = OUTFLANK_2[((O & 0x0040808080808000) * 0x0000804020100808) >> 57];
 	outflank_f8h6h1 &= ((P & 0x2040808080808080) * 0x0080402010000004) >> 59;	// 81234
 	flipped |= vertical_mirror(FLIPPED_2_V[outflank_f8h6h1]) & 0x0040808080808000;
->>>>>>> 6506166 (More SSE optimizations)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_h = outflank_right_H((unsigned int) (O >> 41) << 26) & (unsigned int) (P >> 15);
-	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 15;
-=======
-	outflank_h = outflank_right_H(O, 0x00007f0000000000) & P;
-	flipped |= (outflank_h * -2) & 0x00007f0000000000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H((unsigned int)(O >> 41) << 26) & (unsigned int)(P >> 15);
-	flipped |= (unsigned long long)(outflank_h * (unsigned int) -2) << 15;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_h = outflank_right_H((unsigned int) (O >> 41) << 26) & (unsigned int) (P >> 15);
 	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 15;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
 	return flipped;
 }
@@ -3031,18 +1465,8 @@ static unsigned long long flip_H6(const unsigned long long P, const unsigned lon
  */
 static unsigned long long flip_A7(const unsigned long long P, const unsigned long long O)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	unsigned int outflank_h;
-	unsigned long long flipped, outflank_v, outflank_d7;
-=======
-	unsigned long long flipped, outflank_v, outflank_d7;
-	unsigned int outflank_h;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	unsigned int outflank_h;
 	unsigned long long flipped, outflank_v, outflank_d7;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	outflank_v = outflank_right(O, 0x0000010101010101) & P;
 	flipped  = (outflank_v * -2) & 0x0000010101010101;
@@ -3065,18 +1489,8 @@ static unsigned long long flip_A7(const unsigned long long P, const unsigned lon
  */
 static unsigned long long flip_B7(const unsigned long long P, const unsigned long long O)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	unsigned int outflank_h;
 	unsigned long long flipped, outflank_v, outflank_d7;
-=======
-	unsigned long long flipped, outflank_v, outflank_d7;
-	unsigned int outflank_h;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	unsigned int outflank_h;
-	unsigned long long flipped, outflank_v, outflank_d7;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	outflank_v = outflank_right(O, 0x0000020202020202) & P;
 	flipped  = (outflank_v * -2) & 0x0000020202020202;
@@ -3104,28 +1518,13 @@ static unsigned long long flip_C7(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0000040404040404) & P;
 	flipped  = (outflank_v * -2) & 0x0000040404040404;
-<<<<<<< HEAD
-
-	outflank_d = OUTFLANK_2[(((HIDWORD(O) & 0x00040a10) + (LODWORD(O) & 0x20400000)) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x00040a1120408000) * 0x0101010101010101) >> 52;	// hgfedcb[ahgfe]...
-	flipped |= FLIPPED_2_H[outflank_d] & 0x00040a1020400000;	// A5C7H2
-
-	outflank_h = OUTFLANK_2[(O >> 49) & 0x3f] & rotl8(P >> 48, 4);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_2_H[outflank_h] << 48;
-=======
 
 	outflank_d = OUTFLANK_2[(((HIDWORD(O) & 0x00040a10) + (LODWORD(O) & 0x20400000)) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x00040a1120408000) * 0x0101010101010101) >> 52;	// hgfedcb[ahgfe]...
 	flipped |= FLIPPED_2_H[outflank_d] & 0x00040a1020400000;	// A5C7H2
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_2[(O >> 49) & 0x3f] & (P >> 48);
-	flipped |= FLIPPED_2_H[outflank_h] & 0x00ff000000000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_2[(O >> 49) & 0x3f] & rotl8(P >> 48, 4);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_2_H[outflank_h] << 48;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -3144,28 +1543,13 @@ static unsigned long long flip_D7(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0000080808080808) & P;
 	flipped  = (outflank_v * -2) & 0x0000080808080808;
-<<<<<<< HEAD
-
-	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 24) & 0x08142240) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0008142241800000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-	flipped |= FLIPPED_3_H[outflank_d] & 0x0008142240000000;	// A4D7H3
-
-	outflank_h = OUTFLANK_3[(O >> 49) & 0x3f] & rotl8(P >> 48, 3);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_3_H[outflank_h] << 48;
-=======
 
 	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 24) & 0x08142240) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0008142241800000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0008142240000000;	// A4D7H3
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_3[(O >> 49) & 0x3f] & (P >> 48);
-	flipped |= FLIPPED_3_H[outflank_h] & 0x00ff000000000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_3[(O >> 49) & 0x3f] & rotl8(P >> 48, 3);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_3_H[outflank_h] << 48;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -3184,28 +1568,13 @@ static unsigned long long flip_E7(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0000101010101010) & P;
 	flipped  = (outflank_v * -2) & 0x0000101010101010;
-<<<<<<< HEAD
-
-	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 24) & 0x10284402) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0010284482010000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-	flipped |= FLIPPED_4_H[outflank_d] & 0x0010284402000000;	// A3E7H4
-
-	outflank_h = OUTFLANK_4[(O >> 49) & 0x3f] & rotl8(P >> 48, 2);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_4_H[outflank_h] << 48;
-=======
 
 	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 24) & 0x10284402) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0010284482010000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
 	flipped |= FLIPPED_4_H[outflank_d] & 0x0010284402000000;	// A3E7H4
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_4[(O >> 49) & 0x3f] & (P >> 48);
-	flipped |= FLIPPED_4_H[outflank_h] & 0x00ff000000000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_4[(O >> 49) & 0x3f] & rotl8(P >> 48, 2);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_4_H[outflank_h] << 48;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -3224,28 +1593,13 @@ static unsigned long long flip_F7(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0000202020202020) & P;
 	flipped  = (outflank_v * -2) & 0x0000202020202020;
-<<<<<<< HEAD
-
-	outflank_d = OUTFLANK_5[(((HIDWORD(O) & 0x00205008) + (LODWORD(O) & 0x04020000)) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0020508804020100) * 0x0101010101010101) >> 55;	// hgfe[dcbah]...
-	flipped |= FLIPPED_5_H[outflank_d] & 0x0020500804020000;	// A2F7H5
-
-	outflank_h = OUTFLANK_5[(O >> 49) & 0x3f] & rotl8(P >> 48, 1);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_5_H[outflank_h] << 48;
-=======
 
 	outflank_d = OUTFLANK_5[(((HIDWORD(O) & 0x00205008) + (LODWORD(O) & 0x04020000)) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0020508804020100) * 0x0101010101010101) >> 55;	// hgfe[dcbah]...
 	flipped |= FLIPPED_5_H[outflank_d] & 0x0020500804020000;	// A2F7H5
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_5[(O >> 49) & 0x3f] & (P >> 48);
-	flipped |= FLIPPED_5_H[outflank_h] & 0x00ff000000000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_5[(O >> 49) & 0x3f] & rotl8(P >> 48, 1);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_5_H[outflank_h] << 48;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -3264,37 +1618,13 @@ static unsigned long long flip_G7(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0000404040404040) & P;
 	flipped  = (outflank_v * -2) & 0x0000404040404040;
-<<<<<<< HEAD
-=======
 
 	outflank_d9 = outflank_right(O, 0x0000201008040201) & P;
 	flipped |= (outflank_d9 * -2) & 0x0000201008040201;
->>>>>>> 6506166 (More SSE optimizations)
 
-	outflank_d9 = outflank_right(O, 0x0000201008040201) & P;
-	flipped |= (outflank_d9 * -2) & 0x0000201008040201;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_h = outflank_right_H((unsigned int) (O >> 49) << 27) & (unsigned int) (P >> 22);
-	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 22;
-=======
-	outflank_h = outflank_right_H(O, 0x003f000000000000) & P;
-	flipped |= (outflank_h * -2) & 0x003f000000000000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H((unsigned int)(O >> 49) << 27) & (unsigned int)(P >> 22);
-	flipped |= (unsigned long long)(outflank_h * (unsigned int) -2) << 22;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_h = outflank_right_H((unsigned int) (O >> 49) << 27) & (unsigned int) (P >> 22);
 	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 22;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
-=======
->>>>>>> 6506166 (More SSE optimizations)
 	return flipped;
 }
 
@@ -3312,37 +1642,13 @@ static unsigned long long flip_H7(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0000808080808080) & P;
 	flipped  = (outflank_v * -2) & 0x0000808080808080;
-<<<<<<< HEAD
-=======
-
-	outflank_d9 = outflank_right(O, 0x0000402010080402) & P;
-	flipped |= (outflank_d9 * -2) & 0x0000402010080402;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_d9 = outflank_right(O, 0x0000402010080402) & P;
 	flipped |= (outflank_d9 * -2) & 0x0000402010080402;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = outflank_right_H((unsigned int) (O >> 49) << 26) & (unsigned int) (P >> 23);
 	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 23;
-=======
-	outflank_h = outflank_right_H(O, 0x007f000000000000) & P;
-	flipped |= (outflank_h * -2) & 0x007f000000000000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H((unsigned int)(O >> 49) << 26) & (unsigned int)(P >> 23);
-	flipped |= (unsigned long long)(outflank_h * (unsigned int) -2) << 23;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
-	outflank_h = outflank_right_H((unsigned int) (O >> 49) << 26) & (unsigned int) (P >> 23);
-	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 23;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
-=======
->>>>>>> 6506166 (More SSE optimizations)
 	return flipped;
 }
 
@@ -3406,28 +1712,13 @@ static unsigned long long flip_C8(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0004040404040404) & P;
 	flipped  = (outflank_v * -2) & 0x0004040404040404;
-<<<<<<< HEAD
-
-	outflank_d = OUTFLANK_2[(((HIDWORD(O) & 0x040a1020) + (LODWORD(O) & 0x40000000)) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x040a112040800000) * 0x0101010101010101) >> 52;	// hgfedcb[ahgfe]d0ba...
-	flipped |= FLIPPED_2_H[outflank_d] & 0x040a102040000000;	// A6C8H3
-
-	outflank_h = OUTFLANK_2[(O >> 57) & 0x3f] & rotl8(P >> 56, 4);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_2_H[outflank_h] << 56;
-=======
 
 	outflank_d = OUTFLANK_2[(((HIDWORD(O) & 0x040a1020) + (LODWORD(O) & 0x40000000)) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x040a112040800000) * 0x0101010101010101) >> 52;	// hgfedcb[ahgfe]d0ba...
 	flipped |= FLIPPED_2_H[outflank_d] & 0x040a102040000000;	// A6C8H3
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_2[(O >> 57) & 0x3f] & (P >> 56);
-	flipped |= FLIPPED_2_H[outflank_h] & 0xff00000000000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_2[(O >> 57) & 0x3f] & rotl8(P >> 56, 4);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_2_H[outflank_h] << 56;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -3446,28 +1737,13 @@ static unsigned long long flip_D8(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0008080808080808) & P;
 	flipped  = (outflank_v * -2) & 0x0008080808080808;
-<<<<<<< HEAD
-
-	outflank_d = OUTFLANK_3[((HIDWORD(O) & 0x08142240) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0814224180000000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]e0cba...
-	flipped |= FLIPPED_3_H[outflank_d] & 0x0814224000000000;	// A5D8H4
-
-	outflank_h = OUTFLANK_3[(O >> 57) & 0x3f] & rotl8(P >> 56, 3);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_3_H[outflank_h] << 56;
-=======
 
 	outflank_d = OUTFLANK_3[((HIDWORD(O) & 0x08142240) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0814224180000000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]e0cba...
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0814224000000000;	// A5D8H4
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_3[(O >> 57) & 0x3f] & (P >> 56);
-	flipped |= FLIPPED_3_H[outflank_h] & 0xff00000000000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_3[(O >> 57) & 0x3f] & rotl8(P >> 56, 3);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_3_H[outflank_h] << 56;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -3486,7 +1762,6 @@ static unsigned long long flip_E8(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0010101010101010) & P;
 	flipped  = (outflank_v * -2) & 0x0010101010101010;
-<<<<<<< HEAD
 
 	outflank_d = OUTFLANK_4[((HIDWORD(O) & 0x10284402) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x1028448201000000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]f0dcba...
@@ -3494,20 +1769,6 @@ static unsigned long long flip_E8(const unsigned long long P, const unsigned lon
 
 	outflank_h = OUTFLANK_4[(O >> 57) & 0x3f] & rotl8(P >> 56, 2);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_4_H[outflank_h] << 56;
-=======
-
-	outflank_d = OUTFLANK_4[((HIDWORD(O) & 0x10284402) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x1028448201000000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]f0dcba...
-	flipped |= FLIPPED_4_H[outflank_d] & 0x1028440200000000;	// A4E8H5
-
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_4[(O >> 57) & 0x3f] & (P >> 56);
-	flipped |= FLIPPED_4_H[outflank_h] & 0xff00000000000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
-	outflank_h = OUTFLANK_4[(O >> 57) & 0x3f] & rotl8(P >> 56, 2);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_4_H[outflank_h] << 56;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -3526,28 +1787,13 @@ static unsigned long long flip_F8(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0020202020202020) & P;
 	flipped  = (outflank_v * -2) & 0x0020202020202020;
-<<<<<<< HEAD
-
-	outflank_d = OUTFLANK_5[(((HIDWORD(O) & 0x20500804) + (LODWORD(O) & 0x02000000)) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x2050880402010000) * 0x0101010101010101) >> 55;	// hgfe[dcbah]g0edcba...
-	flipped |= FLIPPED_5_H[outflank_d] & 0x2050080402000000;	// A3F8H6
-
-	outflank_h = OUTFLANK_5[(O >> 57) & 0x3f] & rotl8(P >> 56, 1);
-	flipped |= (unsigned long long)(unsigned char) FLIPPED_5_H[outflank_h] << 56;
-=======
 
 	outflank_d = OUTFLANK_5[(((HIDWORD(O) & 0x20500804) + (LODWORD(O) & 0x02000000)) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x2050880402010000) * 0x0101010101010101) >> 55;	// hgfe[dcbah]g0edcba...
 	flipped |= FLIPPED_5_H[outflank_d] & 0x2050080402000000;	// A3F8H6
 
-<<<<<<< HEAD
-	outflank_h = OUTFLANK_5[(O >> 57) & 0x3f] & (P >> 56);
-	flipped |= FLIPPED_5_H[outflank_h] & 0xff00000000000000;
->>>>>>> 6506166 (More SSE optimizations)
-=======
 	outflank_h = OUTFLANK_5[(O >> 57) & 0x3f] & rotl8(P >> 56, 1);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_5_H[outflank_h] << 56;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
 
 	return flipped;
 }
@@ -3566,37 +1812,13 @@ static unsigned long long flip_G8(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0040404040404040) & P;
 	flipped  = (outflank_v * -2) & 0x0040404040404040;
-<<<<<<< HEAD
-=======
-
-	outflank_d9 = outflank_right(O, 0x0020100804020100) & P;
-	flipped |= (outflank_d9 * -2) & 0x0020100804020100;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_d9 = outflank_right(O, 0x0020100804020100) & P;
 	flipped |= (outflank_d9 * -2) & 0x0020100804020100;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_h = outflank_right_H((unsigned int) (O >> 57) << 27) & (unsigned int) (P >> 30);
-	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 30;
-=======
-	outflank_h = outflank_right_H(O, 0x3f00000000000000) & P;
-	flipped |= (outflank_h * -2) & 0x3f00000000000000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H((unsigned int)(O >> 57) << 27) & (unsigned int)(P >> 30);
-	flipped |= (unsigned long long)(outflank_h * (unsigned int) -2) << 30;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_h = outflank_right_H((unsigned int) (O >> 57) << 27) & (unsigned int) (P >> 30);
 	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 30;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
-=======
->>>>>>> 6506166 (More SSE optimizations)
 	return flipped;
 }
 
@@ -3614,37 +1836,13 @@ static unsigned long long flip_H8(const unsigned long long P, const unsigned lon
 
 	outflank_v = outflank_right(O, 0x0080808080808080) & P;
 	flipped  = (outflank_v * -2) & 0x0080808080808080;
-<<<<<<< HEAD
-=======
-
-	outflank_d9 = outflank_right(O, 0x0040201008040201) & P;
-	flipped |= (outflank_d9 * -2) & 0x0040201008040201;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_d9 = outflank_right(O, 0x0040201008040201) & P;
 	flipped |= (outflank_d9 * -2) & 0x0040201008040201;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_h = outflank_right_H((unsigned int) (O >> 57) << 26) & (unsigned int) (P >> 31);
-	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 31;
-=======
-	outflank_h = outflank_right_H(O, 0x7f00000000000000) & P;
-	flipped |= (outflank_h * -2) & 0x7f00000000000000;
->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan)
-=======
-	outflank_h = outflank_right_H((unsigned int)(O >> 57) << 26) & (unsigned int)(P >> 31);
-	flipped |= (unsigned long long)(outflank_h * (unsigned int) -2) << 31;
->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs)
-=======
 	outflank_h = outflank_right_H((unsigned int) (O >> 57) << 26) & (unsigned int) (P >> 31);
 	flipped |= (unsigned long long) (outflank_h * (unsigned int) -2) << 31;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
-=======
->>>>>>> 6506166 (More SSE optimizations)
 	return flipped;
 }
 
diff --git a/src/flip_bmi2.c b/src/flip_bmi2.c
index f0af621..09b984d 100644
--- a/src/flip_bmi2.c
+++ b/src/flip_bmi2.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file flip_bmi2.c
  *
@@ -281,287 +277,3 @@ unsigned long long flip(int pos, unsigned long long P, unsigned long long O)
 
 	return flipped;
 }
-<<<<<<< HEAD
-=======
-/**
- * @file flip_bmi2.c
- *
- * This module deals with flipping discs.
- *
- * A function is provided for each square of the board. These functions are
- * gathered into an array of functions, so that a fast access to each function
- * is allowed. The generic form of the function take as input the player and
- * the opponent bitboards and return the flipped squares into a bitboard.
- *
- * Given the following notation:
- *  - x = square where we play,
- *  - P = player's disc pattern,
- *  - O = opponent's disc pattern,
- * the basic principle is to read into an array the result of a move. Doing
- * this is easier for a single line ; so we can use arrays of the form:
- *  - ARRAY[x][8-bits disc pattern].
- * The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by the 
- * BMI2 PEXT instruction.
- * Once we get our 8-bits disc patterns,a first array (OUTFLANK) is used to
- * get the player's discs that surround the opponent discs:
- *  - outflank = OUTFLANK[x][O] & P
- * The result is then used as an index to access a second array giving the
- * flipped discs according to the surrounding player's discs:
- *  - flipped = FLIPPED[x][outflank].
- * Finally, BMI2 PDEP instruction transform the 8-bits disc pattern back into a
- * 64-bits disc pattern, and the flipped squares for each line are gathered and
- * returned to generate moves.
- *
- * @date 1998 - 2014
- * @author Toshihiko Okuhara
- * @version 4.4
- */
-
-#include <x86intrin.h>
-
-/* bit masks for diagonal/vertical/all lines */
-static const unsigned long long mask_x[66][4] = {
-	{ 0x0000000000000001ULL, 0x8040201008040201ULL, 0x0101010101010101ULL, 0x81412111090503ffULL },
-	{ 0x0000000000000102ULL, 0x0080402010080402ULL, 0x0202020202020202ULL, 0x02824222120a07ffULL },
-	{ 0x0000000000010204ULL, 0x0000804020100804ULL, 0x0404040404040404ULL, 0x0404844424150effULL },
-	{ 0x0000000001020408ULL, 0x0000008040201008ULL, 0x0808080808080808ULL, 0x08080888492a1cffULL },
-	{ 0x0000000102040810ULL, 0x0000000080402010ULL, 0x1010101010101010ULL, 0x10101011925438ffULL },
-	{ 0x0000010204081020ULL, 0x0000000000804020ULL, 0x2020202020202020ULL, 0x2020212224a870ffULL },
-	{ 0x0001020408102040ULL, 0x0000000000008040ULL, 0x4040404040404040ULL, 0x404142444850e0ffULL },
-	{ 0x0102040810204080ULL, 0x0000000000000080ULL, 0x8080808080808080ULL, 0x8182848890a0c0ffULL },
-	{ 0x0000000000000102ULL, 0x4020100804020104ULL, 0x0101010101010101ULL, 0x412111090503ff03ULL },
-	{ 0x0000000000010204ULL, 0x8040201008040201ULL, 0x0202020202020202ULL, 0x824222120a07ff07ULL },
-	{ 0x0000000001020408ULL, 0x0080402010080402ULL, 0x0404040404040404ULL, 0x04844424150eff0eULL },
-	{ 0x0000000102040810ULL, 0x0000804020100804ULL, 0x0808080808080808ULL, 0x080888492a1cff1cULL },
-	{ 0x0000010204081020ULL, 0x0000008040201008ULL, 0x1010101010101010ULL, 0x101011925438ff38ULL },
-	{ 0x0001020408102040ULL, 0x0000000080402010ULL, 0x2020202020202020ULL, 0x20212224a870ff70ULL },
-	{ 0x0102040810204080ULL, 0x0000000000804020ULL, 0x4040404040404040ULL, 0x4142444850e0ffe0ULL },
-	{ 0x0204081020408001ULL, 0x0000000000008040ULL, 0x8080808080808080ULL, 0x82848890a0c0ffc0ULL },
-	{ 0x0000000000010204ULL, 0x201008040201000aULL, 0x0101010101010101ULL, 0x2111090503ff0305ULL },
-	{ 0x0000000001020408ULL, 0x4020100804020101ULL, 0x0202020202020202ULL, 0x4222120a07ff070aULL },
-	{ 0x0000000102040810ULL, 0x8040201008040201ULL, 0x0404040404040404ULL, 0x844424150eff0e15ULL },
-	{ 0x0000010204081020ULL, 0x0080402010080402ULL, 0x0808080808080808ULL, 0x0888492a1cff1c2aULL },
-	{ 0x0001020408102040ULL, 0x0000804020100804ULL, 0x1010101010101010ULL, 0x1011925438ff3854ULL },
-	{ 0x0102040810204080ULL, 0x0000008040201008ULL, 0x2020202020202020ULL, 0x212224a870ff70a8ULL },
-	{ 0x0204081020408001ULL, 0x0000000080402010ULL, 0x4040404040404040ULL, 0x42444850e0ffe050ULL },
-	{ 0x0408102040800003ULL, 0x0000000000804020ULL, 0x8080808080808080ULL, 0x848890a0c0ffc0a0ULL },
-	{ 0x0000000001020408ULL, 0x1008040201000016ULL, 0x0101010101010101ULL, 0x11090503ff030509ULL },
-	{ 0x0000000102040810ULL, 0x2010080402010005ULL, 0x0202020202020202ULL, 0x22120a07ff070a12ULL },
-	{ 0x0000010204081020ULL, 0x4020100804020101ULL, 0x0404040404040404ULL, 0x4424150eff0e1524ULL },
-	{ 0x0001020408102040ULL, 0x8040201008040201ULL, 0x0808080808080808ULL, 0x88492a1cff1c2a49ULL },
-	{ 0x0102040810204080ULL, 0x0080402010080402ULL, 0x1010101010101010ULL, 0x11925438ff385492ULL },
-	{ 0x0204081020408001ULL, 0x0000804020100804ULL, 0x2020202020202020ULL, 0x2224a870ff70a824ULL },
-	{ 0x0408102040800003ULL, 0x0000008040201008ULL, 0x4040404040404040ULL, 0x444850e0ffe05048ULL },
-	{ 0x0810204080000007ULL, 0x0000000080402010ULL, 0x8080808080808080ULL, 0x8890a0c0ffc0a090ULL },
-	{ 0x0000000102040810ULL, 0x080402010000002eULL, 0x0101010101010101ULL, 0x090503ff03050911ULL },
-	{ 0x0000010204081020ULL, 0x100804020100000dULL, 0x0202020202020202ULL, 0x120a07ff070a1222ULL },
-	{ 0x0001020408102040ULL, 0x2010080402010003ULL, 0x0404040404040404ULL, 0x24150eff0e152444ULL },
-	{ 0x0102040810204080ULL, 0x4020100804020101ULL, 0x0808080808080808ULL, 0x492a1cff1c2a4988ULL },
-	{ 0x0204081020408002ULL, 0x8040201008040201ULL, 0x1010101010101010ULL, 0x925438ff38549211ULL },
-	{ 0x0408102040800005ULL, 0x0080402010080402ULL, 0x2020202020202020ULL, 0x24a870ff70a82422ULL },
-	{ 0x081020408000000bULL, 0x0000804020100804ULL, 0x4040404040404040ULL, 0x4850e0ffe0504844ULL },
-	{ 0x1020408000000017ULL, 0x0000008040201008ULL, 0x8080808080808080ULL, 0x90a0c0ffc0a09088ULL },
-	{ 0x0000010204081020ULL, 0x040201000000005eULL, 0x0101010101010101ULL, 0x0503ff0305091121ULL },
-	{ 0x0001020408102040ULL, 0x080402010000001dULL, 0x0202020202020202ULL, 0x0a07ff070a122242ULL },
-	{ 0x0102040810204080ULL, 0x100804020100000bULL, 0x0404040404040404ULL, 0x150eff0e15244484ULL },
-	{ 0x0204081020408001ULL, 0x2010080402010003ULL, 0x0808080808080808ULL, 0x2a1cff1c2a498808ULL },
-	{ 0x0408102040800003ULL, 0x4020100804020101ULL, 0x1010101010101010ULL, 0x5438ff3854921110ULL },
-	{ 0x081020408000000eULL, 0x8040201008040201ULL, 0x2020202020202020ULL, 0xa870ff70a8242221ULL },
-	{ 0x102040800000001dULL, 0x0080402010080402ULL, 0x4040404040404040ULL, 0x50e0ffe050484442ULL },
-	{ 0x204080000000003bULL, 0x0000804020100804ULL, 0x8080808080808080ULL, 0xa0c0ffc0a0908884ULL },
-	{ 0x0001020408102040ULL, 0x02010000000000beULL, 0x0101010101010101ULL, 0x03ff030509112141ULL },
-	{ 0x0102040810204080ULL, 0x040201000000003dULL, 0x0202020202020202ULL, 0x07ff070a12224282ULL },
-	{ 0x0204081020408001ULL, 0x080402010000001bULL, 0x0404040404040404ULL, 0x0eff0e1524448404ULL },
-	{ 0x0408102040800003ULL, 0x1008040201000007ULL, 0x0808080808080808ULL, 0x1cff1c2a49880808ULL },
-	{ 0x0810204080000007ULL, 0x2010080402010003ULL, 0x1010101010101010ULL, 0x38ff385492111010ULL },
-	{ 0x102040800000000fULL, 0x4020100804020101ULL, 0x2020202020202020ULL, 0x70ff70a824222120ULL },
-	{ 0x204080000000003eULL, 0x8040201008040201ULL, 0x4040404040404040ULL, 0xe0ffe05048444241ULL },
-	{ 0x408000000000007dULL, 0x0080402010080402ULL, 0x8080808080808080ULL, 0xc0ffc0a090888482ULL },
-	{ 0x0102040810204080ULL, 0x010000000000027eULL, 0x0101010101010101ULL, 0xff03050911214181ULL },
-	{ 0x0204081020408001ULL, 0x020100000000007dULL, 0x0202020202020202ULL, 0xff070a1222428202ULL },
-	{ 0x0408102040800003ULL, 0x040201000000003bULL, 0x0404040404040404ULL, 0xff0e152444840404ULL },
-	{ 0x0810204080000007ULL, 0x0804020100000017ULL, 0x0808080808080808ULL, 0xff1c2a4988080808ULL },
-	{ 0x102040800000000fULL, 0x1008040201000007ULL, 0x1010101010101010ULL, 0xff38549211101010ULL },
-	{ 0x204080000000001fULL, 0x2010080402010003ULL, 0x2020202020202020ULL, 0xff70a82422212020ULL },
-	{ 0x408000000000003fULL, 0x4020100804020101ULL, 0x4040404040404040ULL, 0xffe0504844424140ULL },
-	{ 0x800000000000017eULL, 0x8040201008040201ULL, 0x8080808080808080ULL, 0xffc0a09088848281ULL },
-	{ 0, 0, 0, 0 },	// pass
-	{ 0, 0, 0, 0 }
-};
-
-/** outflank array */
-const unsigned char OUTFLANK[8][64] = {
-	{
-		0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x20,
-		0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x40,
-		0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x20,
-		0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x80,
-	},
-	{
-		0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00,
-		0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x40, 0x00,
-		0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00,
-		0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x80, 0x00,
-	},
-	{
-		0x00, 0x01, 0x00, 0x00, 0x10, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x20, 0x21, 0x00, 0x00,
-		0x00, 0x01, 0x00, 0x00, 0x10, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x40, 0x41, 0x00, 0x00,
-		0x00, 0x01, 0x00, 0x00, 0x10, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x20, 0x21, 0x00, 0x00,
-		0x00, 0x01, 0x00, 0x00, 0x10, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x80, 0x81, 0x00, 0x00,
-	},
-	{
-		0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x22, 0x21, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x40, 0x40, 0x42, 0x41, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x22, 0x21, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x80, 0x80, 0x82, 0x81, 0x00, 0x00, 0x00, 0x00,
-	},
-	{
-		0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x40, 0x40, 0x40, 0x40, 0x44, 0x44, 0x42, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x80, 0x80, 0x80, 0x80, 0x84, 0x84, 0x82, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	},
-	{
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x08, 0x08, 0x04, 0x04, 0x02, 0x01,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x88, 0x88, 0x88, 0x88, 0x84, 0x84, 0x82, 0x81,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	},
-	{
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x08, 0x08, 0x08, 0x08, 0x04, 0x04, 0x02, 0x01,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	},
-	{
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x08, 0x08, 0x08, 0x08, 0x04, 0x04, 0x02, 0x01,
-	},
-};
-
-/** flip array */
-const unsigned char FLIPPED[8][144] = {
-	{
-		0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	},
-	{
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	},
-	{
-		0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x08, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x18, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x38, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x78, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	},
-	{
-		0x00, 0x06, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x10, 0x16, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x30, 0x36, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x70, 0x76, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	},
-	{
-		0x00, 0x0e, 0x0c, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x20, 0x2e, 0x2c, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x60, 0x6e, 0x6c, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	},
-	{
-		0x00, 0x1e, 0x1c, 0x00, 0x18, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x40, 0x5e, 0x5c, 0x00, 0x58, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	},
-	{
-		0x00, 0x3e, 0x3c, 0x00, 0x38, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	},
-	{
-		0x00, 0x7e, 0x7c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	},
-};
-
-unsigned long long flip(int pos, unsigned long long P, unsigned long long O)
-{
-	int	index;
-	unsigned long long	flipped, mask;
-	int	x = pos & 7;
-	int	y = pos & 0x38;
-
-	P &= mask_x[pos][3];	// mask out unrelated bits to make dummy 0 bits for outside
-
-	index = OUTFLANK[x][_bextr_u32((O >> y), 1, 6)] & (P >> y);
-	flipped = ((unsigned long long) FLIPPED[x][index]) << y;
-
-	y >>= 3;
-	mask = mask_x[pos][0];
-	index = OUTFLANK[y][_bextr_u32(_pext_u64(O, mask), 1, 6)] & _pext_u64(P, mask);
-	flipped |= _pdep_u64(FLIPPED[y][index], mask);
-
-	mask = mask_x[pos][1];
-	index = OUTFLANK[y][_bextr_u32(_pext_u64(O, mask), 1, 6)] & _pext_u64(P, mask);
-	flipped |= _pdep_u64(FLIPPED[y][index], mask);
-
-	mask = mask_x[pos][2];
-	index = OUTFLANK[y][_bextr_u32(_pext_u64(O, mask), 1, 6)] & _pext_u64(P, mask);
-	flipped |= _pdep_u64(FLIPPED[y][index], mask);
-
-	return flipped;
-}
->>>>>>> 1a7b0ed (flip_bmi2 added; bmi2 version of stability and corner_stability)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/flip_carry_32.c b/src/flip_carry_32.c
index 97d8957..badf039 100644
--- a/src/flip_carry_32.c
+++ b/src/flip_carry_32.c
@@ -248,15 +248,7 @@ static const unsigned long long  FLIPPED_5_V[137] = {
 /*
  * Set all bits below the sole outflank bit if outfrank != 0
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if (_MSC_VER >= 1800) && (defined(_M_IX86) || defined (_M_X64))
-=======
-#if _MSC_VER >= 1800
->>>>>>> 6506166 (More SSE optimizations)
-=======
-#if (_MSC_VER >= 1800) && (defined(_M_IX86) || defined (_M_X64))
->>>>>>> f2da03e (Refine arm builds adding neon support.)
 #include <intrin.h>
 static inline unsigned long long OutflankToFlipmask(unsigned long long outflank) {
 	unsigned int flipmaskL, flipmaskH;
@@ -283,42 +275,11 @@ static unsigned long long flip_A1(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h;
 	unsigned long long flipped, outflank_v, outflank_d9;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_v = ((O | ~0x0101010101010100) + 1) & P & 0x0101010101010100;
-	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010100;
-=======
-	outflank_v = ((O | ~0x0101010101010100) + 1) & P & 0x0101010101010000;
-<<<<<<< HEAD
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0001010101010100;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-
-	outflank_d9 = ((O | ~0x8040201008040200) + 1) & P & 0x8040201008040200;
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040200;
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010100;
-
-	outflank_h = (O + 0x02) & P;
-	flipped |= ((outflank_h * 0xff) >> 8) & 0x7e;
->>>>>>> 6506166 (More SSE optimizations)
-
-<<<<<<< HEAD
-	outflank_h = (unsigned char) (O + 0x02) & P;
-	flipped |= outflank_h - ((unsigned int) (outflank_h != 0) << 1);
-=======
-	outflank_d9 = ((O | ~0x8040201008040200) + 1) & P & 0x8040201008040000;
-<<<<<<< HEAD
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0040201008040200;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-=======
 	outflank_v = ((O | ~0x0101010101010100) + 1) & P & 0x0101010101010100;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010100;
 
 	outflank_d9 = ((O | ~0x8040201008040200) + 1) & P & 0x8040201008040200;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040200;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = (unsigned char) (O + 0x02) & P;
 	flipped |= outflank_h - ((unsigned int) (outflank_h != 0) << 1);
@@ -338,42 +299,11 @@ static unsigned long long flip_B1(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h;
 	unsigned long long flipped, outflank_v, outflank_d9;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_v = ((O | ~0x0202020202020200) + 1) & P & 0x0202020202020200;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020200;
-=======
-	outflank_v = ((O | ~0x0202020202020200) + 1) & P & 0x0202020202020000;
-<<<<<<< HEAD
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0002020202020200;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 
 	outflank_d9 = ((O | ~0x0080402010080400) + 1) & P & 0x0080402010080400;
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0080402010080400;
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020200;
-
-	outflank_h = (O + 0x04) & P;
-	flipped |= ((outflank_h * 0xff) >> 8) & 0x7c;
->>>>>>> 6506166 (More SSE optimizations)
-
-<<<<<<< HEAD
-	outflank_h = (unsigned char) (O + 0x04) & P;
-	flipped |= outflank_h - ((unsigned int) (outflank_h != 0) << 2);
-=======
-	outflank_d9 = ((O | ~0x0080402010080400) + 1) & P & 0x0080402010080000;
-<<<<<<< HEAD
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0000402010080400;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-=======
-	outflank_v = ((O | ~0x0202020202020200) + 1) & P & 0x0202020202020200;
-	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020200;
-
-	outflank_d9 = ((O | ~0x0080402010080400) + 1) & P & 0x0080402010080400;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0080402010080400;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = (unsigned char) (O + 0x04) & P;
 	flipped |= outflank_h - ((unsigned int) (outflank_h != 0) << 2);
@@ -394,15 +324,7 @@ static unsigned long long flip_C1(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((O | ~0x0404040404040400) + 1) & P & 0x0404040404040400;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040400;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0404040404040400;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040400;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_2[(O >> 1) & 0x3f] & P;
 	flipped |= (unsigned char) FLIPPED_2_H[outflank_h];
@@ -410,15 +332,7 @@ static unsigned long long flip_C1(const unsigned long long P, const unsigned lon
 	flipped |= (((unsigned int) P >> 7) & 0x00000200u & (unsigned int) O);
 
 	outflank_d9 = ((O | ~0x0000804020100800) + 1) & P & 0x0000804020100800;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000804020100800;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0000804020100800;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000804020100800;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
@@ -436,15 +350,7 @@ static unsigned long long flip_D1(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v;
 
 	outflank_v = ((O | ~0x0808080808080800) + 1) & P & 0x0808080808080800;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080800;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0808080808080800;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080800;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_3[(O >> 1) & 0x3f] & P;
 	flipped |= (unsigned char) FLIPPED_3_H[outflank_h];
@@ -469,15 +375,7 @@ static unsigned long long flip_E1(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v;
 
 	outflank_v = ((O | ~0x1010101010101000) + 1) & P & 0x1010101010101000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010101000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x1010101010101000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010101000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_4[(O >> 1) & 0x3f] & P;
 	flipped |= (unsigned char) FLIPPED_4_H[outflank_h];
@@ -502,29 +400,13 @@ static unsigned long long flip_F1(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((O | ~0x2020202020202000) + 1) & P & 0x2020202020202000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020202000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x2020202020202000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020202000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_5[(O >> 1) & 0x3f] & P;
 	flipped |= (unsigned char) FLIPPED_5_H[outflank_h];
 
 	outflank_d7 = ((O | ~0x0000010204081000) + 1) & P & 0x0000010204080000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000000204081000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0000000204081000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000000204081000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= (((unsigned int) P >> 9) & 0x00004000u & (unsigned int) O);
 
@@ -544,29 +426,13 @@ static unsigned long long flip_G1(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((O | ~0x4040404040404000) + 1) & P & 0x4040404040400000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040404000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0040404040404000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040404000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[O & 0x3e] & (P << 1);
 	flipped |= ((-outflank_h) & 0x3e) << 0;
 
 	outflank_d7 = ((O | ~0x0001020408102000) + 1) & P & 0x0001020408100000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0001020408102000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0000020408102000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0001020408102000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
@@ -584,29 +450,13 @@ static unsigned long long flip_H1(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((O | ~0x8080808080808000) + 1) & P & 0x8080808080800000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080808000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0080808080808000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080808000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[(O >> 1) & 0x3f] & (unsigned int) P;
 	flipped |= ((-outflank_h) & 0x3f) << 1;
 
 	outflank_d7 = ((O | ~0x0102040810204000) + 1) & P & 0x0102040810200000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810204000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0002040810204000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810204000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
@@ -623,42 +473,11 @@ static unsigned long long flip_A2(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h;
 	unsigned long long flipped, outflank_v, outflank_d9;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_v = ((O | ~0x0101010101010000) + 1) & P & 0x0101010101010000;
-	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010000;
-=======
-	outflank_v = ((O | ~0x0101010101010000) + 1) & P & 0x0101010101000000;
-<<<<<<< HEAD
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0001010101010000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-
-	outflank_d9 = ((O | ~0x4020100804020000) + 1) & P & 0x4020100804020000;
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x4020100804020000;
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010000;
-
-	outflank_h = (O + 0x0200) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x00007e00;
->>>>>>> 6506166 (More SSE optimizations)
-
-<<<<<<< HEAD
-	outflank_h = (unsigned short) (O + 0x0200) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7e00;
-=======
-	outflank_d9 = ((O | ~0x4020100804020000) + 1) & P & 0x4020100804000000;
-<<<<<<< HEAD
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0020100804020000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-=======
 	outflank_v = ((O | ~0x0101010101010000) + 1) & P & 0x0101010101010000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010000;
 
 	outflank_d9 = ((O | ~0x4020100804020000) + 1) & P & 0x4020100804020000;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x4020100804020000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = (unsigned short) (O + 0x0200) & P;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7e00;
@@ -678,42 +497,11 @@ static unsigned long long flip_B2(const unsigned long long P, const unsigned lon
 	unsigned int outflank_h;
 	unsigned long long flipped, outflank_v, outflank_d9;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_v = ((O | ~0x0202020202020000) + 1) & P & 0x0202020202020000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020000;
-=======
-	outflank_v = ((O | ~0x0202020202020000) + 1) & P & 0x0202020202000000;
-<<<<<<< HEAD
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0002020202020000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 
 	outflank_d9 = ((O | ~0x8040201008040000) + 1) & P & 0x8040201008040000;
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040000;
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020000;
-
-	outflank_h = (O + 0x0400) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x00007c00;
->>>>>>> 6506166 (More SSE optimizations)
-
-<<<<<<< HEAD
-	outflank_h = (unsigned short) (O + 0x0400) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7c00;
-=======
-	outflank_d9 = ((O | ~0x8040201008040000) + 1) & P & 0x8040201008000000;
-<<<<<<< HEAD
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0040201008040000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-=======
-	outflank_v = ((O | ~0x0202020202020000) + 1) & P & 0x0202020202020000;
-	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020000;
-
-	outflank_d9 = ((O | ~0x8040201008040000) + 1) & P & 0x8040201008040000;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = (unsigned short) (O + 0x0400) & P;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7c00;
@@ -734,15 +522,7 @@ static unsigned long long flip_C2(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((O | ~0x0404040404040000) + 1) & P & 0x0404040404000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0004040404040000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_2[(O >> 9) & 0x3f] & (P >> 8);
 	flipped |= (unsigned int) FLIPPED_2_H[outflank_h] & 0x0000ff00u;
@@ -750,15 +530,7 @@ static unsigned long long flip_C2(const unsigned long long P, const unsigned lon
 	flipped |= (((unsigned int) P >> 7) & 0x00020000u & (unsigned int) O);
 
 	outflank_d9 = ((O | ~0x0080402010080000) + 1) & P & 0x0080402010080000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000402010080000;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0000402010080000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000402010080000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
@@ -776,38 +548,16 @@ static unsigned long long flip_D2(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d7, outflank_d9;
 
 	outflank_v = ((O | ~0x0808080808080000) + 1) & P & 0x0808080808000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0008080808080000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_3[(O >> 9) & 0x3f] & (P >> 8);
 	flipped |= (unsigned int) FLIPPED_3_H[outflank_h] & 0x0000ff00u;
 
 	outflank_d7 = ((O | ~0x0000000102040000) + 1) & P & 0x0000000102000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000000002040000;
-
-	outflank_d9 = ((O | ~0x0000804020100000) + 1) & P & 0x0000804020000000;
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000004020100000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0000000002040000;
-
-	outflank_d9 = ((O | ~0x0000804020100000) + 1) & P & 0x0000804020000000;
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0000004020100000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000000002040000;
 
 	outflank_d9 = ((O | ~0x0000804020100000) + 1) & P & 0x0000804020000000;
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000004020100000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
@@ -825,38 +575,16 @@ static unsigned long long flip_E2(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d7, outflank_d9;
 
 	outflank_v = ((O | ~0x1010101010100000) + 1) & P & 0x1010101010000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010100000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0010101010100000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010100000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_4[(O >> 9) & 0x3f] & (P >> 8);
 	flipped |= (unsigned int) FLIPPED_4_H[outflank_h] & 0x0000ff00u;
 
 	outflank_d7 = ((O | ~0x0000010204080000) + 1) & P & 0x0000010204000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000000204080000;
 
 	outflank_d9 = ((O | ~0x0000008040200000) + 1) & P & 0x0000008040000000;
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000000040200000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0000000204080000;
-
-	outflank_d9 = ((O | ~0x0000008040200000) + 1) & P & 0x0000008040000000;
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0000000040200000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000000204080000;
-
-	outflank_d9 = ((O | ~0x0000008040200000) + 1) & P & 0x0000008040000000;
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000000040200000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
@@ -874,29 +602,13 @@ static unsigned long long flip_F2(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((O | ~0x2020202020200000) + 1) & P & 0x2020202020000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020200000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0020202020200000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020200000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_5[(O >> 9) & 0x3f] & ((unsigned int) P >> 8);
 	flipped |= (unsigned int) FLIPPED_5_H[outflank_h] & 0x0000ff00U;
 
 	outflank_d7 = ((O | ~0x0001020408100000) + 1) & P & 0x0001020408000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000020408100000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0000020408100000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000020408100000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= (((unsigned int) P >> 9) & 0x00400000u & (unsigned int) O);
 
@@ -916,29 +628,13 @@ static unsigned long long flip_G2(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((O | ~0x4040404040400000) + 1) & P & 0x4040404040000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040400000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0040404040400000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040400000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[(O >> 8) & 0x3e] & ((unsigned int) P >> 7);
 	flipped |= ((-outflank_h) & 0x3e) << 8;
 
 	outflank_d7 = ((O | ~0x0102040810200000) + 1) & P & 0x0102040810000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810200000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0002040810200000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810200000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
@@ -956,29 +652,13 @@ static unsigned long long flip_H2(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((O | ~0x8080808080800000) + 1) & P & 0x8080808080000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080800000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0080808080800000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080800000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[(O >> 9) & 0x3f] & ((unsigned int) P >> 8);
 	flipped |= ((-outflank_h) & 0x3f) << 9;
 
 	outflank_d7 = ((O | ~0x0204081020400000) + 1) & P & 0x0204081020000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0204081020400000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0004081020400000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0204081020400000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
@@ -996,29 +676,13 @@ static unsigned long long flip_A3(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((O | ~0x0101010101000000) + 1) & P & 0x0101010101000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0101010101000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = ((O & 0x007e0000u) + 0x00020000u) & P;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x007e0000u;
 
 	outflank_d9 = ((O | ~0x2010080402000000) + 1) & P & 0x2010080400000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0010080402000000;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0010080402000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0010080402000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= LODWORD(O) & (((LODWORD(P) << 8) & 0x00000100u) | ((LODWORD(P) << 7) & 0x00000200u));
 
@@ -1038,29 +702,13 @@ static unsigned long long flip_B3(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((O | ~0x0202020202000000) + 1) & P & 0x0202020202000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0202020202000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = ((O & 0x007c0000u) + 0x00040000u) & P;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x007c0000u;
 
 	outflank_d9 = ((O | ~0x4020100804000000) + 1) & P & 0x4020100800000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0020100804000000;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0020100804000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0020100804000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= LODWORD(O) & (((LODWORD(P) << 8) & 0x00000200u) | ((LODWORD(P) << 7) & 0x00000400u));
 	return flipped;
@@ -1079,30 +727,14 @@ static unsigned long long flip_C3(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((O | ~0x0404040404000000) + 1) & P & 0x0404040404000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0404040404000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_2[(O >> 17) & 0x3f] & ((unsigned int) P >> 16);
 	flipped |= (unsigned int) FLIPPED_2_H[outflank_h] & 0x00ff0000u;
 
 
 	outflank_d9 = ((O | ~0x8040201008000000) + 1) & P & 0x8040201008000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008000000;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x8040201008000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= LODWORD(O) & (((LODWORD(P) << 8) & 0x00000400u)
 			| ((LODWORD(P) << 9) & 0x00000200u)
@@ -1124,15 +756,7 @@ static unsigned long long flip_D3(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v;
 
 	outflank_v = ((O | ~0x0808080808000000) + 1) & P & 0x0808080808000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0808080808000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_3[(O >> 17) & 0x3f] & ((unsigned int) P >> 16);
 	flipped |= (unsigned int) FLIPPED_3_H[outflank_h] & 0x00ff0000u;
@@ -1161,15 +785,7 @@ static unsigned long long flip_E3(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v;
 
 	outflank_v = ((O | ~0x1010101010000000) + 1) & P & 0x1010101010000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x1010101010000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_4[(O >> 17) & 0x3f] & ((unsigned int) P >> 16);
 	flipped |= (unsigned int) FLIPPED_4_H[outflank_h] & 0x00ff0000u;
@@ -1198,29 +814,13 @@ static unsigned long long flip_F3(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((O | ~0x2020202020000000) + 1) & P & 0x2020202020000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x2020202020000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_5[(O >> 17) & 0x3f] & ((unsigned int) P >> 16);
 	flipped |= (unsigned int) FLIPPED_5_H[outflank_h] & 0x00ff0000u;
 
 	outflank_d7 = ((O | ~0x0102040810000000) + 1) & P & 0x0102040810000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810000000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0102040810000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= LODWORD(O) & (((LODWORD(P) << 8) & 0x00002000u)
 			| ((LODWORD(P) << 7) & 0x00004000u)
@@ -1242,29 +842,13 @@ static unsigned long long flip_G3(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((O | ~0x4040404040000000) + 1) & P & 0x4040404040000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x4040404040000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[(O >> 16) & 0x3e] & ((unsigned int) P >> 15);
 	flipped |= ((-outflank_h) & 0x3e) << 16;
 
 	outflank_d7 = ((O | ~0x0204081020000000) + 1) & P & 0x0204081000000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0004081020000000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0004081020000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0004081020000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= LODWORD(O) & (((LODWORD(P) << 8) & 0x00004000u) | ((LODWORD(P) << 9) & 0x00002000u));
 
@@ -1285,29 +869,13 @@ static unsigned long long flip_H3(const unsigned long long P, const unsigned lon
 	unsigned long long flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((O | ~0x8080808080000000) + 1) & P & 0x8080808080000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x8080808080000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[(O >> 17) & 0x3f] & ((unsigned int) P >> 16);
 	flipped |= ((-outflank_h) & 0x3f) << 17;
 
 	outflank_d7 = ((O | ~0x0408102040000000) + 1) & P & 0x0408102000000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0008102040000000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0008102040000000;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0008102040000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= LODWORD(O) & (((LODWORD(P) << 8) & 0x00008000u) | ((LODWORD(P) << 9) & 0x00004000u));
 
diff --git a/src/flip_carry_64.c b/src/flip_carry_64.c
index 66ea0d4..5cbf39f 100644
--- a/src/flip_carry_64.c
+++ b/src/flip_carry_64.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 1525ec4 (Use same OutflankToFlip as flip_bitscan, and fix typo bug)
 /**
  * @file flip_carry_64.c
  *
@@ -2096,2094 +2092,3 @@ unsigned long long (*flip[])(const unsigned long long, const unsigned long long)
 	flip_pass, flip_pass
 };
 
-<<<<<<< HEAD
-=======
-/**
- * @file flip_carry_64.c
- *
- * This module deals with flipping discs.
- *
- * A function is provided for each square of the board. These functions are
- * gathered into an array of functions, so that a fast access to each function
- * is allowed. The generic form of the function take as input the player and
- * the opponent bitboards and return the flipped squares into a bitboard.
- *
- * Given the following notation:
- *  - x = square where we play,
- *  - P = player's disc pattern,
- *  - O = opponent's disc pattern,
- * the basic principle is to read into an array the result of a move. Doing
- * this is easier for a single line ; so we can use arrays of the form:
- *  - ARRAY[x][8-bits disc pattern].
- * The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by a simple
- * multiplication and to right-shift the result to scale it into a number
- * between 0 and 255.
- * Once we get our 8-bits disc patterns,a first array (OUTFLANK) is used to
- * get the player's discs that surround the opponent discs:
- *  - outflank = OUTFLANK[x][O] & P
- * (Only inner 6-bits of the P are in interest here.)
- * The result is then used as an index to access a second array giving the
- * flipped discs according to the surrounding player's discs:
- *  - flipped = FLIPPED[x][outflank].
- * (Flipped discs fall into inner 6-bits.)
- * Finally, a precomputed array transform the inner 6-bits disc pattern back into a
- * 64-bits disc pattern, and the flipped squares for each line are gathered and
- * returned to generate moves.
- *
- * If the OUTFLANK search is in LSB to MSB direction, carry propagation 
- * can be used to determine contiguous opponent discs.
- * If the OUTFLANK search is in MSB to LSB direction, CONTIG_X tables
- * are used to determine coutiguous opponent discs.
- *
- * @date 1998 - 2018
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- */
-
-/** outflank array (indexed with inner 6 bits) */
-/* static const unsigned char OUTFLANK_0[64] = {
-	0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x20,
-	0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x40,
-	0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x20,
-	0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x04, 0x00, 0x80
-}; */
-
-/* static const unsigned char OUTFLANK_1[64] = {
-	0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00,
-	0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x40, 0x00,
-	0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00,
-	0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x80, 0x00
-}; */
-
-static const unsigned char OUTFLANK_2[64] = {
-	0x00, 0x01, 0x00, 0x00, 0x10, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x20, 0x21, 0x00, 0x00,
-	0x00, 0x01, 0x00, 0x00, 0x10, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x40, 0x41, 0x00, 0x00,
-	0x00, 0x01, 0x00, 0x00, 0x10, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x20, 0x21, 0x00, 0x00,
-	0x00, 0x01, 0x00, 0x00, 0x10, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x80, 0x81, 0x00, 0x00
-};
-
-static const unsigned char OUTFLANK_3[64] = {
-	0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x22, 0x21, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x40, 0x40, 0x42, 0x41, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x22, 0x21, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x80, 0x80, 0x82, 0x81, 0x00, 0x00, 0x00, 0x00
-};
-
-static const unsigned char OUTFLANK_4[64] = {
-	0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x40, 0x40, 0x40, 0x40, 0x44, 0x44, 0x42, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x80, 0x80, 0x80, 0x80, 0x84, 0x84, 0x82, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
-
-static const unsigned char OUTFLANK_5[64] = {
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x08, 0x08, 0x04, 0x04, 0x02, 0x01,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x88, 0x88, 0x88, 0x88, 0x84, 0x84, 0x82, 0x81,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
-
-/* static const unsigned char OUTFLANK_6[64] = {
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x08, 0x08, 0x08, 0x08, 0x04, 0x04, 0x02, 0x01,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-}; */
-
-static const unsigned char OUTFLANK_7[64] = {
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x08, 0x08, 0x08, 0x08, 0x04, 0x04, 0x02, 0x01
-};
-
-/* (Count of leading 1 from bit 5) * 8 */
-static const unsigned char CONTIG_UP[64] = {
-	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
-	16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 32, 32, 40, 48
-};
-
-/* (Count of leading 1 from bit 5) * 9 */
-static const unsigned char CONTIG_UPLEFT[64] = {
-	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
-	18, 18, 18, 18, 18, 18, 18, 18, 27, 27, 27, 27, 36, 36, 45, 54
-};
-
-/* (Count of trailing 1) * 7 */
-static const unsigned char CONTIG_UPRIGHT[64] = {
-	 0,  7,  0, 14,  0,  7,  0, 21,  0,  7,  0, 14,  0,  7,  0, 28,
-	 0,  7,  0, 14,  0,  7,  0, 21,  0,  7,  0, 14,  0,  7,  0, 35,
-	 0,  7,  0, 14,  0,  7,  0, 21,  0,  7,  0, 14,  0,  7,  0, 28,
-	 0,  7,  0, 14,  0,  7,  0, 21,  0,  7,  0, 14,  0,  7,  0, 42
-};
-
-/** flip array (indexed with outflank, returns inner 6 bits) */
-static const unsigned long long FLIPPED_2_H[130] = {
-	0x0000000000000000, 0x0202020202020202, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0808080808080808, 0x0a0a0a0a0a0a0a0a, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x1818181818181818, 0x1a1a1a1a1a1a1a1a, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x3838383838383838, 0x3a3a3a3a3a3a3a3a, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x7878787878787878, 0x7a7a7a7a7a7a7a7a
-};
-
-static const unsigned long long FLIPPED_3_H[131] = {
-	0x0000000000000000, 0x0606060606060606, 0x0404040404040404, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x1010101010101010, 0x1616161616161616, 0x1414141414141414, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x3030303030303030, 0x3636363636363636, 0x3434343434343434, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x7070707070707070, 0x7676767676767676, 0x7474747474747474
-};
-
-static const unsigned long long FLIPPED_4_H[133] = {
-	0x0000000000000000, 0x0e0e0e0e0e0e0e0e, 0x0c0c0c0c0c0c0c0c, 0x0000000000000000, 0x0808080808080808, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x2020202020202020, 0x2e2e2e2e2e2e2e2e, 0x2c2c2c2c2c2c2c2c, 0x0000000000000000, 0x2828282828282828, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x6060606060606060, 0x6e6e6e6e6e6e6e6e, 0x6c6c6c6c6c6c6c6c, 0x0000000000000000, 0x6868686868686868
-};
-
-static const unsigned long long FLIPPED_5_H[137] = {
-	0x0000000000000000, 0x1e1e1e1e1e1e1e1e, 0x1c1c1c1c1c1c1c1c, 0x0000000000000000, 0x1818181818181818, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x1010101010101010, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x4040404040404040, 0x5e5e5e5e5e5e5e5e, 0x5c5c5c5c5c5c5c5c, 0x0000000000000000, 0x5858585858585858, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x5050505050505050
-};
-
-static const unsigned long long FLIPPED_2_V[130] = {
-	0x0000000000000000, 0x000000000000ff00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00000000ff000000, 0x00000000ff00ff00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000ffff000000, 0x000000ffff00ff00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000ffffff000000, 0x0000ffffff00ff00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ffffffff000000, 0x00ffffffff00ff00
-};
-
-static const unsigned long long FLIPPED_3_V[131] = {
-	0x0000000000000000, 0x0000000000ffff00, 0x0000000000ff0000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000ff00000000, 0x000000ff00ffff00, 0x000000ff00ff0000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000ffff00000000, 0x0000ffff00ffff00, 0x0000ffff00ff0000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ffffff00000000, 0x00ffffff00ffff00, 0x00ffffff00ff0000
-};
-
-static const unsigned long long FLIPPED_4_V[133] = {
-	0x0000000000000000, 0x00000000ffffff00, 0x00000000ffff0000, 0x0000000000000000, 0x00000000ff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000ff0000000000, 0x0000ff00ffffff00, 0x0000ff00ffff0000, 0x0000000000000000, 0x0000ff00ff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ffff0000000000, 0x00ffff00ffffff00, 0x00ffff00ffff0000, 0x0000000000000000, 0x00ffff00ff000000
-};
-
-/*
- * FLIPPED_5_V: flipped-disc masks for a move at position 5 of an 8-square line,
- * indexed by the outflank bit pattern (the non-zero entries sit at indices built
- * from bits 0-3 and bit 7; 137 = largest used index 136 + 1).
- * Line position k is expanded to the whole byte k (0xff).
- * NOTE(review): no user of this table is visible in this excerpt; presumably the
- * caller ANDs the entry with the bit mask of the actual line -- confirm.
- */
-static const unsigned long long FLIPPED_5_V[137] = {
-	0x0000000000000000, 0x000000ffffffff00, 0x000000ffffff0000, 0x0000000000000000, 0x000000ffff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000ff00000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ff000000000000, 0x00ff00ffffffff00, 0x00ff00ffffff0000, 0x0000000000000000, 0x00ff00ffff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ff00ff00000000
-};
-
-/*
- * FLIPPED_2_U: flipped-disc masks for a move at position 2 of an 8-square line,
- * indexed by the outflank bit pattern (non-zero entries combine bit 0 with one of
- * bits 4-7; 130 = largest used index 129 + 1).
- * The byte order is reversed: line position k is expanded to the whole byte 7-k
- * (0xff), e.g. index 1 (outflank at position 0) flips position 1 = byte 6.
- * NOTE(review): no user of this table is visible in this excerpt; presumably the
- * caller ANDs the entry with the bit mask of the actual line -- confirm.
- */
-static const unsigned long long FLIPPED_2_U[130] = {
-	0x0000000000000000, 0x00ff000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000ff00000000, 0x00ff00ff00000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000ffff000000, 0x00ff00ffff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000ffffff0000, 0x00ff00ffffff0000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000ffffffff00, 0x00ff00ffffffff00
-};
-
-/*
- * FLIPPED_3_U: flipped-disc masks for a move at position 3 of an 8-square line,
- * indexed by the outflank bit pattern (non-zero entries combine bits 0-1 with one
- * of bits 5-7; 131 = largest used index 130 + 1).
- * The byte order is reversed: line position k is expanded to the whole byte 7-k
- * (0xff), e.g. index 2 (outflank at position 1) flips position 2 = byte 5.
- * NOTE(review): no user of this table is visible in this excerpt; presumably the
- * caller ANDs the entry with the bit mask of the actual line -- confirm.
- */
-static const unsigned long long FLIPPED_3_U[131] = {
-	0x0000000000000000, 0x00ffff0000000000, 0x0000ff0000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00000000ff000000, 0x00ffff00ff000000, 0x0000ff00ff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00000000ffff0000, 0x00ffff00ffff0000, 0x0000ff00ffff0000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00000000ffffff00, 0x00ffff00ffffff00, 0x0000ff00ffffff00
-};
-
-/*
- * FLIPPED_4_U: flipped-disc masks for a move at position 4 of an 8-square line,
- * indexed by the outflank bit pattern (non-zero entries combine bits 0-2 with one
- * of bits 6-7; 133 = largest used index 132 + 1).
- * The byte order is reversed: line position k is expanded to the whole byte 7-k
- * (0xff), e.g. index 4 (outflank at position 2) flips position 3 = byte 4.
- * NOTE(review): no user of this table is visible in this excerpt; presumably the
- * caller ANDs the entry with the bit mask of the actual line -- confirm.
- */
-static const unsigned long long FLIPPED_4_U[133] = {
-	0x0000000000000000, 0x00ffffff00000000, 0x0000ffff00000000, 0x0000000000000000, 0x000000ff00000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000ff0000, 0x00ffffff00ff0000, 0x0000ffff00ff0000, 0x0000000000000000, 0x000000ff00ff0000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000ffff00, 0x00ffffff00ffff00, 0x0000ffff00ffff00, 0x0000000000000000, 0x000000ff00ffff00
-};
-
-/*
- * FLIPPED_5_U: flipped-disc masks for a move at position 5 of an 8-square line,
- * indexed by the outflank bit pattern (non-zero entries combine bits 0-3 with
- * bit 7; 137 = largest used index 136 + 1).
- * The byte order is reversed: line position k is expanded to the whole byte 7-k
- * (0xff), e.g. index 128 (outflank at position 7) flips position 6 = byte 1.
- * Callers such as flip_A3() AND the entry with the bit mask of the actual line.
- */
-static const unsigned long long  FLIPPED_5_U[137] = {
-	0x0000000000000000, 0x00ffffffff000000, 0x0000ffffff000000, 0x0000000000000000, 0x000000ffff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00000000ff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000000000ff00, 0x00ffffffff00ff00, 0x0000ffffff00ff00, 0x0000000000000000, 0x000000ffff00ff00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00000000ff00ff00
-};
-
-
-/*
- * Set all bits below the sole outflank bit if outflank != 0,
- * i.e. return (outflank - 1) for a non-zero outflank and 0 for outflank == 0.
- */
-#if defined(_M_X64) && (_MSC_VER >= 1800)
-static inline unsigned long long OutflankToFlipmask(unsigned long long outflank) {
-	unsigned long long flipmask;
-	unsigned char cy = _subborrow_u64(0, outflank, 1, &flipmask);	// flipmask = outflank - 1; cy = borrow, set only when outflank == 0
-	_addcarry_u64(cy, flipmask, 0, &flipmask);	// add the borrow back so that 0 maps to 0 instead of ~0
-	return flipmask;
-}
-#else
-	// Spelling must match the inline function above: the flip_XX() routines call OutflankToFlipmask().
-	#define OutflankToFlipmask(x)	((x) - (unsigned int) ((x) != 0))
-#endif
-
-/*
- * Scan non-opponent bit (player or empty) from LSB
- *
- * O is the opponent's disc pattern and M the mask of the ray being scanned.
- * The callers AND the result with P, which leaves the first non-opponent square
- * of the ray only if the player owns it (otherwise 0).
- */
-#if 0 // LS1B
-static unsigned long long OUTFLANK_LEFT(unsigned long long O, unsigned long long M) {
-	unsigned long long b = ~O & M;	// squares of the ray not held by the opponent
-	return b & -b;	// isolate the lowest one
-}
-#else // carry propagation - less register pressure
-/*
- * Derivation of the macro from the LS1B form above, including the "& P" applied by the callers:
- *
- *	((~O & M) & -(~O & M)) & P		// LS1B
- *	((~O & M) & (~(~O & M) + 1)) & P	// -x = (~x + 1)
- *	(~O & M & ((O | ~M) + 1)) & P		// De Morgan
- *	(((O | ~M) + 1) & M) & (P & ~O)		// P & O = 0, so P & ~O = P
- *	(((O | ~M) + 1) & M) & P		// carry propagation
- *
- * Without the "& P" the macro value may still contain opponent bits of M above
- * the carry position; they vanish once the caller masks with P (P & O = 0).
- */
-#define OUTFLANK_LEFT(O,M)	((((O) | ~(M)) + 1) & (M))
-#endif
-
-/**
- * Compute flipped discs when playing on square A1.
- *
- * Column A and the diagonal towards H8 are resolved by carry propagation
- * (OUTFLANK_LEFT + OutflankToFlipmask); row 1 is resolved with 8-bit arithmetic.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_A1(const unsigned long long P, const unsigned long long O)
-{
-	unsigned char outflank_h;	// the assignment below truncates to the low byte, i.e. row 1
-	unsigned long long flipped, outflank_v, outflank_d9;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x0101010101010100) & P;	// A2..A8
-	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010100;
-
-	outflank_d9 = OUTFLANK_LEFT(O, 0x8040201008040200) & P;	// B2..H8
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040200;
-
-	outflank_h = (O + 0x02) & P;	// adding the B1 bit carries through consecutive opponent discs; & P keeps the carry only if it lands on a player's disc
-	flipped += ((outflank_h * 0x7f) >> 8) << 1;	// outflank bit k -> bits 1..k-1; disjoint from the bits set above, so += acts like |=
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square B1.
- *
- * Column B and the diagonal towards H7 are resolved by carry propagation
- * (OUTFLANK_LEFT + OutflankToFlipmask); row 1 is resolved with 8-bit arithmetic.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_B1(const unsigned long long P, const unsigned long long O)
-{
-	unsigned char outflank_h;	// the assignment below truncates to the low byte, i.e. row 1
-	unsigned long long flipped, outflank_v, outflank_d9;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x0202020202020200) & P;	// B2..B8
-	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020200;
-
-	outflank_d9 = OUTFLANK_LEFT(O, 0x0080402010080400) & P;	// C2..H7
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0080402010080400;
-
-	outflank_h = (O + 0x04) & P;	// adding the C1 bit carries through consecutive opponent discs; & P keeps the carry only if it lands on a player's disc
-	flipped += ((outflank_h * 0x3f) >> 8) << 2;	// outflank bit k -> bits 2..k-1; disjoint from the bits set above, so += acts like |=
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square C1.
- *
- * Column C uses carry propagation; both diagonals (treated as the single
- * V-shaped line A3-C1-H6) and row 1 use the OUTFLANK_2 / FLIPPED_2_H lookup
- * tables, which are defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_C1(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x0404040404040400) & P;	// C2..C8
-	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040400;
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the line.
-	outflank_d = OUTFLANK_2[((O & 0x0000004020100a04) * 0x0101010101010101) >> 57]	// >> 57 also drops the file-A bit
-		& (((P & 0x0000804020110a04) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_2_H[outflank_d] & 0x0000004020100a04;	// A3C1H6
-
-	outflank_h = OUTFLANK_2[(O >> 1) & 0x3f] & P;	// index: opponent discs on B1..G1
-	flipped |= (unsigned char) FLIPPED_2_H[outflank_h];	// keep only the low byte (row 1)
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square D1.
- *
- * Column D uses carry propagation; both diagonals (treated as the single
- * V-shaped line A4-D1-H5) and row 1 use the OUTFLANK_3 / FLIPPED_3_H lookup
- * tables, which are defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_D1(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x0808080808080800) & P;	// D2..D8
-	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080800;
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the line.
-	outflank_d = OUTFLANK_3[((O & 0x0000000040221408) * 0x0101010101010101) >> 57]	// >> 57 also drops the file-A bit
-		& (((P & 0x0000008041221408) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_3_H[outflank_d] & 0x0000000040221408;	// A4D1H5
-
-	outflank_h = OUTFLANK_3[(O >> 1) & 0x3f] & P;	// index: opponent discs on B1..G1
-	flipped |= (unsigned char) FLIPPED_3_H[outflank_h];	// keep only the low byte (row 1)
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square E1.
- *
- * Column E uses carry propagation; both diagonals (treated as the single
- * V-shaped line A5-E1-H4) and row 1 use the OUTFLANK_4 / FLIPPED_4_H lookup
- * tables, which are defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_E1(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x1010101010101000) & P;	// E2..E8
-	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010101000;
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the line.
-	outflank_d = OUTFLANK_4[((O & 0x0000000002442810) * 0x0101010101010101) >> 57]	// >> 57 also drops the file-A bit
-		& (((P & 0x0000000182442810) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_4_H[outflank_d] & 0x0000000002442810;	// A5E1H4
-
-	outflank_h = OUTFLANK_4[(O >> 1) & 0x3f] & P;	// index: opponent discs on B1..G1
-	flipped |= (unsigned char) FLIPPED_4_H[outflank_h];	// keep only the low byte (row 1)
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square F1.
- *
- * Column F uses carry propagation; both diagonals (treated as the single
- * V-shaped line A6-F1-H3) and row 1 use the OUTFLANK_5 / FLIPPED_5_H lookup
- * tables, which are defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_F1(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x2020202020202000) & P;	// F2..F8
-	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020202000;
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the line.
-	outflank_d = OUTFLANK_5[((O & 0x0000000204085020) * 0x0101010101010101) >> 57]	// >> 57 also drops the file-A bit
-		& (((P & 0x0000010204885020) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_5_H[outflank_d] & 0x0000000204085020;	// A6F1H3
-
-	outflank_h = OUTFLANK_5[(O >> 1) & 0x3f] & P;	// index: opponent discs on B1..G1
-	flipped |= (unsigned char) FLIPPED_5_H[outflank_h];	// keep only the low byte (row 1)
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square G1.
- *
- * Column G and the diagonal towards A7 are resolved by carry propagation
- * (OUTFLANK_LEFT + OutflankToFlipmask); row 1 uses the OUTFLANK_7 lookup table,
- * which is defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_G1(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h;
-	unsigned long long flipped, outflank_v, outflank_d7;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x4040404040404000) & P;	// G2..G8
-	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040404000;
-
-	outflank_d7 = OUTFLANK_LEFT(O, 0x0001020408102000) & P;	// F2..A7
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0001020408102000;
-
-	outflank_h = OUTFLANK_7[O & 0x3e] & (P << 1);	// index: opponent discs on B1..F1, unshifted; P is shifted up by one instead (presumably to match the table layout)
-	flipped |= ((-outflank_h) & 0x3e) << 0;	// negating a single bit sets it and every higher bit; the mask keeps B1..F1
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square H1.
- *
- * Column H and the diagonal towards A8 are resolved by carry propagation
- * (OUTFLANK_LEFT + OutflankToFlipmask); row 1 uses the OUTFLANK_7 lookup table,
- * which is defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_H1(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h;
-	unsigned long long flipped, outflank_v, outflank_d7;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x8080808080808000) & P;	// H2..H8
-	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080808000;
-
-	outflank_d7 = OUTFLANK_LEFT(O, 0x0102040810204000) & P;	// G2..A8
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810204000;
-
-	outflank_h = OUTFLANK_7[(O >> 1) & 0x3f] & P;	// index: opponent discs on B1..G1
-	flipped |= ((-outflank_h) & 0x3f) << 1;	// negating a single bit sets it and every higher bit; mask and shift keep B1..G1
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square A2.
- *
- * Column A and the diagonal towards G8 are resolved by carry propagation
- * (OUTFLANK_LEFT + OutflankToFlipmask); row 2 is resolved with 16-bit arithmetic.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_A2(const unsigned long long P, const unsigned long long O)
-{
-	unsigned short outflank_h;	// the assignment below truncates to the low 16 bits, i.e. rows 1-2
-	unsigned long long flipped, outflank_v, outflank_d9;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x0101010101010000) & P;	// A3..A8
-	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010000;
-
-	outflank_d9 = OUTFLANK_LEFT(O, 0x4020100804020000) & P;	// B3..G8
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x4020100804020000;
-
-	outflank_h = (O + 0x0200) & P;	// adding the B2 bit carries through consecutive opponent discs; & P keeps the carry only if it lands on a player's disc
-	flipped |= ((outflank_h * 0x7f) >> 16) << 9;	// outflank bit k -> bits 9..k-1
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square B2.
- *
- * Column B and the diagonal towards H8 are resolved by carry propagation
- * (OUTFLANK_LEFT + OutflankToFlipmask); row 2 is resolved with 16-bit arithmetic.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_B2(const unsigned long long P, const unsigned long long O)
-{
-	unsigned short outflank_h;	// the assignment below truncates to the low 16 bits, i.e. rows 1-2
-	unsigned long long flipped, outflank_v, outflank_d9;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x0202020202020000) & P;	// B3..B8
-	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020000;
-
-	outflank_d9 = OUTFLANK_LEFT(O, 0x8040201008040000) & P;	// C3..H8
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040000;
-
-	outflank_h = (O + 0x0400) & P;	// adding the C2 bit carries through consecutive opponent discs; & P keeps the carry only if it lands on a player's disc
-	flipped |= ((outflank_h * 0x3f) >> 16) << 10;	// outflank bit k -> bits 10..k-1
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square C2.
- *
- * Column C uses carry propagation; both diagonals (treated as the single
- * V-shaped line A4-C2-H7) and row 2 use the OUTFLANK_2 / FLIPPED_2_H lookup
- * tables, which are defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_C2(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x0404040404040000) & P;	// C3..C8
-	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040000;
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the line.
-	outflank_d = OUTFLANK_2[((O & 0x00004020100a0400) * 0x0101010101010101) >> 57]	// >> 57 also drops the file-A bit
-		& (((P & 0x00804020110a0400) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_2_H[outflank_d] & 0x00004020100a0400;	// A4C2H7
-
-	outflank_h = OUTFLANK_2[(O >> 9) & 0x3f] & (P >> 8);	// index: opponent discs on B2..G2; P >> 8 brings row 2 down to the low byte
-	flipped |= FLIPPED_2_H[outflank_h] & 0x000000000000ff00;	// keep only row 2
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square D2.
- *
- * Column D uses carry propagation; both diagonals (treated as the single
- * V-shaped line A5-D2-H6) and row 2 use the OUTFLANK_3 / FLIPPED_3_H lookup
- * tables, which are defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_D2(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x0808080808080000) & P;	// D3..D8
-	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080000;
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the line.
-	outflank_d = OUTFLANK_3[((O & 0x0000004022140800) * 0x0101010101010101) >> 57]	// >> 57 also drops the file-A bit
-		& (((P & 0x0000804122140800) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_3_H[outflank_d] & 0x0000004022140800;	// A5D2H6
-
-	outflank_h = OUTFLANK_3[(O >> 9) & 0x3f] & (P >> 8);	// index: opponent discs on B2..G2; P >> 8 brings row 2 down to the low byte
-	flipped |= FLIPPED_3_H[outflank_h] & 0x000000000000ff00;	// keep only row 2
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square E2.
- *
- * Column E uses carry propagation; both diagonals (treated as the single
- * V-shaped line A6-E2-H5) and row 2 use the OUTFLANK_4 / FLIPPED_4_H lookup
- * tables, which are defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_E2(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x1010101010100000) & P;	// E3..E8
-	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010100000;
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the line.
-	outflank_d = OUTFLANK_4[((O & 0x0000000244281000) * 0x0101010101010101) >> 57]	// >> 57 also drops the file-A bit
-		& (((P & 0x0000018244281000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_4_H[outflank_d] & 0x0000000244281000;	// A6E2H5
-
-	outflank_h = OUTFLANK_4[(O >> 9) & 0x3f] & (P >> 8);	// index: opponent discs on B2..G2; P >> 8 brings row 2 down to the low byte
-	flipped |= FLIPPED_4_H[outflank_h] & 0x000000000000ff00;	// keep only row 2
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square F2.
- *
- * Column F uses carry propagation; both diagonals (treated as the single
- * V-shaped line A7-F2-H4) and row 2 use the OUTFLANK_5 / FLIPPED_5_H lookup
- * tables, which are defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_F2(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x2020202020200000) & P;	// F3..F8
-	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020200000;
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the line.
-	outflank_d = OUTFLANK_5[((O & 0x0000020408502000) * 0x0101010101010101) >> 57]	// >> 57 also drops the file-A bit
-		& (((P & 0x0001020488502000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_5_H[outflank_d] & 0x0000020408502000;	// A7F2H4
-
-	outflank_h = OUTFLANK_5[(O >> 9) & 0x3f] & (P >> 8);	// index: opponent discs on B2..G2; P >> 8 brings row 2 down to the low byte
-	flipped |= FLIPPED_5_H[outflank_h] & 0x000000000000ff00;	// keep only row 2
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square G2.
- *
- * Column G and the diagonal towards A8 are resolved by carry propagation
- * (OUTFLANK_LEFT + OutflankToFlipmask); row 2 uses the OUTFLANK_7 lookup table,
- * which is defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_G2(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h;
-	unsigned long long flipped, outflank_v, outflank_d7;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x4040404040400000) & P;	// G3..G8
-	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040400000;
-
-	outflank_d7 = OUTFLANK_LEFT(O, 0x0102040810200000) & P;	// F3..A8
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810200000;
-
-	outflank_h = OUTFLANK_7[(O >> 8) & 0x3e] & (P >> 7);	// index: opponent discs on B2..F2; P ends up one bit higher than O (presumably to match the table layout)
-	flipped |= ((-outflank_h) & 0x3e) << 8;	// negating a single bit sets it and every higher bit; mask and shift keep B2..F2
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square H2.
- *
- * Column H and the diagonal towards B8 are resolved by carry propagation
- * (OUTFLANK_LEFT + OutflankToFlipmask); row 2 uses the OUTFLANK_7 lookup table,
- * which is defined outside this excerpt.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_H2(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h;
-	unsigned long long flipped, outflank_v, outflank_d7;
-
-	outflank_v = OUTFLANK_LEFT(O, 0x8080808080800000) & P;	// H3..H8
-	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080800000;
-
-	outflank_d7 = OUTFLANK_LEFT(O, 0x0204081020400000) & P;	// G3..B8
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0204081020400000;
-
-	outflank_h = OUTFLANK_7[(O >> 9) & 0x3f] & (P >> 8);	// index: opponent discs on B2..G2; P >> 8 brings row 2 down to the low byte
-	flipped |= ((-outflank_h) & 0x3f) << 9;	// negating a single bit sets it and every higher bit; mask and shift keep B2..G2
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square A3.
- *
- * Two bent 8-square lines through A3 are looked up in tables: A1-A3-F8
- * (OUTFLANK_2 / FLIPPED_2_V) and A8-A3-C1 (OUTFLANK_5 / FLIPPED_5_U).
- * Row 3 is resolved with carry arithmetic. OUTFLANK_n and FLIPPED_2_V are
- * defined outside this excerpt.
- * NOTE(review): the multipliers appear chosen so that the masked squares land
- * in the top byte of the product in line order -- not re-derived here.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_A3(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_a1a3f8, outflank_a8a3c1;
-	unsigned long long flipped;
-
-	outflank_a1a3f8 = OUTFLANK_2[((O & 0x0010080402010100) * 0x0102040404040404) >> 57]	// O on A2, B4..E7
-		& (((P & 0x2010080402010101) * 0x0102040404040404) >> 56);	// P anywhere on A1, A2, B4..F8
-	flipped = FLIPPED_2_V[outflank_a1a3f8] & 0x0010080402010100;
-
-	outflank_a8a3c1 = OUTFLANK_5[((O & 0x0001010101010200) * 0x2020201008040201) >> 57]	// O on B2, A4..A7
-		& (((P & 0x0101010101010204) * 0x2020201008040201) >> 56);	// P anywhere on C1, B2, A4..A8
-	flipped |= FLIPPED_5_U[outflank_a8a3c1] & 0x0001010101010200;
-
-	outflank_h = ((O & 0x007e0000u) + 0x00020000u) & P;	// adding the B3 bit carries through consecutive opponent discs on B3..G3
-	flipped |= ((outflank_h * 0x7f) >> 24) << 17;	// outflank bit k -> bits 17..k-1
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square B3.
- *
- * Two bent 8-square lines through B3 are looked up in tables: B1-B3-G8
- * (OUTFLANK_2 / FLIPPED_2_V) and B8-B3-D1 (OUTFLANK_5 / FLIPPED_5_U).
- * Row 3 is resolved with carry arithmetic. OUTFLANK_n and FLIPPED_2_V are
- * defined outside this excerpt.
- * NOTE(review): the multipliers (and the ">> 1" applied to P on the second
- * line) appear chosen so that the masked squares land in the top byte of the
- * product in line order -- not re-derived here.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_B3(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_b1b3g8, outflank_b8b3d1;
-	unsigned long long flipped;
-
-	outflank_b1b3g8 = OUTFLANK_2[((O & 0x0020100804020200) * 0x0081020202020202) >> 57]	// O on B2, C4..F7
-		& (((P & 0x4020100804020202) * 0x0081020202020202) >> 56);	// P anywhere on B1, B2, C4..G8
-	flipped = FLIPPED_2_V[outflank_b1b3g8] & 0x0020100804020200;
-
-	outflank_b8b3d1 = OUTFLANK_5[((O & 0x0002020202020400) * 0x0010100804020100) >> 57]	// O on C2, B4..B7
-		& ((((P & 0x0202020202020408) >> 1) * 0x2020201008040201) >> 56);	// P anywhere on D1, C2, B4..B8
-	flipped |= FLIPPED_5_U[outflank_b8b3d1] & 0x0002020202020400;
-
-	outflank_h = ((O & 0x007c0000u) + 0x00040000u) & P;	// adding the C3 bit carries through consecutive opponent discs on C3..G3
-	flipped |= ((outflank_h * 0x3f) >> 24) << 18;	// outflank bit k -> bits 18..k-1
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square C3.
- *
- * Column C (OUTFLANK_2 / FLIPPED_2_V), row 3 and the A1-H8 diagonal
- * (OUTFLANK_2 / FLIPPED_2_H) are looked up in tables defined outside this
- * excerpt. On the other diagonal at most one disc can flip on each side
- * (B4 or D2), so it is tested directly.
- * NOTE(review): the column multiplier appears chosen so that column C lands in
- * the top byte of the product in rank order -- not re-derived here.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_C3(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d9;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_2[((O & 0x0004040404040400) * 0x0040810204081020) >> 57]	// O on C2..C7
-		& (((P & 0x0404040404040404) * 0x0040810204081020) >> 56);	// P anywhere on column C
-	flipped = FLIPPED_2_V[outflank_v] & 0x0004040404040400;
-
-	outflank_h = OUTFLANK_2[(O >> 17) & 0x3f] & (P >> 16);	// index: opponent discs on B3..G3; P >> 16 brings row 3 down to the low byte
-	flipped |= FLIPPED_2_H[outflank_h] & 0x0000000000ff0000;	// keep only row 3
-
-	flipped |= (((P >> 7) | (P << 7)) & 0x000000002000800) & O;	// B4 flips if P holds A5; D2 flips if P holds E1
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the diagonal.
-	outflank_d9 = OUTFLANK_2[((O & 0x0040201008040200) * 0x0101010101010101) >> 57]	// O on B2..G7
-		& (((P & 0x8040201008040201) * 0x0101010101010101) >> 56);	// P anywhere on A1..H8
-	flipped |= FLIPPED_2_H[outflank_d9] & 0x0040201008040200;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square D3.
- *
- * Column D (OUTFLANK_2 / FLIPPED_2_V), row 3 and the upward V-shaped line
- * A6-D3-H7 (OUTFLANK_3 / FLIPPED_3_H) are looked up in tables defined outside
- * this excerpt. Towards rank 1 at most one disc can flip on each diagonal
- * (E2 or C2), so those are tested directly.
- * NOTE(review): the column multiplier appears chosen so that column D lands in
- * the top byte of the product in rank order -- not re-derived here.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_D3(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_2[((O & 0x0008080808080800) * 0x0020408102040810) >> 57]	// O on D2..D7
-		& (((P & 0x0808080808080808) * 0x0020408102040810) >> 56);	// P anywhere on column D
-	flipped = FLIPPED_2_V[outflank_v] & 0x0008080808080800;
-
-	outflank_h = OUTFLANK_3[(O >> 17) & 0x3f] & (P >> 16);	// index: opponent discs on B3..G3; P >> 16 brings row 3 down to the low byte
-	flipped |= FLIPPED_3_H[outflank_h] & 0x0000000000ff0000;	// keep only row 3
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the line.
-	outflank_d = OUTFLANK_3[((O & 0x0000402214080000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0080412214080000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_3_H[outflank_d] & 0x0000402214080000;	// A6D3H7
-
-	flipped |= (((P << 7) & 0x0000000000001000) | ((P << 9) & 0x000000000000400)) & O;	// E2 flips if P holds F1; C2 flips if P holds B1
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square E3.
- *
- * Column E (OUTFLANK_2 / FLIPPED_2_V), row 3 and the upward V-shaped line
- * A7-E3-H6 (OUTFLANK_4 / FLIPPED_4_H) are looked up in tables defined outside
- * this excerpt. Towards rank 1 at most one disc can flip on each diagonal
- * (F2 or D2), so those are tested directly.
- * NOTE(review): the column multiplier appears chosen so that column E lands in
- * the top byte of the product in rank order -- not re-derived here.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_E3(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_2[((O & 0x0010101010101000) * 0x0010204081020408) >> 57]	// O on E2..E7
-		& (((P & 0x1010101010101010) * 0x0010204081020408) >> 56);	// P anywhere on column E
-	flipped = FLIPPED_2_V[outflank_v] & 0x0010101010101000;
-
-	outflank_h = OUTFLANK_4[(O >> 17) & 0x3f] & (P >> 16);	// index: opponent discs on B3..G3; P >> 16 brings row 3 down to the low byte
-	flipped |= FLIPPED_4_H[outflank_h] & 0x0000000000ff0000;	// keep only row 3
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the line.
-	outflank_d = OUTFLANK_4[((O & 0x0000024428100000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0001824428100000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_4_H[outflank_d] & 0x0000024428100000;	// A7E3H6
-
-	flipped |= (((P << 7) & 0x0000000000002000) | ((P << 9) & 0x000000000000800)) & O;	// F2 flips if P holds G1; D2 flips if P holds C1
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square F3.
- *
- * Column F (OUTFLANK_2 / FLIPPED_2_V), row 3 and the H1-A8 diagonal
- * (OUTFLANK_5 / FLIPPED_5_H) are looked up in tables defined outside this
- * excerpt. On the other diagonal at most one disc can flip on each side
- * (G4 or E2), so it is tested directly.
- * NOTE(review): the column multiplier appears chosen so that column F lands in
- * the top byte of the product in rank order -- not re-derived here.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_F3(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d7;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_2[((O & 0x0020202020202000) * 0x0008102040810204) >> 57]	// O on F2..F7
-		& (((P & 0x2020202020202020) * 0x0008102040810204) >> 56);	// P anywhere on column F
-	flipped = FLIPPED_2_V[outflank_v] & 0x0020202020202000;
-
-	outflank_h = OUTFLANK_5[(O >> 17) & 0x3f] & (P >> 16);	// index: opponent discs on B3..G3; P >> 16 brings row 3 down to the low byte
-	flipped |= FLIPPED_5_H[outflank_h] & 0x0000000000ff0000;	// keep only row 3
-
-	// Multiplying by 0x0101010101010101 sums every byte into the top byte: one bit per file of the diagonal.
-	outflank_d7 = OUTFLANK_5[((O & 0x0002040810204000) * 0x0101010101010101) >> 57]	// O on G2..B7
-		& (((P & 0x0102040810204080) * 0x0101010101010101) >> 56);	// P anywhere on H1..A8
-	flipped |= FLIPPED_5_H[outflank_d7] & 0x0002040810204000;
-
-	flipped |= (((P >> 9) | (P << 9)) & 0x0000000040001000) & O;	// G4 flips if P holds H5; E2 flips if P holds D1
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square G3.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_G3(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_e1g3g8, outflank_b8g3g1;
-	unsigned long long flipped;
-
-	outflank_e1g3g8 = OUTFLANK_2[((O & 0x0040404040402000) * 0x0010101020408102) >> 57]
-		& (((P & 0x4040404040402010) * 0x0010101020408102) >> 56);
-	flipped = FLIPPED_2_V[outflank_e1g3g8] & 0x0040404040402000;
-
-	outflank_b8g3g1 = OUTFLANK_5[((O & 0x0004081020404000) * 0x0402010101010101) >> 58]
-		& ((((P & 0x0204081020404040) >> 1) * 0x0402010101010101) >> 56);
-	flipped |= FLIPPED_5_U[outflank_b8g3g1] & 0x0004081020404000;
-
-	outflank_h = OUTFLANK_7[(O >> 16) & 0x3e] & (P >> 15);
-	flipped |= ((-outflank_h) & 0x3e) << 16;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square H3.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_H3(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_f1h3h8, outflank_c8h3h1;
-	unsigned long long flipped;
-
-	outflank_f1h3h8 = OUTFLANK_2[((O & 0x0080808080804000) * 0x0008080810204081) >> 57]
-		& (((P & 0x8080808080804020) * 0x0008080810204081) >> 56);
-	flipped = FLIPPED_2_V[outflank_f1h3h8] & 0x0080808080804000;
-
-	outflank_c8h3h1 = OUTFLANK_5[((O & 0x0008102040808000) * 0x0000804040404040) >> 57]
-		& ((((P & 0x0408102040808080) >> 2) * 0x0402010101010101) >> 56);
-	flipped |= FLIPPED_5_U[outflank_c8h3h1] & 0x0008102040808000;
-
-	outflank_h = OUTFLANK_7[(O >> 17) & 0x3f] & (P >> 16);
-	flipped |= ((-outflank_h) & 0x3f) << 17;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square A4.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_A4(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_a1a4e8, outflank_a8a4d1;
-	unsigned long long flipped;
-
-	outflank_a1a4e8 = OUTFLANK_3[((O & 0x0008040201010100) * 0x0102040808080808) >> 57]
-		& (((P & 0x1008040201010101) * 0x0102040808080808) >> 56);
-	flipped = FLIPPED_3_V[outflank_a1a4e8] & 0x0008040201010100;
-
-	outflank_a8a4d1 = OUTFLANK_4[((O & 0x0001010101020400) * 0x1010101008040201) >> 57]
-		& (((P & 0x0101010101020408) * 0x1010101008040201) >> 56);
-	flipped |= FLIPPED_4_U[outflank_a8a4d1] & 0x0001010101020400;
-
-	outflank_h = ((unsigned int) O + 0x02000000u) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7e000000u;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square B4.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_B4(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_b1b4f8, outflank_b8b4e1;
-	unsigned long long flipped;
-
-	outflank_b1b4f8 = OUTFLANK_3[((O & 0x0010080402020200) * 0x0081020404040404) >> 57]
-		& (((P & 0x2010080402020202) * 0x0081020404040404) >> 56);
-	flipped = FLIPPED_3_V[outflank_b1b4f8] & 0x0010080402020200;
-
-	outflank_b8b4e1 = OUTFLANK_4[((O & 0x0002020202040800) * 0x1010101008040201) >> 58]
-		& ((((P & 0x0202020202040810) >> 1) * 0x1010101008040201) >> 56);
-	flipped |= FLIPPED_4_U[outflank_b8b4e1] & 0x0002020202040800;
-
-	outflank_h = ((unsigned int) O + 0x04000000u) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7c000000u;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square C4.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_C4(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_c1c4g8, outflank_c8c4f1;
-	unsigned long long flipped;
-
-	outflank_c1c4g8 = OUTFLANK_3[((O & 0x0020100804040400) * 0x0040810202020202) >> 57]
-		& (((P & 0x4020100804040404) * 0x0040810202020202) >> 56);
-	flipped = FLIPPED_3_V[outflank_c1c4g8] & 0x0020100804040400;
-
-	outflank_c8c4f1 = OUTFLANK_4[((O & 0x0004040404081000) * 0x0404040402010080) >> 57]
-		& ((((P & 0x0404040404081020) >> 2) * 0x1010101008040201) >> 56);
-	flipped |= FLIPPED_4_U[outflank_c8c4f1] & 0x0004040404081000;
-
-	outflank_h = OUTFLANK_2[(O >> 25) & 0x3f] & (P >> 24);
-	flipped |= FLIPPED_2_H[outflank_h] & 0x00000000ff000000;
-
-	flipped |= (((P << 9) & 0x00000000000020000) | ((P >> 7) & 0x00000000200000000)) & O;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square D4.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_D4(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_3[((O & 0x0008080808080800) * 0x0020408102040810) >> 57]
-		& (((P & 0x0808080808080808) * 0x0020408102040810) >> 56);
-	flipped = FLIPPED_3_V[outflank_v] & 0x0008080808080800;
-
-	outflank_h = OUTFLANK_3[(O >> 25) & 0x3f] & (P >> 24);
-	flipped |= FLIPPED_3_H[outflank_h] & 0x00000000ff000000;
-
-	outflank_d7 = OUTFLANK_3[((O & 0x0000020408102000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0001020408102040) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_3_H[outflank_d7] & 0x0000020408102000;
-
-	outflank_d9 = OUTFLANK_3[((O & 0x0040201008040200) * 0x0101010101010101) >> 57]
-		& (((P & 0x8040201008040201) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_3_H[outflank_d9] & 0x0040201008040200;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square E3.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_E4(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_3[((O & 0x0010101010101000) * 0x0010204081020408) >> 57]
-		& (((P & 0x1010101010101010) * 0x0010204081020408) >> 56);
-	flipped = FLIPPED_3_V[outflank_v] & 0x0010101010101000;
-
-	outflank_h = OUTFLANK_4[(O >> 25) & 0x3f] & (P >> 24);
-	flipped |= FLIPPED_4_H[outflank_h] & 0x00000000ff000000;
-
-	outflank_d7 = OUTFLANK_4[((O & 0x0002040810204000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0102040810204080) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_4_H[outflank_d7] & 0x0002040810204000;
-
-	outflank_d9 = OUTFLANK_4[((O & 0x0000402010080400) * 0x0101010101010101) >> 57]
-		& (((P & 0x0080402010080402) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_4_H[outflank_d9] & 0x0000402010080400;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square F4.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_F4(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_c1f4f8, outflank_b8f4f1;
-	unsigned long long flipped;
-
-	outflank_c1f4f8 = OUTFLANK_3[((O & 0x0020202020100800) * 0x0040404040810204) >> 57]
-		& (((P & 0x2020202020100804) * 0x0040404040810204) >> 56);
-	flipped = FLIPPED_3_V[outflank_c1f4f8] & 0x0020202020100800;
-
-	outflank_b8f4f1 = OUTFLANK_4[((O & 0x0004081020202000) * 0x0804020101010101) >> 58]
-		& ((((P & 0x0204081020202020) >> 1) * 0x0804020101010101) >> 56);
-	flipped |= FLIPPED_4_U[outflank_b8f4f1] & 0x0004081020202000;
-
-	outflank_h = OUTFLANK_5[(O >> 25) & 0x3f] & (P >> 24);
-	flipped |= FLIPPED_5_H[outflank_h] & 0x00000000ff000000;
-
-	flipped |= (((P << 7) & 0x0000000000400000) | ((P >> 9) & 0x0000004000000000)) & O;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square G4.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_G4(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d1g4g8, outflank_c8g4g1;
-	unsigned long long flipped;
-
-	outflank_d1g4g8 = OUTFLANK_3[((O & 0x0040404040201000) * 0x0020202020408102) >> 57]
-		& (((P & 0x4040404040201008) * 0x0020202020408102) >> 56);
-	flipped = FLIPPED_3_V[outflank_d1g4g8] & 0x0040404040201000;
-
-	outflank_c8g4g1 = OUTFLANK_4[((O & 0x0008102040404000) * 0x0001008040404040) >> 57]
-		& ((((P & 0x0408102040404040) >> 2) * 0x0804020101010101) >> 56);
-	flipped |= FLIPPED_4_U[outflank_c8g4g1] & 0x0008102040404000;
-
-	outflank_h = OUTFLANK_7[(O >> 24) & 0x3e] & (P >> 23);
-	flipped |= ((-outflank_h) & 0x3e) << 24;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square H4.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_H4(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_e1h4h8, outflank_d8h4h1;
-	unsigned long long flipped;
-
-	outflank_e1h4h8 = OUTFLANK_3[((O & 0x0080808080402000) * 0x0010101010204081) >> 57]
-		& (((P & 0x8080808080402010) * 0x0010101010204081) >> 56);
-	flipped = FLIPPED_3_V[outflank_e1h4h8] & 0x0080808080402000;
-
-	outflank_d8h4h1 = OUTFLANK_4[((O & 0x0010204080808000) * 0x0000804020202020) >> 57]
-		& ((((P & 0x0810204080808080) >> 3) * 0x0804020101010101) >> 56);
-	flipped |= FLIPPED_4_U[outflank_d8h4h1] & 0x0010204080808000;
-
-	outflank_h = OUTFLANK_7[(O >> 25) & 0x3f] & (P >> 24);
-	flipped |= ((-outflank_h) & 0x3f) << 25;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square A5.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_A5(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_a1a5d8, outflank_a8a5e1, outflank_h;
-	unsigned long long flipped;
-
-	outflank_a1a5d8 = OUTFLANK_4[((O & 0x0004020101010100) * 0x0102040810101010) >> 57]
-		& (((P & 0x0804020101010101) * 0x0102040810101010) >> 56);
-	flipped = FLIPPED_4_V[outflank_a1a5d8] & 0x0004020101010100;
-
-	outflank_a8a5e1 = OUTFLANK_3[((O & 0x0001010102040800) * 0x0808080808040201) >> 57]
-		& (((P & 0x0101010102040810) * 0x0808080808040201) >> 56);
-	flipped |= FLIPPED_3_U[outflank_a8a5e1] & 0x0001010102040800;
-
-	outflank_h = ((unsigned int) (O >> 8) + 0x02000000) & (unsigned int) (P >> 8);
-	flipped |= (((unsigned long long) outflank_h << 8) - outflank_h) & 0x0000007e00000000;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square B5.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_B5(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_b1b5e8, outflank_b8b5f1, outflank_h;
-	unsigned long long flipped;
-
-	outflank_b1b5e8 = OUTFLANK_4[((O & 0x0008040202020200) * 0x0081020408080808) >> 57]
-		& (((P & 0x1008040202020202) * 0x0081020408080808) >> 56);
-	flipped = FLIPPED_4_V[outflank_b1b5e8] & 0x0008040202020200;
-
-	outflank_b8b5f1 = OUTFLANK_3[((O & 0x0002020204081000) * 0x0808080808040201) >> 58]
-		& ((((P & 0x0202020204081020) >> 1) * 0x0808080808040201) >> 56);
-	flipped |= FLIPPED_3_U[outflank_b8b5f1] & 0x0002020204081000;
-
-	outflank_h = ((unsigned int) (O >> 8) + 0x04000000) & (unsigned int) (P >> 8);
-	flipped |= (((unsigned long long) outflank_h << 8) - outflank_h) & 0x0000007c00000000;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square C5.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_C5(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_c1c5f8, outflank_c8c5g1;
-	unsigned long long flipped;
-
-	outflank_c1c5f8 = OUTFLANK_4[((O & 0x0010080404040400) * 0x0040810204040404) >> 57]
-		& (((P & 0x2010080404040404) * 0x0040810204040404) >> 56);
-	flipped = FLIPPED_4_V[outflank_c1c5f8] & 0x0010080404040400;
-
-	outflank_c8c5g1 = OUTFLANK_3[((O & 0x0004040408102000) * 0x0002020202010080) >> 57]
-		& ((((P & 0x0404040408102040) >> 2) * 0x0808080808040201) >> 56);
-	flipped |= FLIPPED_3_U[outflank_c8c5g1] & 0x0004040408102000;
-
-	outflank_h = OUTFLANK_2[(O >> 33) & 0x3f] & (P >> 32);
-	flipped |= FLIPPED_2_H[outflank_h] & 0x000000ff00000000;
-
-	flipped |= (((P << 9) & 0x0000000002000000) | ((P >> 7) & 0x0000020000000000)) & O;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square D5.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_D5(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_4[((O & 0x0008080808080800) * 0x0020408102040810) >> 57]
-		& (((P & 0x0808080808080808) * 0x0020408102040810) >> 56);
-	flipped = FLIPPED_4_V[outflank_v] & 0x0008080808080800;
-
-	outflank_h = OUTFLANK_3[(O >> 33) & 0x3f] & (P >> 32);
-	flipped |= FLIPPED_3_H[outflank_h] & 0x000000ff00000000;
-
-	outflank_d7 = OUTFLANK_3[((O & 0x0002040810204000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0102040810204080) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_3_H[outflank_d7] & 0x0002040810204000;
-
-	outflank_d9 = OUTFLANK_3[((O & 0x0020100804020000) * 0x0101010101010101) >> 57]
-		& (((P & 0x4020100804020100) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_3_H[outflank_d9] & 0x0020100804020000;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square E5.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_E5(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_4[((O & 0x0010101010101000) * 0x0010204081020408) >> 57]
-		& (((P & 0x1010101010101010) * 0x0010204081020408) >> 56);
-	flipped = FLIPPED_4_V[outflank_v] & 0x0010101010101000;
-
-	outflank_h = OUTFLANK_4[(O >> 33) & 0x3f] & (P >> 32);
-	flipped |= FLIPPED_4_H[outflank_h] & 0x000000ff00000000;
-
-	outflank_d7 = OUTFLANK_4[((O & 0x0004081020400000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0204081020408000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_4_H[outflank_d7] & 0x0004081020400000;
-
-	outflank_d9 = OUTFLANK_4[((O & 0x0040201008040200) * 0x0101010101010101) >> 57]
-		& (((P & 0x8040201008040201) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_4_H[outflank_d9] & 0x0040201008040200;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square F5.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_F5(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_b1f5f8, outflank_c8f5f1;
-	unsigned long long flipped;
-
-	outflank_b1f5f8 = OUTFLANK_4[((O & 0x0020202010080400) * 0x0080808080810204) >> 57]
-		& (((P & 0x2020202010080402) * 0x0080808080810204) >> 56);
-	flipped = FLIPPED_4_V[outflank_b1f5f8] & 0x0020202010080400;
-
-	outflank_c8f5f1 = OUTFLANK_3[((O & 0x0008102020202000) * 0x0002010080404040) >> 57]
-		& ((((P & 0x0408102020202020) >> 2) * 0x1008040201010101) >> 56);
-	flipped |= FLIPPED_3_U[outflank_c8f5f1] & 0x0008102020202000;
-
-	outflank_h = OUTFLANK_5[(O >> 33) & 0x3f] & (P >> 32);
-	flipped |= FLIPPED_5_H[outflank_h] & 0x000000ff00000000;
-
-	flipped |= (((P << 7) & 0x0000000040000000) | ((P >> 9) & 0x0000400000000000)) & O;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square G5.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_G5(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_c1g5g8, outflank_d8g5g1;
-	unsigned long long flipped;
-
-	outflank_c1g5g8 = OUTFLANK_4[((O & 0x0040404020100800) * 0x0040404040408102) >> 57]
-		& (((P & 0x4040404020100804) * 0x0040404040408102) >> 56);
-	flipped = FLIPPED_4_V[outflank_c1g5g8] & 0x0040404020100800;
-
-	outflank_d8g5g1 = OUTFLANK_3[((O & 0x0010204040404000) * 0x0001008040202020) >> 57]
-		& ((((P & 0x0810204040404040) >> 3) * 0x1008040201010101) >> 56);
-	flipped |= FLIPPED_3_U[outflank_d8g5g1] & 0x0010204040404000;
-
-	outflank_h = OUTFLANK_7[(O >> 32) & 0x3e] & (P >> 31);
-	flipped |= (unsigned long long) ((-outflank_h) & 0x3e) << 32;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square H5.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_H5(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d1h5h8, outflank_e8h5h1;
-	unsigned long long flipped;
-
-	outflank_d1h5h8 = OUTFLANK_4[((O & 0x0080808040201000) * 0x0020202020204081) >> 57]
-		& (((P & 0x8080808040201008) * 0x0020202020204081) >> 56);
-	flipped = FLIPPED_4_V[outflank_d1h5h8] & 0x0080808040201000;
-
-	outflank_e8h5h1 = OUTFLANK_3[((O & 0x0020408080808000) * 0x0000804020101010) >> 57]
-		& ((((P & 0x1020408080808080) >> 4) * 0x1008040201010101) >> 56);
-	flipped |= FLIPPED_3_U[outflank_e8h5h1] & 0x0020408080808000;
-
-	outflank_h = OUTFLANK_7[(O >> 33) & 0x3f] & (P >> 32);
-	flipped |= (unsigned long long) ((-outflank_h) & 0x3f) << 33;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square A6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_A6(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_a1a6c8, outflank_a8a6f1, outflank_h;
-	unsigned long long flipped;
-
-	outflank_a1a6c8 = OUTFLANK_5[((O & 0x0002010101010100) * 0x0102040810202020) >> 57]
-		& (((P & 0x0402010101010101) * 0x0102040810202020) >> 56);
-	flipped = FLIPPED_5_V[outflank_a1a6c8] & 0x0002010101010100;
-
-	outflank_a8a6f1 = OUTFLANK_2[((O & 0x0001010204081000) * 0x0404040404040201) >> 57]
-		& (((P & 0x0101010204081020) * 0x0404040404040201) >> 56);
-	flipped |= FLIPPED_2_U[outflank_a8a6f1] & 0x0001010204081000;
-
-	outflank_h = ((unsigned int) (O >> 16) + 0x02000000) & (unsigned int) (P >> 16);
-	flipped |= (((unsigned long long) outflank_h << 16) - outflank_h) & 0x00007e0000000000;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square B6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_B6(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_a1a6c8, outflank_a8a6f1, outflank_h;
-	unsigned long long flipped;
-
-	outflank_a1a6c8 = OUTFLANK_5[((O & 0x0004020202020200) * 0x0081020408101010) >> 57]
-		& (((P & 0x0804020202020202) * 0x0081020408101010) >> 56);
-	flipped = FLIPPED_5_V[outflank_a1a6c8] & 0x0004020202020200;
-
-	outflank_a8a6f1 = OUTFLANK_2[((O & 0x0002020408102000) * 0x0404040404040201) >> 58]
-		& ((((P & 0x0202020408102040) >> 1) * 0x0404040404040201) >> 56);
-	flipped |= FLIPPED_2_U[outflank_a8a6f1] & 0x0002020408102000;
-
-	outflank_h = ((unsigned int) (O >> 16) + 0x04000000) & (unsigned int) (P >> 16);
-	flipped |= (((unsigned long long) outflank_h << 16) - outflank_h) & 0x00007c0000000000;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square C6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_C6(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d7;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_5[((O & 0x0004040404040400) * 0x0040810204081020) >> 57]
-		& (((P & 0x0404040404040404) * 0x0040810204081020) >> 56);
-	flipped = FLIPPED_5_V[outflank_v] & 0x0004040404040400;
-
-	outflank_h = OUTFLANK_2[(O >> 41) & 0x3f] & (P >> 40);
-	flipped |= FLIPPED_2_H[outflank_h] & 0x0000ff0000000000;
-
-	outflank_d7 = OUTFLANK_2[((O & 0x0002040810204000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0102040810204080) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_2_H[outflank_d7] & 0x0002040810204000;
-
-	flipped |= ((P >> 9) | (P << 9)) & 0x0008000200000000 & O;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square D6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_D6(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_5[((O & 0x0008080808080800) * 0x0020408102040810) >> 57]
-		& (((P & 0x0808080808080808) * 0x0020408102040810) >> 56);
-	flipped = FLIPPED_5_V[outflank_v] & 0x0008080808080800;
-
-	outflank_h = OUTFLANK_3[(O >> 41) & 0x3f] & (P >> 40);
-	flipped |= FLIPPED_3_H[outflank_h] & 0x0000ff0000000000;
-
-	outflank_d = OUTFLANK_3[((O & 0x0000081422400000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0000081422418000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_3_H[outflank_d] & 0x0000081422400000;	// A3D6H2
-
-	flipped |= (((P >> 9) & 0x0010000000000000) | ((P >> 7) & 0x0004000000000000)) & O;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square E6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_E6(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_5[((O & 0x0010101010101000) * 0x0010204081020408) >> 57]
-		& (((P & 0x1010101010101010) * 0x0010204081020408) >> 56);
-	flipped = FLIPPED_5_V[outflank_v] & 0x0010101010101000;
-
-	outflank_h = OUTFLANK_4[(O >> 41) & 0x3f] & (P >> 40);
-	flipped |= FLIPPED_4_H[outflank_h] & 0x0000ff0000000000;
-
-	outflank_d = OUTFLANK_4[((O & 0x0000102844020000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0000102844820100) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_4_H[outflank_d] & 0x0000102844020000;	// A2E6H3
-
-	flipped |= (((P >> 9) & 0x0020000000000000) | ((P >> 7) & 0x0008000000000000)) & O;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square F6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_F6(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_v, outflank_d9;
-	unsigned long long flipped;
-
-	outflank_v = OUTFLANK_5[((O & 0x0020202020202000) * 0x0008102040810204) >> 57]
-		& (((P & 0x2020202020202020) * 0x0008102040810204) >> 56);
-	flipped = FLIPPED_5_V[outflank_v] & 0x0020202020202000;
-
-	outflank_h = OUTFLANK_5[(O >> 41) & 0x3f] & (P >> 40);
-	flipped |= FLIPPED_5_H[outflank_h] & 0x0000ff0000000000;
-
-	flipped |= ((P >> 7) | (P << 7)) & 0x0010004000000000 & O;
-
-	outflank_d9 = OUTFLANK_5[((O & 0x0040201008040200) * 0x0101010101010101) >> 57]
-		& (((P & 0x8040201008040201) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_5_H[outflank_d9] & 0x0040201008040200;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square G6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_G6(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_b1g6g8, outflank_e8g6g1;
-	unsigned long long flipped;
-
-	outflank_b1g6g8 = OUTFLANK_5[((O & 0x0040402010080400) * 0x0080808080808102) >> 57]
-		& (((P & 0x4040402010080402) * 0x0080808080808102) >> 56);
-	flipped = FLIPPED_5_V[outflank_b1g6g8] & 0x0040402010080400;
-
-	outflank_e8g6g1 = OUTFLANK_2[((O & 0x0020404040404000) * 0x0001008040201010) >> 57]
-		& ((((P & 0x1020404040404040) >> 4) * 0x2010080402010101) >> 56);
-	flipped |= FLIPPED_2_U[outflank_e8g6g1] & 0x0020404040404000;
-
-	outflank_h = OUTFLANK_7[(O >> 40) & 0x3e] & (P >> 39);
-	flipped |= (unsigned long long) ((-outflank_h) & 0x3e) << 40;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square H6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_H6(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_c1h6h8, outflank_f8h6h1;
-	unsigned long long flipped;
-
-	outflank_c1h6h8 = OUTFLANK_5[((O & 0x0080804020100800) * 0x0040404040404081) >> 57]
-		& (((P & 0x8080804020100804) * 0x0040404040404081) >> 56);
-	flipped = FLIPPED_5_V[outflank_c1h6h8] & 0x0080804020100800;
-
-	outflank_f8h6h1 = OUTFLANK_2[((O & 0x0040808080808000) * 0x0000804020100808) >> 57]
-		& ((((P & 0x2040808080808080) >> 5) * 0x2010080402010101) >> 56);
-	flipped |= FLIPPED_2_U[outflank_f8h6h1] & 0x0040808080808000;
-
-	outflank_h = OUTFLANK_7[(O >> 41) & 0x3f] & (P >> 40);
-	flipped |= (unsigned long long) ((-outflank_h) & 0x3f) << 41;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square A7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_A7(const unsigned long long P, const unsigned long long O)
-{
-	unsigned long long flipped, outflank_v, outflank_d7;
-	unsigned int outflank_h;
-
-	outflank_v = (0x0000010000000000 >> CONTIG_UP[((O & 0x0000010101010100) * 0x0102040810204080) >> 56]) & P;
-	flipped  = (-outflank_v * 2) & 0x0000010101010100;
-
-	outflank_d7 = (0x0000020000000000 >> CONTIG_UPRIGHT[((O & 0x0000020408102000) * 0x0101010101010101) >> 57]) & P;
-	flipped |= (-outflank_d7 * 2) & 0x0000020408102000;
-
-	outflank_h = ((unsigned int) (O >> 24) + 0x02000000) & (unsigned int) (P >> 24);
-	flipped |= (((unsigned long long) outflank_h << 24) - outflank_h) & 0x007e000000000000;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square B7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_B7(const unsigned long long P, const unsigned long long O)
-{
-	unsigned long long flipped, outflank_v, outflank_d7;
-	unsigned int outflank_h;
-
-	outflank_v = (0x0000020000000000 >> CONTIG_UP[((O & 0x0000020202020200) * 0x0081020408102040) >> 56]) & P;
-	flipped  = (-outflank_v * 2) & 0x0000020202020200;
-
-	outflank_d7 = (0x0000040000000000 >> CONTIG_UPRIGHT[((O & 0x0000040810204000) * 0x0101010101010101) >> 58]) & P;
-	flipped |= (-outflank_d7 * 2) & 0x0000040810204000;
-
-	outflank_h = ((unsigned int) (O >> 24) + 0x04000000) & (unsigned int) (P >> 24);
-	flipped |= (((unsigned long long) outflank_h << 24) - outflank_h) & 0x007c000000000000;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square C7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_C7(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = (0x0000040000000000 >> CONTIG_UP[((O & 0x0000040404040400) * 0x0040810204081020) >> 56]) & P;
-	flipped  = (-outflank_v * 2) & 0x0000040404040400;
-
-	outflank_h = OUTFLANK_2[(O >> 49) & 0x3f] & (P >> 48);
-	flipped |= FLIPPED_2_H[outflank_h] & 0x00ff000000000000;
-
-	outflank_d = OUTFLANK_2[((O & 0x00040a1020400000) * 0x0101010101010101) >> 57]
-		& (((P & 0x00040a1120408000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_2_H[outflank_d] & 0x00040a1020400000;	// A5C7H2
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square D7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_D7(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = (0x0000080000000000 >> CONTIG_UP[((O & 0x0000080808080800) * 0x0020408102040810) >> 56]) & P;
-	flipped  = (-outflank_v * 2) & 0x0000080808080800;
-
-	outflank_h = OUTFLANK_3[(O >> 49) & 0x3f] & (P >> 48);
-	flipped |= FLIPPED_3_H[outflank_h] & 0x00ff000000000000;
-
-	outflank_d = OUTFLANK_3[((O & 0x0008142240000000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0008142241800000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_3_H[outflank_d] & 0x0008142240000000;	// A4D7H3
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square E7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_E7(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = (0x0000100000000000 >> CONTIG_UP[((O & 0x0000101010101000) * 0x0010204081020408) >> 56]) & P;
-	flipped  = (-outflank_v * 2) & 0x0000101010101000;
-
-	outflank_h = OUTFLANK_4[(O >> 49) & 0x3f] & (P >> 48);
-	flipped |= FLIPPED_4_H[outflank_h] & 0x00ff000000000000;
-
-	outflank_d = OUTFLANK_4[((O & 0x0010284402000000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0010284482010000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_4_H[outflank_d] & 0x0010284402000000;	// A3E7H4
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square F7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_F7(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = (0x0000200000000000 >> CONTIG_UP[((O & 0x0000202020202000) * 0x0008102040810204) >> 56]) & P;
-	flipped  = (-outflank_v * 2) & 0x0000202020202000;
-
-	outflank_h = OUTFLANK_5[(O >> 49) & 0x3f] & (P >> 48);
-	flipped |= FLIPPED_5_H[outflank_h] & 0x00ff000000000000;
-
-	outflank_d = OUTFLANK_5[((O & 0x0020500804020000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0020508804020100) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_5_H[outflank_d] & 0x0020500804020000;	// A2F7H5
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square G7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_G7(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h;
-	unsigned long long flipped, outflank_v, outflank_d9;
-
-	outflank_v = (0x0000400000000000 >> CONTIG_UP[((O & 0x0000404040404000) * 0x0004081020408102) >> 56]) & P;
-	flipped  = (-outflank_v * 2) & 0x0000404040404000;
-
-	outflank_d9 = (0x0000200000000000 >> CONTIG_UPLEFT[((O & 0x0000201008040200) * 0x0101010101010101) >> 56]) & P;
-	flipped |= (-outflank_d9 * 2) & 0x0000201008040200;
-
-	outflank_h = OUTFLANK_7[(O >> 48) & 0x3e] & (P >> 47);
-	flipped |= (unsigned long long) ((-outflank_h) & 0x3e) << 48;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square H7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_H7(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h;
-	unsigned long long flipped, outflank_v, outflank_d9;
-
-	outflank_v = (0x0000800000000000 >> CONTIG_UP[((O & 0x0000808080808000) * 0x0002040810204081) >> 56]) & P;
-	flipped  = (-outflank_v * 2) & 0x0000808080808000;
-
-	outflank_d9 = (0x0000400000000000 >> CONTIG_UPLEFT[((O & 0x0000402010080400) * 0x0101010101010101) >> 57]) & P;
-	flipped |= (-outflank_d9 * 2) & 0x0000402010080400;
-
-	outflank_h = OUTFLANK_7[(O >> 49) & 0x3f] & (P >> 48);
-	flipped |= (unsigned long long) ((-outflank_h) & 0x3f) << 49;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square A8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_A8(const unsigned long long P, const unsigned long long O)
-{
-	unsigned long long flipped, outflank_v, outflank_h, outflank_d7;
-
-	outflank_v = (0x0001000000000000 >> CONTIG_UP[((O & 0x0001010101010100) * 0x0102040810204080) >> 57]) & P;
-	flipped  = (-outflank_v * 2) & 0x0001010101010100;
-
-	outflank_d7 = (0x0002000000000000 >> CONTIG_UPRIGHT[((O & 0x0002040810204000) * 0x0101010101010101) >> 57]) & P;
-	flipped |= (-outflank_d7 * 2) & 0x0002040810204000;
-
-	outflank_h = (O + 0x0200000000000000) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7e00000000000000;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square B8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_B8(const unsigned long long P, const unsigned long long O)
-{
-	unsigned long long flipped, outflank_v, outflank_h, outflank_d7;
-
-	outflank_v = (0x0002000000000000 >> CONTIG_UP[((O & 0x0002020202020200) * 0x0081020408102040) >> 57]) & P;
-	flipped  = (-outflank_v * 2) & 0x0002020202020200;
-
-	outflank_d7 = (0x0004000000000000 >> CONTIG_UPRIGHT[((O & 0x0004081020400000) * 0x0101010101010101) >> 58]) & P;
-	flipped |= (-outflank_d7 * 2) & 0x0004081020400000;
-
-	outflank_h = (O + 0x0400000000000000) & P;
-	flipped |= (outflank_h - (outflank_h >> 8)) & 0x7c00000000000000;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square C8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_C8(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = (0x0004000000000000 >> CONTIG_UP[((O & 0x0004040404040400) * 0x0040810204081020) >> 57]) & P;
-	flipped  = (-outflank_v * 2) & 0x0004040404040400;
-
-	outflank_h = OUTFLANK_2[(O >> 57) & 0x3f] & (P >> 56);
-	flipped |= FLIPPED_2_H[outflank_h] & 0xff00000000000000;
-
-	outflank_d = OUTFLANK_2[((O & 0x040a102040000000) * 0x0101010101010101) >> 57]
-		& (((P & 0x040a112040800000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_2_H[outflank_d] & 0x040a102040000000;	// A6C8H3
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square D8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_D8(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = (0x0008000000000000 >> CONTIG_UP[((O & 0x0008080808080800) * 0x0020408102040810) >> 57]) & P;
-	flipped  = (-outflank_v * 2) & 0x0008080808080800;
-
-	outflank_h = OUTFLANK_3[(O >> 57) & 0x3f] & (P >> 56);
-	flipped |= FLIPPED_3_H[outflank_h] & 0xff00000000000000;
-
-	outflank_d = OUTFLANK_3[((O & 0x0814224000000000) * 0x0101010101010101) >> 57]
-		& (((P & 0x0814224180000000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_3_H[outflank_d] & 0x0814224000000000;	// A5D8H4
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square E8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_E8(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = (0x0010000000000000 >> CONTIG_UP[((O & 0x0010101010101000) * 0x0010204081020408) >> 57]) & P;
-	flipped  = (-outflank_v * 2) & 0x0010101010101000;
-
-	outflank_h = OUTFLANK_4[(O >> 57) & 0x3f] & (P >> 56);
-	flipped |= FLIPPED_4_H[outflank_h] & 0xff00000000000000;
-
-	outflank_d = OUTFLANK_4[((O & 0x1028440200000000) * 0x0101010101010101) >> 57]
-		& (((P & 0x1028448201000000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_4_H[outflank_d] & 0x1028440200000000;	// A4E8H5
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square F8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_F8(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h, outflank_d;
-	unsigned long long flipped, outflank_v;
-
-	outflank_v = (0x0020000000000000 >> CONTIG_UP[((O & 0x0020202020202000) * 0x0008102040810204) >> 57]) & P;
-	flipped  = (-outflank_v * 2) & 0x0020202020202000;
-
-	outflank_h = OUTFLANK_5[(O >> 57) & 0x3f] & (P >> 56);
-	flipped |= FLIPPED_5_H[outflank_h] & 0xff00000000000000;
-
-	outflank_d = OUTFLANK_5[((O & 0x2050080402000000) * 0x0101010101010101) >> 57]
-		& (((P & 0x2050880402010000) * 0x0101010101010101) >> 56);
-	flipped |= FLIPPED_5_H[outflank_d] & 0x2050080402000000;	// A3F8H6
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square G8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_G8(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h;
-	unsigned long long flipped, outflank_v, outflank_d9;
-
-	outflank_v = (0x0040000000000000 >> CONTIG_UP[((O & 0x0040404040404000) * 0x0004081020408102) >> 57]) & P;
-	flipped  = (-outflank_v * 2) & 0x0040404040404000;
-
-	outflank_d9 = (0x0020000000000000 >> CONTIG_UPLEFT[((O & 0x0020100804020000) * 0x0101010101010101) >> 56]) & P;
-	flipped |= (-outflank_d9 * 2) & 0x0020100804020000;
-
-	outflank_h = OUTFLANK_7[(O >> 56) & 0x3e] & (P >> 55);
-	flipped |= (unsigned long long) ((-outflank_h) & 0x3e) << 56;
-
-	return flipped;
-}
-
-/**
- * Compute flipped discs when playing on square H8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_H8(const unsigned long long P, const unsigned long long O)
-{
-	unsigned int outflank_h;
-	unsigned long long flipped, outflank_v, outflank_d9;
-
-	outflank_v = (0x0080000000000000 >> CONTIG_UP[((O & 0x0080808080808000) * 0x0002040810204081) >> 57]) & P;
-	flipped  = (-outflank_v * 2) & 0x0080808080808000;
-
-	outflank_d9 = (0x0040000000000000 >> CONTIG_UPLEFT[((O & 0x0040201008040200) * 0x0101010101010101) >> 57]) & P;
-	flipped |= (-outflank_d9 * 2) & 0x0040201008040200;
-
-	outflank_h = OUTFLANK_7[(O >> 57) & 0x3f] & (P >> 56);
-	flipped |= (unsigned long long) ((-outflank_h) & 0x3f) << 57;
-
-	return flipped;
-}
-
-/**
- * Compute (zero-) flipped discs when plassing.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static unsigned long long flip_pass(const unsigned long long P, const unsigned long long O)
-{
-	(void) P; // useless code to shut-up compiler warning
-	(void) O;
-	return 0;
-}
-
-
-/** Array of functions to compute flipped discs */
-unsigned long long (*flip[])(const unsigned long long, const unsigned long long) = {
-	flip_A1, flip_B1, flip_C1, flip_D1, flip_E1, flip_F1, flip_G1, flip_H1,
-	flip_A2, flip_B2, flip_C2, flip_D2, flip_E2, flip_F2, flip_G2, flip_H2,
-	flip_A3, flip_B3, flip_C3, flip_D3, flip_E3, flip_F3, flip_G3, flip_H3,
-	flip_A4, flip_B4, flip_C4, flip_D4, flip_E4, flip_F4, flip_G4, flip_H4,
-	flip_A5, flip_B5, flip_C5, flip_D5, flip_E5, flip_F5, flip_G5, flip_H5,
-	flip_A6, flip_B6, flip_C6, flip_D6, flip_E6, flip_F6, flip_G6, flip_H6,
-	flip_A7, flip_B7, flip_C7, flip_D7, flip_E7, flip_F7, flip_G7, flip_H7,
-	flip_A8, flip_B8, flip_C8, flip_D8, flip_E8, flip_F8, flip_G8, flip_H8,
-	flip_pass, flip_pass
-};
-
->>>>>>> b3f048d (copyright changes)
-=======
->>>>>>> 1525ec4 (Use same OutflankToFlip as flip_bitscan, and fix typo bug)
diff --git a/src/flip_carry_sse_32.c b/src/flip_carry_sse_32.c
index d99bbdb..ff0e0af 100644
--- a/src/flip_carry_sse_32.c
+++ b/src/flip_carry_sse_32.c
@@ -41,15 +41,7 @@
  * @version 4.4
  */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-=======
-#ifdef USE_GAS_MMX
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
 #include <string.h>	// memcpy
 #endif
 
@@ -73,35 +65,6 @@
 #define	STATIC	static
 #endif
 
-<<<<<<< HEAD
-#ifdef __TURBOC__
-// bcc32 -c -pr -O1 flip_carry_sse_32.c
-#pragma warn -ngu
-#define	UINT64	unsigned __int64
-#else
-<<<<<<< HEAD
-#include "bit.h"
-#define	UINT64	unsigned long long
-#endif
-
-#ifdef USE_GAS_MMX
-#define	STATIC	__attribute__((used))
-#ifdef __unix__
-#define	_
-#else
-#define	_	"_"
-#endif
-#else
-#define	STATIC	static
-#endif
-
-=======
-#define	UINT64	unsigned long long
-#endif
-
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
->>>>>>> 72924b1 (Fix macro expansion; correct comments)
 #define	ULL(H,L)	(((UINT64) (H) << 32) | (L))
 
 /** outflank array (indexed with inner 6 bits) */
@@ -154,15 +117,7 @@ STATIC const unsigned char OUTFLANK_5[64] = {
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 }; */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-STATIC const unsigned char OUTFLANK_7[64] = {
-=======
-static const unsigned char OUTFLANK_7[64] = {
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 STATIC const unsigned char OUTFLANK_7[64] = {
->>>>>>> 6506166 (More SSE optimizations)
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
@@ -171,15 +126,7 @@ STATIC const unsigned char OUTFLANK_7[64] = {
 
 
 /** flip array (indexed with outflank, returns inner 6 bits) */
-<<<<<<< HEAD
-<<<<<<< HEAD
 STATIC const UINT64 FLIPPED_2_H[130] = {
-=======
-static const UINT64 FLIPPED_2_H[130] = {
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-STATIC const UINT64 FLIPPED_2_H[130] = {
->>>>>>> 6506166 (More SSE optimizations)
 	0x0000000000000000, 0x0202020202020202, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0808080808080808, 0x0a0a0a0a0a0a0a0a, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
@@ -199,15 +146,7 @@ STATIC const UINT64 FLIPPED_2_H[130] = {
 	0x7878787878787878, 0x7a7a7a7a7a7a7a7a
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-STATIC const UINT64 FLIPPED_3_H[131] = {
-=======
-static const UINT64 FLIPPED_3_H[131] = {
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 STATIC const UINT64 FLIPPED_3_H[131] = {
->>>>>>> 6506166 (More SSE optimizations)
 	0x0000000000000000, 0x0606060606060606, 0x0404040404040404, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
@@ -227,15 +166,7 @@ STATIC const UINT64 FLIPPED_3_H[131] = {
 	0x7070707070707070, 0x7676767676767676, 0x7474747474747474
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 STATIC const UINT64 FLIPPED_4_H[133] = {
-=======
-static const UINT64 FLIPPED_4_H[133] = {
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-STATIC const UINT64 FLIPPED_4_H[133] = {
->>>>>>> 6506166 (More SSE optimizations)
 	0x0000000000000000, 0x0e0e0e0e0e0e0e0e, 0x0c0c0c0c0c0c0c0c, 0x0000000000000000, 0x0808080808080808, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
@@ -255,15 +186,7 @@ STATIC const UINT64 FLIPPED_4_H[133] = {
 	0x6060606060606060, 0x6e6e6e6e6e6e6e6e, 0x6c6c6c6c6c6c6c6c, 0x0000000000000000, 0x6868686868686868
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-STATIC const UINT64 FLIPPED_5_H[137] = {
-=======
-static const UINT64 FLIPPED_5_H[137] = {
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 STATIC const UINT64 FLIPPED_5_H[137] = {
->>>>>>> 6506166 (More SSE optimizations)
 	0x0000000000000000, 0x1e1e1e1e1e1e1e1e, 0x1c1c1c1c1c1c1c1c, 0x0000000000000000, 0x1818181818181818, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x1010101010101010, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
@@ -284,15 +207,7 @@ STATIC const UINT64 FLIPPED_5_H[137] = {
 	0x5050505050505050
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-STATIC const UINT64 FLIPPED_3_V[131] = {
-=======
-static const UINT64 FLIPPED_3_V[131] = {
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 STATIC const UINT64 FLIPPED_3_V[131] = {
->>>>>>> 6506166 (More SSE optimizations)
 	0x0000000000000000, 0x0000000000ffff00, 0x0000000000ff0000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
@@ -312,15 +227,7 @@ STATIC const UINT64 FLIPPED_3_V[131] = {
 	0x00ffffff00000000, 0x00ffffff00ffff00, 0x00ffffff00ff0000
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 STATIC const UINT64 FLIPPED_4_V[133] = {
-=======
-static const UINT64 FLIPPED_4_V[133] = {
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-STATIC const UINT64 FLIPPED_4_V[133] = {
->>>>>>> 6506166 (More SSE optimizations)
 	0x0000000000000000, 0x00000000ffffff00, 0x00000000ffff0000, 0x0000000000000000, 0x00000000ff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
@@ -340,15 +247,7 @@ STATIC const UINT64 FLIPPED_4_V[133] = {
 	0x00ffff0000000000, 0x00ffff00ffffff00, 0x00ffff00ffff0000, 0x0000000000000000, 0x00ffff00ff000000
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-STATIC const UINT64 FLIPPED_5_V[137] = {
-=======
-static const UINT64 FLIPPED_5_V[137] = {
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 STATIC const UINT64 FLIPPED_5_V[137] = {
->>>>>>> 6506166 (More SSE optimizations)
 	0x0000000000000000, 0x000000ffffffff00, 0x000000ffffff0000, 0x0000000000000000, 0x000000ffff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x000000ff00000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
 	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
@@ -369,26 +268,11 @@ STATIC const UINT64 FLIPPED_5_V[137] = {
 	0x00ff00ff00000000
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-<<<<<<< HEAD
-/*
- * Set all bits below the sole outflank bit if outfrank != 0
- */
-#if (_MSC_VER >= 1800) && (defined(_M_IX86) || defined (_M_X64))
-=======
 
 /*
  * Set all bits below the sole outflank bit if outfrank != 0
  */
-<<<<<<< HEAD
-#if _MSC_VER >= 1800
->>>>>>> 6506166 (More SSE optimizations)
-=======
 #if (_MSC_VER >= 1800) && (defined(_M_IX86) || defined (_M_X64))
->>>>>>> f2da03e (Refine arm builds adding neon support.)
 static inline unsigned long long OutflankToFlipmask(unsigned long long outflank) {
 	unsigned int flipmaskL, flipmaskH, outflankH = outflank >> 32;
 	unsigned char cy;
@@ -400,39 +284,10 @@ static inline unsigned long long OutflankToFlipmask(unsigned long long outflank)
 }
 #else
 	#define OutflankToFlipmask(x)	((x) - (unsigned int) ((x) != 0))
-<<<<<<< HEAD
-=======
-#ifdef hasSSE2
-	#include <x86intrin.h>
-	#define	SSE2
-#else
-=======
-#if !defined(hasSSE2) && defined(USE_GAS_MMX)
->>>>>>> 1dc032e (Improve visual c compatibility)
-	// #pragma GCC push_options
-	// #pragma GCC target ("sse2")
-	#include <emmintrin.h>
-	// #pragma GCC pop_options
-	#define	SSE2	/* __attribute__ ((__target__ ("sse2"))) */	// seems still buggy, 
-		// therefore SSE intrinsics cannot be used in dispatching version.
-<<<<<<< HEAD
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-#endif
-
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-=======
-#else
-	#define	SSE2
 #endif
 
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 
-static const V2DI	minusone = {{ -1LL, -1LL }};
->>>>>>> 1dc032e (Improve visual c compatibility)
-
-<<<<<<< HEAD
 #define minusone	_mm_set1_epi32(-1)
 static const V2DI	k1e52 = {{ (1023ULL + 52) << 52, (1023ULL + 52) << 52 }};
 static const V2DI	expmask = {{ 0xfff0000000000000, 0xfff0000000000000 }};
@@ -464,60 +319,6 @@ STATIC const UINT64 FLIPPED_7_V[33] = {
 	0x00ff000000000000
 };
 
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
-=======
-#if 0 // hasMMX, hasSSE2
-static const UINT64 mask_c1c8 = 0x0404040404040404;
-static const UINT64 mask_d1d8 = 0x0808080808080808;
-static const UINT64 mask_e1e8 = 0x1010101010101010;
-static const UINT64 mask_f1f8 = 0x2020202020202020;
-=======
-#endif
-
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-
-#define minusone	_mm_set1_epi32(-1)
-static const V2DI	k1e52 = {{ (1023ULL + 52) << 52, (1023ULL + 52) << 52 }};
-static const V2DI	expmask = {{ 0xfff0000000000000, 0xfff0000000000000 }};
-static const V2DI	minustwo   = {{ 0xfffefffefffefffe, 0xfffefffefffefffe }};
-static const V2DI	minusfour  = {{ 0xfffcfffcfffcfffc, 0xfffcfffcfffcfffc }};
-static const V2DI	minuseight = {{ 0xfff8fff8fff8fff8, 0xfff8fff8fff8fff8 }};
-static const V2DI	minus0400  = {{ 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00 }};
-static const V2DI	minus0800  = {{ 0xf800f800f800f800, 0xf800f800f800f800 }};
-static const V2DI	k02000100  = {{ 0x0100000000000100, 0x0200000000000200 }};
-
->>>>>>> 6506166 (More SSE optimizations)
-
-STATIC const UINT64 OUTFLANK_7_V[64] = {
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000,
-	0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000, 0x0000ff0000000000,
-	0x000000ff00000000, 0x000000ff00000000, 0x000000ff00000000, 0x000000ff00000000, 0x000000ff00000000, 0x000000ff00000000, 0x000000ff00000000, 0x000000ff00000000,
-	0x00000000ff000000, 0x00000000ff000000, 0x00000000ff000000, 0x00000000ff000000, 0x0000000000ff0000, 0x0000000000ff0000, 0x000000000000ff00, 0x00000000000000ff
-};
-
-
-<<<<<<< HEAD
-static const UINT64 mask_a3f8 = 0x2010080402010000;
-static const UINT64 mask_a2g8 = 0x4020100804020100;
-static const UINT64 mask_a1h8 = 0x8040201008040201;
-static const UINT64 mask_b1h7 = 0x0080402010080402;
-static const UINT64 mask_c1h6 = 0x0000804020100804;
-#endif
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-STATIC const UINT64 FLIPPED_7_V[33] = {
-	0x0000000000000000, 0x00ffffffffffff00, 0x00ffffffffff0000, 0x0000000000000000, 0x00ffffffff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ffffff00000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ffff0000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ff000000000000
-};
->>>>>>> 6506166 (More SSE optimizations)
-
 #if defined(hasSSE2) || defined(USE_MSVC_X86)
 
 #define	SWAP64	0x4e	// for _mm_shuffle_epi32
@@ -554,15 +355,7 @@ static inline __m128i MS1B_epi52(__m128i x) {
  * 0xffffffffffffffff (-1) if outflank is 0
  * 0x0000000000000000 ( 0) if a 1 is in 64 bit
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 static inline __m128i flipmask (__m128i outflank) {
-=======
-static inline __m128i SSE2 flipmask (__m128i outflank) {
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-static inline __m128i flipmask (__m128i outflank) {
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	return _mm_cmpeq_epi32(_mm_shuffle_epi32(outflank, SWAP32), outflank);
 }
 
@@ -578,53 +371,17 @@ static inline __m128i load64x2 (const UINT64 *x0, const UINT64 *x1) {
  *
  * AMD 47414 pp.96
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-static inline __m128i set1_by_movd (unsigned int L, unsigned int H) {
-=======
-static inline __m128i SSE2 set1_by_movd (unsigned int L, unsigned int H) {
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
 static inline __m128i set1_by_movd (unsigned int L, unsigned int H) {
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	__m128i	Y;
 	Y = _mm_unpacklo_epi32(_mm_cvtsi32_si128(L), _mm_cvtsi32_si128(H));
 	return _mm_unpacklo_epi64(Y, Y);
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 6506166 (More SSE optimizations)
 #define FLIP_CARRY_2_VEC(flip_l)	__m128i	outflank_vd;\
 	outflank_vd = _mm_andnot_si128(set1_by_movd(OL, OH), mask.v2);\
 	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), set1_by_movd(PL, PH));\
 	outflank_vd = _mm_and_si128(mask.v2, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));\
 	flipped = _mm_cvtsi128_si64(_mm_or_si128(outflank_vd, _mm_shuffle_epi32(outflank_vd, SWAP64))) | (flip_l)
-<<<<<<< HEAD
-=======
-/**
- * _mm_movepi64_pi64 equivalent to avoid large-to-small mismatch
- *
- * AMD 47414 pp.96
- */
-static inline UINT64 SSE2 movepi64_by_movd(__v2di x)
-{
-#ifdef __SSE2__
-	return ((unsigned int) _mm_cvtsi128_si32(x))
-		| ((UINT64) _mm_cvtsi128_si32(_mm_srli_epi64(x, 32)) << 32);
-#else
-	UINT64 y;
-	__asm__ ( "movd	%1,%%eax\n\t"
-		"psrlq	$32,%1\n\t"
-		"movd	%1,%%edx"
-		: "=A" (y) : "x" (x));
-	return y;
-#endif
-}
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
 
 #define FLIP_CARRY_AB12(next)	__m128i outflank_vd, outflank_h, flipped_v2, PP, OO;\
 	const __m128i next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[next]);\
@@ -638,15 +395,6 @@ static inline UINT64 SSE2 movepi64_by_movd(__v2di x)
 	flipped = _mm_cvtsi128_si64(_mm_or_si128(flipped_v2, _mm_shuffle_epi32(flipped_v2, SWAP64)))
 
 #define	FLIP_CARRY_3_VEC(flip_l)	__m128i	outflank_vd, outflank_d_, PP, OO, mask2_;\
-=======
-#define FLIP_CARRY_2_VEC(flip_l)	__v2di	outflank_v_d;\
-	outflank_v_d = _mm_andnot_si128(mask, (set1_by_movd(OL, OH) | mask) - minusone) & set1_by_movd(PL, PH);\
-	outflank_v_d = _mm_andnot_si128(mask, outflank_v_d - (flipmask(outflank_v_d) - minusone));\
-	outflank_v_d |= _mm_shuffle_epi32(outflank_v_d, SWAP64);\
-	flipped = (flip_l) | movepi64_by_movd(outflank_v_d)
-
-#define	FLIP_CARRY_3_VEC(flip_l)	__v2di	outflank_v_d, outflank_d_0, PP, OO, mask2_;\
->>>>>>> e558fdb (Some cleanups for clang / android build)
 	OO = set1_by_movd(OL, OH);\
 	PP = set1_by_movd(PL, PH);\
 	mask2_ = _mm_loadl_epi64((__m128i *) &mask2);\
@@ -681,75 +429,6 @@ static inline UINT64 SSE2 movepi64_by_movd(__v2di x)
 	__m128i outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);\
 	flipped_v2 = _mm_or_si128(flipped_v2, _mm_subs_epu8(outflank_h, next_h));\
 	flipped = _mm_cvtsi128_si64(_mm_or_si128(flipped_v2, _mm_shuffle_epi32(flipped_v2, SWAP64)))
-=======
-#define FLIP_CARRY_2_VEC(flip_l)	__m128i	outflank_v_d;\
-	outflank_v_d = _mm_and_si128(_mm_andnot_si128(mask.v2, _mm_sub_epi64(_mm_or_si128(set1_by_movd(OL, OH), mask.v2), minusone.v2)), set1_by_movd(PL, PH));\
-	outflank_v_d = _mm_andnot_si128(mask.v2, _mm_sub_epi64(outflank_v_d, _mm_sub_epi64(flipmask(outflank_v_d), minusone.v2)));\
-	outflank_v_d = _mm_or_si128(outflank_v_d, _mm_shuffle_epi32(outflank_v_d, SWAP64));\
-	flipped = (flip_l) | _mm_cvtsi128_si64(outflank_v_d)
-=======
->>>>>>> 6506166 (More SSE optimizations)
-
-#define FLIP_CARRY_AB12(next)	__m128i outflank_vd, outflank_h, flipped_v2, PP, OO;\
-	const __m128i next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[next]);\
-	OO = set1_by_movd(OL, OH);\
-	PP = set1_by_movd(PL, PH);\
-	outflank_vd = _mm_andnot_si128(OO, mask.v2);\
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);\
-	flipped_v2 = _mm_and_si128(mask.v2, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));\
-	outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);\
-	flipped_v2 = _mm_or_si128(flipped_v2, _mm_subs_epu8(outflank_h, next_h));\
-	flipped = _mm_cvtsi128_si64(_mm_or_si128(flipped_v2, _mm_shuffle_epi32(flipped_v2, SWAP64)))
-
-#define	FLIP_CARRY_3_VEC(flip_l)	__m128i	outflank_vd, outflank_d_, PP, OO, mask2_;\
-	OO = set1_by_movd(OL, OH);\
-	PP = set1_by_movd(PL, PH);\
-	mask2_ = _mm_loadl_epi64((__m128i *) &mask2);\
-<<<<<<< HEAD
-	outflank_v_d = _mm_and_si128(_mm_andnot_si128(mask01.v2, _mm_sub_epi64(_mm_or_si128(OO, mask01.v2), minusone.v2)), PP);\
-	outflank_d_0 = _mm_and_si128(_mm_andnot_si128(mask2_, _mm_sub_epi64(_mm_or_si128(OO, mask2_), minusone.v2)), PP);\
-	outflank_v_d = _mm_andnot_si128(mask01.v2, _mm_sub_epi64(outflank_v_d, _mm_sub_epi64(flipmask(outflank_v_d), minusone.v2)));\
-	outflank_d_0 = _mm_andnot_si128(mask2_, _mm_sub_epi64(outflank_d_0, _mm_sub_epi64(flipmask(outflank_d_0), minusone.v2)));\
-	outflank_v_d = _mm_or_si128(outflank_v_d, _mm_or_si128(_mm_shuffle_epi32(outflank_v_d, SWAP64), outflank_d_0));\
-<<<<<<< HEAD
-	flipped = (flip_l) | movepi64_by_movd(outflank_v_d)
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	flipped = (flip_l) | _mm_cvtsi128_si64(outflank_v_d)
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
-	outflank_vd = _mm_and_si128(_mm_andnot_si128(mask01.v2, _mm_sub_epi64(_mm_or_si128(OO, mask01.v2), minusone)), PP);\
-	outflank_d_ = _mm_and_si128(_mm_andnot_si128(mask2_, _mm_sub_epi64(_mm_or_si128(OO, mask2_), minusone)), PP);\
-	outflank_vd = _mm_andnot_si128(mask01.v2, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));\
-	outflank_d_ = _mm_andnot_si128(mask2_, _mm_sub_epi64(outflank_d_, _mm_sub_epi64(flipmask(outflank_d_), minusone)));\
-	outflank_vd = _mm_or_si128(outflank_vd, _mm_or_si128(_mm_shuffle_epi32(outflank_vd, SWAP64), outflank_d_));\
-	flipped = _mm_cvtsi128_si64(outflank_vd) | (flip_l)
-
-#define FLIP_CARRY_GH12	__m128i outflank_vd, outflank_h, flipped_v2, PP, OO;\
-	OO = set1_by_movd(OL, OH);\
-	PP = set1_by_movd(PL, PH);\
-	outflank_vd = _mm_andnot_si128(OO, mask1.v2);\
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);\
-	flipped_v2 = _mm_and_si128(mask1.v2, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));\
-	outflank_h = _mm_and_si128(MS1B_epi32(_mm_andnot_si128(OO, mask2.v2)), PP);\
-	flipped_v2 = _mm_or_si128(flipped_v2, _mm_and_si128(_mm_mullo_epi16(outflank_h, minustwo.v2), mask2.v2));\
-	flipped = _mm_cvtsi128_si64(_mm_or_si128(flipped_v2, _mm_shuffle_epi32(flipped_v2, SWAP64)))
-
-#define FLIP_MS1B_2_VEC	__m128i PP = set1_by_movd(PL, PH);\
-	__m128i OO = set1_by_movd(OL, OH);\
-	__m128i outflank_vd = _mm_and_si128(MS1B_epi52(_mm_andnot_si128(OO, mask1.v2)), PP);\
-	__m128i flipped_v2 = _mm_and_si128(_mm_sub_epi64(_mm_setzero_si128(), _mm_add_epi64(outflank_vd, outflank_vd)), mask1.v2);\
-	flipped = _mm_cvtsi128_si64(_mm_or_si128(flipped_v2, _mm_shuffle_epi32(flipped_v2, SWAP64)))
-
-#define FLIP_MS1B_AB78(next)	__m128i PP = set1_by_movd(PL, PH);\
-	__m128i OO = set1_by_movd(OL, OH);\
-	__m128i outflank_vd = _mm_and_si128(MS1B_epi52(_mm_andnot_si128(OO, mask1.v2)), PP);\
-	__m128i flipped_v2 = _mm_and_si128(_mm_sub_epi64(_mm_setzero_si128(), _mm_add_epi64(outflank_vd, outflank_vd)), mask1.v2);\
-	__m128i next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[next]);\
-	__m128i outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);\
-	flipped_v2 = _mm_or_si128(flipped_v2, _mm_subs_epu8(outflank_h, next_h));\
-	flipped = _mm_cvtsi128_si64(_mm_or_si128(flipped_v2, _mm_shuffle_epi32(flipped_v2, SWAP64)))
->>>>>>> 6506166 (More SSE optimizations)
 
 #else
 
@@ -822,10 +501,6 @@ static inline UINT64 SSE2 movepi64_by_movd(__v2di x)
 	: "=A" (flipped)\
 	: "m" (PL), "m" (PH), "m" (OL), "m" (OH), "m" (mask01), "m" (mask2), "a" (flip_l))
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 6506166 (More SSE optimizations)
 #define FLIP_CARRY_GH12	__asm__(\
 		"movd	%4, %%xmm3\n\t"		"movd	%2, %%xmm2\n\t"\
 		"movd	%3, %%xmm1\n\t"		"movd	%1, %%xmm0\n\t"		"pcmpeqd %%xmm4, %%xmm4\n\t"\
@@ -901,15 +576,6 @@ static inline UINT64 SSE2 movepi64_by_movd(__v2di x)
 #endif // hasSSE2
 
 #endif // has| USE_GAS_MMX | _P_IX86
-=======
-#endif // hasSSE2
-
-<<<<<<< HEAD
-#endif // hasSSE2 | USE_GAS_MMX | _P_IX86
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-#endif // has| USE_GAS_MMX | _P_IX86
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 
 /**
  * Compute flipped discs when playing on square A1.
@@ -921,68 +587,25 @@ static inline UINT64 SSE2 movepi64_by_movd(__v2di x)
 #ifndef hasSSE2
 static UINT64 flip_A1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	unsigned char outflank_h;
 	UINT64 flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0101010101010100) + 1) & ULL(PH, PL) & 0x0101010101010000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010100;
-=======
-	unsigned int outflank_h;
-	UINT64 flipped, outflank_v, outflank_d9;
-
-	outflank_v = ((ULL(OH, OL) | ~0x0101010101010100) + 1) & ULL(PH, PL) & 0x0101010101010000;
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0001010101010100;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	unsigned char outflank_h;
-	UINT64 flipped, outflank_v, outflank_d9;
-
-	outflank_v = ((ULL(OH, OL) | ~0x0101010101010100) + 1) & ULL(PH, PL) & 0x0101010101010000;
-	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010100;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = (OL + 0x02) & PL;
 	flipped |= ((outflank_h * 0xff) >> 8) & 0x7e;
 
 	outflank_d9 = ((ULL(OH, OL) | ~0x8040201008040200) + 1) & ULL(PH, PL) & 0x8040201008040000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040200;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0040201008040200;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040200;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_A1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-<<<<<<< HEAD
 {
 	static const V2DI mask = {{ 0x0101010101010100, 0x8040201008040200 }};
-=======
-#ifdef USE_GAS_MMX
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-static UINT64 SSE2 flip_sse_A1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x0101010101010100, ~0x8040201008040200 }};
-	unsigned int outflank_h = ((OL & 0x7e) + 0x02) & PL;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	static const V2DI mask = {{ 0x0101010101010100, 0x8040201008040200 }};
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	FLIP_CARRY_AB12(1);
@@ -1001,68 +624,25 @@ static UINT64 SSE2 flip_sse_A1(unsigned int PL, unsigned int PH, unsigned int OL
 #ifndef hasSSE2
 static UINT64 flip_B1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	unsigned char outflank_h;
 	UINT64 flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0202020202020200) + 1) & ULL(PH, PL) & 0x0202020202020000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020200;
-=======
-	unsigned int outflank_h;
-	UINT64 flipped, outflank_v, outflank_d9;
-
-	outflank_v = ((ULL(OH, OL) | ~0x0202020202020200) + 1) & ULL(PH, PL) & 0x0202020202020000;
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0002020202020200;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	unsigned char outflank_h;
-	UINT64 flipped, outflank_v, outflank_d9;
-
-	outflank_v = ((ULL(OH, OL) | ~0x0202020202020200) + 1) & ULL(PH, PL) & 0x0202020202020000;
-	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020200;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = (OL + 0x04) & PL;
 	flipped |= ((outflank_h * 0xff) >> 8) & 0x7c;
 
 	outflank_d9 = ((ULL(OH, OL) | ~0x0080402010080400) + 1) & ULL(PH, PL) & 0x0080402010080000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0080402010080400;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0000402010080400;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0080402010080400;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_B1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-<<<<<<< HEAD
 {
 	static const V2DI mask = {{ 0x0202020202020200, 0x0080402010080400 }};
-=======
-#ifdef USE_GAS_MMX
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-static UINT64 SSE2 flip_sse_B1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x0202020202020200, ~0x0080402010080400 }};
-	unsigned int outflank_h = ((OL & 0x7c) + 0x04) & PL;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	static const V2DI mask = {{ 0x0202020202020200, 0x0080402010080400 }};
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	FLIP_CARRY_AB12(2);
@@ -1085,15 +665,7 @@ static UINT64 flip_C1(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0404040404040400) + 1) & ULL(PH, PL) & 0x0404040404040400;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040400;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0404040404040400;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040400;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_2[(OL >> 1) & 0x3f] & PL;
 	flipped |= (unsigned char) FLIPPED_2_H[outflank_h];
@@ -1101,41 +673,15 @@ static UINT64 flip_C1(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	flipped |= ((PL >> 7) & 0x00000200u & OL);
 
 	outflank_d9 = ((ULL(OH, OL) | ~0x0000804020100800) + 1) & ULL(PH, PL) & 0x0000804020100800;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000804020100800;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0000804020100800;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000804020100800;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_C1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const V2DI mask = {{ 0x0404040404040400, 0x0000804020100800 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_C1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask = { ~0x0404040404040400, ~0x0000804020100800 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_C1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x0404040404040400, ~0x0000804020100800 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
 	static const V2DI mask = {{ 0x0404040404040400, 0x0000804020100800 }};
->>>>>>> 6506166 (More SSE optimizations)
 	unsigned int outflank_h = OUTFLANK_2[(OL >> 1) & 0x3f] & PL;
 	UINT64 flipped;
 
@@ -1159,15 +705,7 @@ static UINT64 flip_D1(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0808080808080800) + 1) & ULL(PH, PL) & 0x0808080808080800;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080800;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0808080808080800;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080800;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_3[(OL >> 1) & 0x3f] & PL;
 	flipped |= (unsigned char) FLIPPED_3_H[outflank_h];
@@ -1179,28 +717,10 @@ static UINT64 flip_D1(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_D1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask = {{ 0x0808080808080800, 0x0000008040201000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_D1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask = { ~0x0808080808080800, ~0x0000008040201000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_D1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x0808080808080800, ~0x0000008040201000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	static const V2DI mask = {{ 0x0808080808080800, 0x0000008040201000 }};
->>>>>>> 6506166 (More SSE optimizations)
 	unsigned int outflank_h = OUTFLANK_3[(OL >> 1) & 0x3f] & PL;
 	unsigned int outflank_d7 = ((OL | ~0x01020400u) + 1) & PL & 0x01020400u;
 	UINT64 flipped;
@@ -1226,15 +746,7 @@ static UINT64 flip_E1(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v;
 
 	outflank_v = ((ULL(OH, OL) | ~0x1010101010101000) + 1) & ULL(PH, PL) & 0x1010101010101000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010101000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x1010101010101000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010101000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_4[(OL >> 1) & 0x3f] & PL;
 	flipped |= (unsigned char) FLIPPED_4_H[outflank_h];
@@ -1246,28 +758,10 @@ static UINT64 flip_E1(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_E1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask = {{ 0x1010101010101000, 0x0000000102040800 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_E1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask = { ~0x1010101010101000, ~0x0000000102040800 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_E1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x1010101010101000, ~0x0000000102040800 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	static const V2DI mask = {{ 0x1010101010101000, 0x0000000102040800 }};
->>>>>>> 6506166 (More SSE optimizations)
 	unsigned int outflank_h = OUTFLANK_4[(OL >> 1) & 0x3f] & PL;
 	unsigned int outflank_d9 = ((OL | ~0x80402000u) + 1) & PL & 0x80402000u;
 	UINT64 flipped;
@@ -1293,57 +787,23 @@ static UINT64 flip_F1(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((ULL(OH, OL) | ~0x2020202020202000) + 1) & ULL(PH, PL) & 0x2020202020202000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020202000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x2020202020202000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020202000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_5[(OL >> 1) & 0x3f] & PL;
 	flipped |= (unsigned char) FLIPPED_5_H[outflank_h];
 
 	outflank_d7 = ((ULL(OH, OL) | ~0x0000010204081000) + 1) & ULL(PH, PL) & 0x0000010204080000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000000204081000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0000000204081000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000000204081000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= ((PL >> 9) & 0x00004000u & OL);
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_F1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask = {{ 0x2020202020202000, 0x0000010204081000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_F1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask = { ~0x2020202020202000, ~0x0000010204081000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_F1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x2020202020202000, ~0x0000010204081000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	static const V2DI mask = {{ 0x2020202020202000, 0x0000010204081000 }};
->>>>>>> 6506166 (More SSE optimizations)
 	unsigned int outflank_h = OUTFLANK_5[(OL >> 1) & 0x3f] & PL;
 	UINT64 flipped;
 
@@ -1367,58 +827,22 @@ static UINT64 flip_G1(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((ULL(OH, OL) | ~0x4040404040404000) + 1) & ULL(PH, PL) & 0x4040404040400000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040404000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0040404040404000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040404000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[OL & 0x3e] & (PL << 1);
 	flipped |= ((-outflank_h) & 0x3e) << 0;
 
 	outflank_d7 = ((ULL(OH, OL) | ~0x0001020408102000) + 1) & ULL(PH, PL) & 0x0001020408100000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0001020408102000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0000020408102000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0001020408102000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_G1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-<<<<<<< HEAD
 {
 	static const V2DI mask1 = {{ 0x4040404040404000, 0x0001020408102000 }};
 	static const V2DI mask2 = {{ 0x000000000000003f, 0 }};
-=======
-#ifdef USE_GAS_MMX
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-static UINT64 SSE2 flip_sse_G1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x4040404040404000, ~0x0001020408102000 }};
-	unsigned int outflank_h = OUTFLANK_7[OL & 0x3e] & (PL << 1);
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	static const V2DI mask1 = {{ 0x4040404040404000, 0x0001020408102000 }};
-	static const V2DI mask2 = {{ 0x000000000000003f, 0 }};
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	FLIP_CARRY_GH12;
@@ -1441,58 +865,22 @@ static UINT64 flip_H1(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((ULL(OH, OL) | ~0x8080808080808000) + 1) & ULL(PH, PL) & 0x8080808080800000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080808000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0080808080808000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080808000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[(OL >> 1) & 0x3f] & PL;
 	flipped |= ((-outflank_h) & 0x3f) << 1;
 
 	outflank_d7 = ((ULL(OH, OL) | ~0x0102040810204000) + 1) & ULL(PH, PL) & 0x0102040810200000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810204000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0002040810204000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810204000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_H1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-<<<<<<< HEAD
 {
 	static const V2DI mask1 = {{ 0x8080808080808000, 0x0102040810204000 }};
 	static const V2DI mask2 = {{ 0x000000000000007f, 0 }};
-=======
-#ifdef USE_GAS_MMX
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-static UINT64 SSE2 flip_sse_H1(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x8080808080808000, ~0x0102040810204000 }};
-	unsigned int outflank_h = OUTFLANK_7[(OL >> 1) & 0x3f] & PL;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	static const V2DI mask1 = {{ 0x8080808080808000, 0x0102040810204000 }};
-	static const V2DI mask2 = {{ 0x000000000000007f, 0 }};
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	FLIP_CARRY_GH12;
@@ -1511,68 +899,25 @@ static UINT64 SSE2 flip_sse_H1(unsigned int PL, unsigned int PH, unsigned int OL
 #ifndef hasSSE2
 static UINT64 flip_A2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	unsigned short outflank_h;
 	UINT64 flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0101010101010000) + 1) & ULL(PH, PL) & 0x0101010101000000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010000;
-=======
-	unsigned int outflank_h;
-	UINT64 flipped, outflank_v, outflank_d9;
-
-	outflank_v = ((ULL(OH, OL) | ~0x0101010101010000) + 1) & ULL(PH, PL) & 0x0101010101000000;
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0001010101010000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	unsigned short outflank_h;
-	UINT64 flipped, outflank_v, outflank_d9;
-
-	outflank_v = ((ULL(OH, OL) | ~0x0101010101010000) + 1) & ULL(PH, PL) & 0x0101010101000000;
-	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101010000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = (OL + 0x0200) & PL;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x00007e00;
 
 	outflank_d9 = ((ULL(OH, OL) | ~0x4020100804020000) + 1) & ULL(PH, PL) & 0x4020100804000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x4020100804020000;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0020100804020000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x4020100804020000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_A2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-<<<<<<< HEAD
 {
 	static const V2DI mask = {{ 0x0101010101010000, 0x4020100804020000 }};
-=======
-#ifdef USE_GAS_MMX
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-static UINT64 SSE2 flip_sse_A2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x0101010101010000, ~0x4020100804020000 }};
-	unsigned int outflank_h = ((OL & 0x00007e00u) + 0x00000200u) & PL;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	static const V2DI mask = {{ 0x0101010101010000, 0x4020100804020000 }};
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	FLIP_CARRY_AB12(9);
@@ -1591,68 +936,25 @@ static UINT64 SSE2 flip_sse_A2(unsigned int PL, unsigned int PH, unsigned int OL
 #ifndef hasSSE2
 static UINT64 flip_B2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	unsigned short outflank_h;
-	UINT64 flipped, outflank_v, outflank_d9;
-
-	outflank_v = ((ULL(OH, OL) | ~0x0202020202020000) + 1) & ULL(PH, PL) & 0x0202020202000000;
-	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020000;
-=======
-	unsigned int outflank_h;
-	UINT64 flipped, outflank_v, outflank_d9;
-
-	outflank_v = ((ULL(OH, OL) | ~0x0202020202020000) + 1) & ULL(PH, PL) & 0x0202020202000000;
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0002020202020000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	unsigned short outflank_h;
 	UINT64 flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0202020202020000) + 1) & ULL(PH, PL) & 0x0202020202000000;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202020000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = (OL + 0x0400) & PL;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x00007c00;
 
 	outflank_d9 = ((ULL(OH, OL) | ~0x8040201008040000) + 1) & ULL(PH, PL) & 0x8040201008000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040000;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0040201008040000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008040000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_B2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-<<<<<<< HEAD
 {
 	static const V2DI mask = {{ 0x0202020202020000, 0x8040201008040000 }};
-=======
-#ifdef USE_GAS_MMX
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-static UINT64 SSE2 flip_sse_B2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x0202020202020000, ~0x8040201008040000 }};
-	unsigned int outflank_h = ((OL & 0x00007c00u) + 0x00000400u) & PL;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	static const V2DI mask = {{ 0x0202020202020000, 0x8040201008040000 }};
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	FLIP_CARRY_AB12(10);
@@ -1675,15 +977,7 @@ static UINT64 flip_C2(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0404040404040000) + 1) & ULL(PH, PL) & 0x0404040404000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0004040404040000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404040000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_2[(OL >> 9) & 0x3f] & (PL >> 8);
 	flipped |= (unsigned int) FLIPPED_2_H[outflank_h] & 0x0000ff00u;
@@ -1691,41 +985,15 @@ static UINT64 flip_C2(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	flipped |= ((PL >> 7) & 0x00020000u & OL);
 
 	outflank_d9 = ((ULL(OH, OL) | ~0x0080402010080000) + 1) & ULL(PH, PL) & 0x0080402010080000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000402010080000;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0000402010080000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0000402010080000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_C2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask = {{ 0x0404040404040000, 0x0080402010080000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_C2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask = { ~0x0404040404040000, ~0x0080402010080000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_C2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x0404040404040000, ~0x0080402010080000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	static const V2DI mask = {{ 0x0404040404040000, 0x0080402010080000 }};
->>>>>>> 6506166 (More SSE optimizations)
 	unsigned int outflank_h = OUTFLANK_2[(OL >> 9) & 0x3f] & (PL >> 8);
 	UINT64 flipped;
 
@@ -1749,15 +1017,7 @@ static UINT64 flip_D2(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0808080808080000) + 1) & ULL(PH, PL) & 0x0808080808000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0008080808080000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_3[(OL >> 9) & 0x3f] & (PL >> 8);
 	flipped |= (unsigned int) FLIPPED_3_H[outflank_h] & 0x0000ff00u;
@@ -1769,24 +1029,10 @@ static UINT64 flip_D2(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_D2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask01 = {{ ~0x0808080808080000, ~0x0000804020100000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_D2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask01 = { ~0x0808080808080000, ~0x0000804020100000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_D2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const V2DI mask01 = {{ ~0x0808080808080000, ~0x0000804020100000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
 	static const UINT64 mask2 = ~0x0000000102040000;
 	unsigned int outflank_h = OUTFLANK_3[(OL >> 9) & 0x3f] & (PL >> 8);
 	UINT64 flipped;
@@ -1811,15 +1057,7 @@ static UINT64 flip_E2(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v;
 
 	outflank_v = ((ULL(OH, OL) | ~0x1010101010100000) + 1) & ULL(PH, PL) & 0x1010101010000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010100000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0010101010100000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010100000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_4[(OL >> 9) & 0x3f] & (PL >> 8);
 	flipped |= (unsigned int) FLIPPED_4_H[outflank_h] & 0x0000ff00u;
@@ -1831,24 +1069,10 @@ static UINT64 flip_E2(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_E2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const V2DI mask01 = {{ ~0x1010101010100000, ~0x0000010204080000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_E2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask01 = { ~0x1010101010100000, ~0x0000010204080000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_E2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask01 = {{ ~0x1010101010100000, ~0x0000010204080000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
 	static const UINT64 mask2 = ~0x0000008040200000;
 	unsigned int outflank_h = OUTFLANK_4[(OL >> 9) & 0x3f] & (PL >> 8);
 	UINT64 flipped;
@@ -1873,57 +1097,23 @@ static UINT64 flip_F2(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((ULL(OH, OL) | ~0x2020202020200000) + 1) & ULL(PH, PL) & 0x2020202020000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020200000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0020202020200000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020200000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_5[(OL >> 9) & 0x3f] & (PL >> 8);
 	flipped |= (unsigned int) FLIPPED_5_H[outflank_h] & 0x0000ff00u;
 
 	outflank_d7 = ((ULL(OH, OL) | ~0x0001020408100000) + 1) & ULL(PH, PL) & 0x0001020408000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000020408100000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0000020408100000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0000020408100000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= ((PL >> 9) & 0x00400000u & OL);
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_F2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask = {{ 0x2020202020200000, 0x0001020408100000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_F2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask = { ~0x2020202020200000, ~0x0001020408100000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_F2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x2020202020200000, ~0x0001020408100000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	static const V2DI mask = {{ 0x2020202020200000, 0x0001020408100000 }};
->>>>>>> 6506166 (More SSE optimizations)
 	unsigned int outflank_h = OUTFLANK_5[(OL >> 9) & 0x3f] & (PL >> 8);
 	UINT64 flipped;
 
@@ -1947,58 +1137,22 @@ static UINT64 flip_G2(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((ULL(OH, OL) | ~0x4040404040400000) + 1) & ULL(PH, PL) & 0x4040404040000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040400000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0040404040400000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040400000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[(OL >> 8) & 0x3e] & (PL >> 7);
 	flipped |= ((-outflank_h) & 0x3e) << 8;
 
 	outflank_d7 = ((ULL(OH, OL) | ~0x0102040810200000) + 1) & ULL(PH, PL) & 0x0102040810000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810200000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0002040810200000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810200000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_G2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-<<<<<<< HEAD
 {
 	static const V2DI mask1 = {{ 0x4040404040400000, 0x0102040810200000 }};
 	static const V2DI mask2 = {{ 0x0000000000003f00, 0 }};
-=======
-#ifdef USE_GAS_MMX
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-static UINT64 SSE2 flip_sse_G2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x4040404040400000, ~0x0102040810200000 }};
-	unsigned int outflank_h = OUTFLANK_7[(OL >> 8) & 0x3e] & (PL >> 7);
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	static const V2DI mask1 = {{ 0x4040404040400000, 0x0102040810200000 }};
-	static const V2DI mask2 = {{ 0x0000000000003f00, 0 }};
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	FLIP_CARRY_GH12;
@@ -2021,58 +1175,22 @@ static UINT64 flip_H2(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((ULL(OH, OL) | ~0x8080808080800000) + 1) & ULL(PH, PL) & 0x8080808080000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080800000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0080808080800000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080800000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[(OL >> 9) & 0x3f] & (PL >> 8);
 	flipped |= ((-outflank_h) & 0x3f) << 9;
 
 	outflank_d7 = ((ULL(OH, OL) | ~0x0204081020400000) + 1) & ULL(PH, PL) & 0x0204081020000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0204081020400000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0004081020400000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0204081020400000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_H2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-<<<<<<< HEAD
 {
 	static const V2DI mask1 = {{ 0x8080808080800000, 0x0204081020400000 }};
 	static const V2DI mask2 = {{ 0x0000000000007f00, 0 }};
-=======
-#ifdef USE_GAS_MMX
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-static UINT64 SSE2 flip_sse_H2(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x8080808080800000, ~0x0204081020400000 }};
-	unsigned int outflank_h = OUTFLANK_7[(OL >> 9) & 0x3f] & (PL >> 8);
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	static const V2DI mask1 = {{ 0x8080808080800000, 0x0204081020400000 }};
-	static const V2DI mask2 = {{ 0x0000000000007f00, 0 }};
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	FLIP_CARRY_GH12;
@@ -2095,57 +1213,23 @@ static UINT64 flip_A3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0101010101000000) + 1) & ULL(PH, PL) & 0x0101010101000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0101010101000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x0101010101000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = ((OL & 0x007e0000u) + 0x00020000u) & PL;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x007e0000u;
 
 	outflank_d9 = ((ULL(OH, OL) | ~0x2010080402000000) + 1) & ULL(PH, PL) & 0x2010080400000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0010080402000000;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0010080402000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0010080402000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= OL & (((PL << 8) & 0x00000100u) | ((PL << 7) & 0x00000200u));
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_A3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask = {{ 0x0101010101000000, 0x2010080402000000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_A3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask = { ~0x0101010101000000, ~0x2010080402000000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_A3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x0101010101000000, ~0x2010080402000000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	static const V2DI mask = {{ 0x0101010101000000, 0x2010080402000000 }};
->>>>>>> 6506166 (More SSE optimizations)
 	unsigned int outflank_h = ((OL & 0x007e0000u) + 0x00020000u) & PL;
 	UINT64 flipped;
 
@@ -2170,57 +1254,23 @@ static UINT64 flip_B3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0202020202000000) + 1) & ULL(PH, PL) & 0x0202020202000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0202020202000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x0202020202000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = ((OL & 0x007c0000u) + 0x00040000u) & PL;
 	flipped |= (outflank_h - (outflank_h >> 8)) & 0x007c0000u;
 
 	outflank_d9 = ((ULL(OH, OL) | ~0x4020100804000000) + 1) & ULL(PH, PL) & 0x4020100800000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x0020100804000000;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x0020100804000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x0020100804000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= OL & (((PL << 8) & 0x00000200u) | ((PL << 7) & 0x00000400u));
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_B3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const V2DI mask = {{ 0x0202020202000000, 0x4020100804000000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_B3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask = { ~0x0202020202000000, ~0x4020100804000000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_B3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x0202020202000000, ~0x4020100804000000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
 	static const V2DI mask = {{ 0x0202020202000000, 0x4020100804000000 }};
->>>>>>> 6506166 (More SSE optimizations)
 	unsigned int outflank_h = ((OL & 0x007c0000u) + 0x00040000u) & PL;
 	UINT64 flipped;
 
@@ -2245,29 +1295,13 @@ static UINT64 flip_C3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d9;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0404040404000000) + 1) & ULL(PH, PL) & 0x0404040404000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0404040404000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x0404040404000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_2[(OL >> 17) & 0x3f] & (PL >> 16);
 	flipped |= (unsigned int) FLIPPED_2_H[outflank_h] & 0x00ff0000u;
 
 	outflank_d9 = ((ULL(OH, OL) | ~0x8040201008000000) + 1) & ULL(PH, PL) & 0x8040201008000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008000000;
-=======
-	flipped |= (outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x8040201008000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped |= OutflankToFlipmask(outflank_d9) & 0x8040201008000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= OL & (((PL << 8) & 0x00000400u)
 			| ((PL << 9) & 0x00000200u)
@@ -2276,28 +1310,10 @@ static UINT64 flip_C3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_C3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask = {{ 0x0404040404000000, 0x8040201008000000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_C3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask = { ~0x0404040404000000, ~0x8040201008000000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_C3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x0404040404000000, ~0x8040201008000000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	static const V2DI mask = {{ 0x0404040404000000, 0x8040201008000000 }};
->>>>>>> 6506166 (More SSE optimizations)
 	unsigned int outflank_h = OUTFLANK_2[(OL >> 17) & 0x3f] & (PL >> 16);
 	UINT64 flipped;
 
@@ -2324,15 +1340,7 @@ static UINT64 flip_D3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v;
 
 	outflank_v = ((ULL(OH, OL) | ~0x0808080808000000) + 1) & ULL(PH, PL) & 0x0808080808000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x0808080808000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_3[(OL >> 17) & 0x3f] & (PL >> 16);
 	flipped |= (unsigned int) FLIPPED_3_H[outflank_h] & 0x00ff0000u;
@@ -2348,24 +1356,10 @@ static UINT64 flip_D3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_D3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const V2DI mask01 = {{ ~0x0808080808000000, ~0x0080402010000000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_D3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask01 = { ~0x0808080808000000, ~0x0080402010000000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_D3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask01 = {{ ~0x0808080808000000, ~0x0080402010000000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
 	static const UINT64 mask2 = ~0x0000010204000000;
 	unsigned int outflank_h = OUTFLANK_3[(OL >> 17) & 0x3f] & (PL >> 16);
 	UINT64 flipped;
@@ -2393,15 +1387,7 @@ static UINT64 flip_E3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v;
 
 	outflank_v = ((ULL(OH, OL) | ~0x1010101010000000) + 1) & ULL(PH, PL) & 0x1010101010000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x1010101010000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_4[(OL >> 17) & 0x3f] & (PL >> 16);
 	flipped |= (unsigned int) FLIPPED_4_H[outflank_h] & 0x00ff0000u;
@@ -2417,24 +1403,10 @@ static UINT64 flip_E3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_E3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const V2DI mask01 = {{ ~0x1010101010000000, ~0x0001020408000000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_E3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask01 = { ~0x1010101010000000, ~0x0001020408000000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_E3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask01 = {{ ~0x1010101010000000, ~0x0001020408000000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
 	static const UINT64 mask2 = ~0x0000804020000000;
 	unsigned int outflank_h = OUTFLANK_4[(OL >> 17) & 0x3f] & (PL >> 16);
 	UINT64 flipped;
@@ -2462,29 +1434,13 @@ static UINT64 flip_F3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((ULL(OH, OL) | ~0x2020202020000000) + 1) & ULL(PH, PL) & 0x2020202020000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x2020202020000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x2020202020000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_5[(OL >> 17) & 0x3f] & (PL >> 16);
 	flipped |= (unsigned int) FLIPPED_5_H[outflank_h] & 0x00ff0000u;
 
 	outflank_d7 = ((ULL(OH, OL) | ~0x0102040810000000) + 1) & ULL(PH, PL) & 0x0102040810000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810000000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0102040810000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0102040810000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= OL & (((PL << 8) & 0x00002000u)
 			| ((PL << 7) & 0x00004000u)
@@ -2493,28 +1449,10 @@ static UINT64 flip_F3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_F3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	static const V2DI mask = {{ 0x2020202020000000, 0x0102040810000000 }};
-=======
-#ifdef USE_GAS_MMX
-static UINT64 SSE2 flip_sse_F3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	static const __v2di mask = { ~0x2020202020000000, ~0x0102040810000000 };
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
-static UINT64 flip_sse_F3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-<<<<<<< HEAD
-	static const V2DI mask = {{ ~0x2020202020000000, ~0x0102040810000000 }};
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-	static const V2DI mask = {{ 0x2020202020000000, 0x0102040810000000 }};
->>>>>>> 6506166 (More SSE optimizations)
 	unsigned int outflank_h = OUTFLANK_5[(OL >> 17) & 0x3f] & (PL >> 16);
 	UINT64 flipped;
 
@@ -2541,70 +1479,30 @@ static UINT64 flip_G3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((ULL(OH, OL) | ~0x4040404040000000) + 1) & ULL(PH, PL) & 0x4040404040000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x4040404040000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x4040404040000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[(OL >> 16) & 0x3e] & (PL >> 15);
 	flipped |= ((-outflank_h) & 0x3e) << 16;
 
 	outflank_d7 = ((ULL(OH, OL) | ~0x0204081020000000) + 1) & ULL(PH, PL) & 0x0204081000000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0004081020000000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0004081020000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0004081020000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= OL & (((PL << 8) & 0x00004000u) | ((PL << 9) & 0x00002000u));
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_G3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-<<<<<<< HEAD
-{
-	static const V2DI mask1 = {{ 0x4040404040000000, 0x0204081020000000 }};
-	static const V2DI mask2 = {{ 0x0000000000000040, 0x0000000000000010 }};
-	static const V2DI mask3 = {{ 0x00000000003f0000, 0 }};
-	UINT64 flipped;
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
-	__m128i OO = set1_by_movd(OL, OH);
-	__m128i PP = set1_by_movd(PL, PH);
-	__m128i	outflank_vd, outflank_h, flipped_v2, flipped_h2g2;
-=======
-#ifdef USE_GAS_MMX
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-static UINT64 SSE2 flip_sse_G3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 {
 	static const V2DI mask1 = {{ 0x4040404040000000, 0x0204081020000000 }};
 	static const V2DI mask2 = {{ 0x0000000000000040, 0x0000000000000010 }};
 	static const V2DI mask3 = {{ 0x00000000003f0000, 0 }};
 	UINT64 flipped;
-<<<<<<< HEAD
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #if defined(hasSSE2) || defined(USE_MSVC_X86)
 	__m128i OO = set1_by_movd(OL, OH);
 	__m128i PP = set1_by_movd(PL, PH);
 	__m128i	outflank_vd, outflank_h, flipped_v2, flipped_h2g2;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_vd = _mm_andnot_si128(OO, mask1.v2);
 	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
@@ -2663,40 +1561,21 @@ static UINT64 flip_H3(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	UINT64 flipped, outflank_v, outflank_d7;
 
 	outflank_v = ((ULL(OH, OL) | ~0x8080808080000000) + 1) & ULL(PH, PL) & 0x8080808080000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080000000;
-=======
-	flipped = (outflank_v - (unsigned int) (outflank_v != 0)) & 0x8080808080000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	flipped = OutflankToFlipmask(outflank_v) & 0x8080808080000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_h = OUTFLANK_7[(OL >> 17) & 0x3f] & (PL >> 16);
 	flipped |= ((-outflank_h) & 0x3f) << 17;
 
 	outflank_d7 = ((ULL(OH, OL) | ~0x0408102040000000) + 1) & ULL(PH, PL) & 0x0408102000000000;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= OutflankToFlipmask(outflank_d7) & 0x0008102040000000;
-=======
-	flipped |= (outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x0008102040000000;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= OutflankToFlipmask(outflank_d7) & 0x0008102040000000;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flipped |= OL & (((PL << 8) & 0x00008000u) | ((PL << 9) & 0x00004000u));
 
 	return flipped;
 }
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
 static UINT64 flip_sse_H3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-<<<<<<< HEAD
 {
 	static const V2DI mask1 = {{ 0x8080808080000000, 0x0408102040000000 }};
 	static const V2DI mask2 = {{ 0x0000000000000080, 0x0000000000000020 }};
@@ -2706,27 +1585,6 @@ static UINT64 flip_sse_H3(unsigned int PL, unsigned int PH, unsigned int OL, uns
 	__m128i OO = set1_by_movd(OL, OH);
 	__m128i PP = set1_by_movd(PL, PH);
 	__m128i	outflank_vd, outflank_h, flipped_v2, flipped_h2g2;
-=======
-#ifdef USE_GAS_MMX
-=======
-#if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> 1dc032e (Improve visual c compatibility)
-static UINT64 SSE2 flip_sse_H3(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-{
-	static const V2DI mask1 = {{ 0x8080808080000000, 0x0408102040000000 }};
-	static const V2DI mask2 = {{ 0x0000000000000080, 0x0000000000000020 }};
-	static const V2DI mask3 = {{ 0x00000000007f0000, 0 }};
-	UINT64 flipped;
-<<<<<<< HEAD
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#if defined(hasSSE2) || defined(USE_MSVC_X86)
-	__m128i OO = set1_by_movd(OL, OH);
-	__m128i PP = set1_by_movd(PL, PH);
-	__m128i	outflank_vd, outflank_h, flipped_v2, flipped_h2g2;
->>>>>>> 6506166 (More SSE optimizations)
 
 	outflank_vd = _mm_andnot_si128(OO, mask1.v2);
 	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
@@ -2778,14 +1636,7 @@ static UINT64 SSE2 flip_sse_H3(unsigned int PL, unsigned int PH, unsigned int OL
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_A4(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, flip_d7, outflank_d9;
@@ -2802,15 +1653,7 @@ static UINT64 flip_A4(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	flip_d7 |= (flip_d7 >> 7) & OL;
 	flipped |= flip_d7 & -(flip_d7 & (PL << 7));
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d9 = ((OH | ~0x10080402u) + 1) & PH & 0x10080400u;
-=======
-	outflank_d9 = ((OH | ~0x10080402u) + 0x00000002u) & PH & 0x10080400u;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	outflank_d9 = ((OH | ~0x10080402u) + 1) & PH & 0x10080400u;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	flipped |= (UINT64) ((outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x00080402u) << 32;
 
 	return flipped;
@@ -2875,14 +1718,7 @@ static UINT64 flip_sse_A4(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_B4(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, flip_d7, outflank_d9;
@@ -2899,15 +1735,7 @@ static UINT64 flip_B4(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	flip_d7 |= (flip_d7 >> 7) & OL;
 	flipped |= flip_d7 & -(flip_d7 & (PL << 7));
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d9 = ((OH | ~0x20100804u) + 1) & PH & 0x20100800u;
-=======
-	outflank_d9 = ((OH | ~0x20100804u) + 0x00000004u) & PH & 0x20100800u;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	outflank_d9 = ((OH | ~0x20100804u) + 1) & PH & 0x20100800u;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	flipped |= (UINT64) ((outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x00100804u) << 32;
 
 	return flipped;
@@ -2972,87 +1800,7 @@ static UINT64 flip_sse_B4(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
-
-#if 0 // MMX - slower
-
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-static UINT64 flip_C4(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
-	UINT64 flipped;
-
-	outflank_v = OUTFLANK_3[(((OL & 0x04040400u) + ((OH & 0x00040404u) << 4)) * 0x00408102u) >> 25]
-		& ((((PL & 0x04040404u) + ((PH & 0x04040404u) << 4)) * 0x00408102u) >> 24);
-<<<<<<< HEAD
-=======
-	__asm__ volatile (
-		"movq	%0, %%mm0\n\t"
-		"pand	%1, %%mm0"
-	:: "m" (FLIPPED_3_V[outflank_v]), "m" (mask_c1c8));
-
-	outflank_h = OUTFLANK_2[(OL >> 25) & 0x3f] & (PL >> 24);
-	__asm__ volatile (
-		"movq	%0, %%mm1\n\t"
-		"pand	%1, %%mm1\n\t"
-		"por	%%mm1, %%mm0"
-	:: "m" (FLIPPED_2_H[outflank_h]), "m" (mask_a4h4));
-
-	outflank_d7 = OUTFLANK_2[(((OL & 0x04081000u) + (OH & 0x00000002u)) * 0x01010101u) >> 25]
-		& ((((PL & 0x04081020u) + (PH & 0x00000102u)) * 0x01010101u) >> 24);
-	__asm__ volatile (
-		"movq	%0, %%mm1\n\t"
-		"pand	%1, %%mm1\n\t"
-		"por	%%mm1, %%mm0"
-	:: "m" (FLIPPED_2_H[outflank_d7]), "m" (mask_f1a6));
-
-	outflank_d9 = OUTFLANK_2[(((OL & 0x04020000u) + (OH & 0x00201008u)) * 0x01010101u) >> 25]
-		& ((((PL & 0x04020100u) + (PH & 0x40201008u)) * 0x01010101u) >> 24);
-	__asm__ volatile (
-		"movq	%1, %%mm1\n\t"
-		"pand	%2, %%mm1\n\t"
-		"por	%%mm1, %%mm0\n\t"
-		"movd	%%mm0, %%eax\n\t"
-		"psrlq	$32, %%mm0\n\t"
-		"movd	%%mm0, %%edx\n\t"
-		"emms"
-	: "=A" (flipped) : "m" (FLIPPED_2_H[outflank_d9]), "m" (mask_a2g8));
-
-	return flipped;
-}
-
-#elif 0 // SSE2 - even slow
-
-static UINT64 flip_C4(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
-	__m128i flipped;
-
-	outflank_v = OUTFLANK_3[(((OL & 0x04040400u) + ((OH & 0x00040404u) << 4)) * 0x00408102u) >> 25]
-		& ((((PL & 0x04040404u) + ((PH & 0x04040404u) << 4)) * 0x00408102u) >> 24);
-	outflank_h = OUTFLANK_2[(OL >> 25) & 0x3f] & (PL >> 24);
-	flipped = _mm_set_epi64x(FLIPPED_2_H[outflank_h], FLIPPED_3_V[outflank_v])
-		& _mm_set_epi64x(mask_a4h4, mask_c1c8);
-
-	outflank_d7 = OUTFLANK_2[(((OL & 0x04081000u) + (OH & 0x00000002u)) * 0x01010101u) >> 25]
-		& ((((PL & 0x04081020u) + (PH & 0x00000102u)) * 0x01010101u) >> 24);
-	outflank_d9 = OUTFLANK_2[(((OL & 0x04020000u) + (OH & 0x00201008u)) * 0x01010101u) >> 25]
-		& ((((PL & 0x04020100u) + (PH & 0x40201008u)) * 0x01010101u) >> 24);
-	flipped |= _mm_set_epi64x(FLIPPED_2_H[outflank_d9], FLIPPED_2_H[outflank_d7])
-		& _mm_set_epi64x(mask_a2g8, mask_f1a6);
-
-	flipped |= _mm_shuffle_epi32(flipped, 0x4e);
-	return (UINT64) _mm_movepi64_pi64(flipped);	// flipped[0]
-}
-
-#else
-
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_C4(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
@@ -3060,7 +1808,6 @@ static UINT64 flip_C4(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 
 	outflank_v = OUTFLANK_3[(((OL & 0x04040400u) + ((OH & 0x00040404u) << 4)) * 0x00408102u) >> 25]
 		& ((((PL & 0x04040404u) + ((PH & 0x04040404u) << 4)) * 0x00408102u) >> 24);
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
 	flipped = FLIPPED_3_V[outflank_v] & 0x0004040404040400;
 
 	outflank_h = OUTFLANK_2[(OL >> 25) & 0x3f] & (PL >> 24);
@@ -3138,14 +1885,7 @@ static UINT64 flip_sse_C4(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_D4(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
@@ -3247,14 +1987,7 @@ static UINT64 flip_sse_D4(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_E4(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
@@ -3356,14 +2089,7 @@ static UINT64 flip_sse_E4(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_F4(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
@@ -3448,14 +2174,7 @@ static UINT64 flip_sse_F4(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_G4(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, flip_d9;
@@ -3468,15 +2187,7 @@ static UINT64 flip_G4(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	outflank_h = OUTFLANK_7[(OL >> 24) & 0x3e] & (PL >> 23);
 	flipped |= ((-outflank_h) & 0x3e) << 24;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_d7 = ((OH | ~0x04081020u) + 1) & PH & 0x04081000u;
-=======
-	outflank_d7 = ((OH | ~0x04081020u) + 0x00000020u) & PH & 0x04081000u;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	outflank_d7 = ((OH | ~0x04081020u) + 1) & PH & 0x04081000u;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	flipped |= (UINT64) ((outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x00081020u) << 32;
 
 	flip_d9 = OL & 0x00200000u;
@@ -3550,14 +2261,7 @@ static UINT64 flip_sse_G4(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_H4(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, flip_d9;
@@ -3570,15 +2274,7 @@ static UINT64 flip_H4(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	outflank_h = OUTFLANK_7[(OL >> 25) & 0x3f] & (PL >> 24);
 	flipped |= ((-outflank_h) & 0x3f) << 25;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_d7 = ((OH | ~0x08102040u) + 1) & PH & 0x08102000u;
-=======
-	outflank_d7 = ((OH | ~0x08102040u) + 0x00000040u) & PH & 0x08102000u;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	outflank_d7 = ((OH | ~0x08102040u) + 1) & PH & 0x08102000u;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	flipped |= (UINT64) ((outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x00102040u) << 32;
 
 	flip_d9 = OL & 0x00400000u;
@@ -3652,57 +2348,25 @@ static UINT64 flip_sse_H4(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
 static UINT64 flip_A5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_v, flip_d7, outflank_d9;
 	unsigned char outflank_h;
-=======
-static UINT64 flip_A5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	unsigned int outflank_h, outflank_v, flip_d7, outflank_d9;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
-static UINT64 flip_A5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	unsigned int outflank_v, flip_d7, outflank_d9;
-	unsigned char outflank_h;
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	outflank_v = OUTFLANK_4[(((OL & 0x01010100u) + ((OH & 0x00010101u) << 4)) * 0x01020408u) >> 25]
 		& ((((PL & 0x01010101u) + ((PH & 0x01010101u) << 4)) * 0x01020408u) >> 24);
 	flipped = FLIPPED_4_V[outflank_v] & 0x0001010101010100;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = (OH + 0x02) & PH;
 	flipped |= (UINT64) (((outflank_h * 0xff) >> 8) & 0x0000007eu) << 32;
-=======
-	outflank_h = ((OH & 0x0000007eu) + 0x00000002u) & PH;
-	flipped |= (UINT64) ((((outflank_h << 8) - outflank_h) >> 8) & 0x0000007eu) << 32;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	outflank_h = (OH + 0x02) & PH;
-	flipped |= (UINT64) (((outflank_h * 0xff) >> 8) & 0x0000007eu) << 32;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flip_d7 = OL & ((OL >> 7) | 0x02000000u);
 	flip_d7 &= ((flip_d7 & 0x02040000u) >> 14) | 0x02040000u;
 	flipped |= flip_d7 & -(flip_d7 & (PL << 7));
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d9 = ((OH | ~0x08040200u) + 1) & PH & 0x08040000u;
-=======
-	outflank_d9 = ((OH | ~0x08040200u) + 0x00000200u) & PH & 0x08040000u;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	outflank_d9 = ((OH | ~0x08040200u) + 1) & PH & 0x08040000u;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	flipped |= (UINT64) ((outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x00040200u) << 32;
 
 	return flipped;
@@ -3767,57 +2431,25 @@ static UINT64 flip_sse_A5(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-static UINT64 flip_B5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	unsigned int outflank_v, flip_d7, outflank_d9;
-	unsigned char outflank_h;
-=======
-static UINT64 flip_B5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	unsigned int outflank_h, outflank_v, flip_d7, outflank_d9;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
 static UINT64 flip_B5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_v, flip_d7, outflank_d9;
 	unsigned char outflank_h;
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	outflank_v = OUTFLANK_4[(((OL & 0x02020200u) + ((OH & 0x00020202u) << 4)) * 0x00810204u) >> 25]
 		& ((((PL & 0x02020202u) + ((PH & 0x02020202u) << 4)) * 0x00810204u) >> 24);
 	flipped = FLIPPED_4_V[outflank_v] & 0x0002020202020200;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = (OH + 0x04) & PH;
 	flipped |= (UINT64) (((outflank_h * 0xff) >> 8) & 0x0000007cu) << 32;
-=======
-	outflank_h = ((OH & 0x0000007cu) + 0x00000004u) & (PH >> 0);
-	flipped |= (UINT64) ((((outflank_h << 8) - outflank_h) >> 8) & 0x0000007cu) << 32;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	outflank_h = (OH + 0x04) & PH;
-	flipped |= (UINT64) (((outflank_h * 0xff) >> 8) & 0x0000007cu) << 32;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flip_d7 = OL & ((OL >> 7) | 0x04000000u);
 	flip_d7 &= ((flip_d7 & 0x04080000u) >> 14) | 0x04080000u;
 	flipped |= flip_d7 & -(flip_d7 & (PL << 7));
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d9 = ((OH | ~0x10080400u) + 1) & PH & 0x10080000u;
-=======
-	outflank_d9 = ((OH | ~0x10080400u) + 0x00000400u) & PH & 0x10080000u;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	outflank_d9 = ((OH | ~0x10080400u) + 1) & PH & 0x10080000u;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	flipped |= (UINT64) ((outflank_d9 - (unsigned int) (outflank_d9 != 0)) & 0x00080400u) << 32;
 
 	return flipped;
@@ -3882,14 +2514,7 @@ static UINT64 flip_sse_B5(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_C5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
@@ -3973,14 +2598,7 @@ static UINT64 flip_sse_C5(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_D5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
@@ -4082,14 +2700,7 @@ static UINT64 flip_sse_D5(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_E5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
@@ -4191,14 +2802,7 @@ static UINT64 flip_sse_E5(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_F5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, outflank_d9;
@@ -4282,14 +2886,7 @@ static UINT64 flip_sse_F5(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_G5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, flip_d9;
@@ -4302,15 +2899,7 @@ static UINT64 flip_G5(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	outflank_h = OUTFLANK_7[(OH >> 0) & 0x3e] & (PH << 1);
 	flipped |= (UINT64) ((-outflank_h) & 0x3e) << 32;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_d7 = ((OH | ~0x08102000u) + 1) & PH & 0x08100000u;
-=======
-	outflank_d7 = ((OH | ~0x08102000u) + 0x00002000u) & PH & 0x08100000u;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	outflank_d7 = ((OH | ~0x08102000u) + 1) & PH & 0x08100000u;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	flipped |= (UINT64) ((outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x00102000u) << 32;
 
 	flip_d9 = OL & ((OL >> 9) | 0x20000000u);
@@ -4384,14 +2973,7 @@ static UINT64 flip_sse_G5(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_H5(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7, flip_d9;
@@ -4404,15 +2986,7 @@ static UINT64 flip_H5(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	outflank_h = OUTFLANK_7[(OH >> 1) & 0x3f] & (PH >> 0);
 	flipped |= (UINT64) (((-outflank_h) & 0x3f) << 1) << 32;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_d7 = ((OH | ~0x10204000u) + 1) & PH & 0x10200000u;
-=======
-	outflank_d7 = ((OH | ~0x10204000u) + 0x00004000u) & PH & 0x10200000u;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	outflank_d7 = ((OH | ~0x10204000u) + 1) & PH & 0x10200000u;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	flipped |= (UINT64) ((outflank_d7 - (unsigned int) (outflank_d7 != 0)) & 0x00204000u) << 32;
 
 	flip_d9 = OL & ((OL >> 9) | 0x40000000u);
@@ -4486,40 +3060,18 @@ static UINT64 flip_sse_H5(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-static UINT64 flip_A6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	unsigned int outflank_v, flip_d7;
-	unsigned short outflank_h;
-=======
-static UINT64 flip_A6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	unsigned int outflank_h, outflank_v, flip_d7;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
 static UINT64 flip_A6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_v, flip_d7;
 	unsigned short outflank_h;
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	outflank_v = OUTFLANK_5[(((OL & 0x01010100u) + ((OH & 0x00010101u) << 4)) * 0x01020408u) >> 25]
 		& ((((PL & 0x01010101u) + ((PH & 0x01010101u) << 4)) * 0x01020408u) >> 24);
 	flipped = FLIPPED_5_V[outflank_v] & 0x0001010101010100;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = (OH + 0x0200) & PH;
-=======
-	outflank_h = ((OH & 0x00007e00u) + 0x00000200u) & PH;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	outflank_h = (OH + 0x0200) & PH;
->>>>>>> 6506166 (More SSE optimizations)
 	flipped |= (UINT64) ((outflank_h - (outflank_h >> 8)) & 0x00007e00u) << 32;
 
 	flip_d7 = (OL >> 8) | (OH << 24);
@@ -4556,40 +3108,18 @@ static UINT64 flip_sse_A6(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
 static UINT64 flip_B6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_v, flip_d7;
 	unsigned short outflank_h;
-=======
-static UINT64 flip_B6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	unsigned int outflank_h, outflank_v, flip_d7;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
-static UINT64 flip_B6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
-{
-	unsigned int outflank_v, flip_d7;
-	unsigned short outflank_h;
->>>>>>> 6506166 (More SSE optimizations)
 	UINT64 flipped;
 
 	outflank_v = OUTFLANK_5[(((OL & 0x02020200u) + ((OH & 0x00020202u) << 4)) * 0x00810204u) >> 25]
 		& ((((PL & 0x02020202u) + ((PH & 0x02020202u) << 4)) * 0x00810204u) >> 24);
 	flipped = FLIPPED_5_V[outflank_v] & 0x0002020202020200;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = (OH + 0x0400) & PH;
-=======
-	outflank_h = ((OH & 0x00007c00u) + 0x00000400u) & PH;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	outflank_h = (OH + 0x0400) & PH;
->>>>>>> 6506166 (More SSE optimizations)
 	flipped |= (UINT64) ((outflank_h - (outflank_h >> 8)) & 0x00007c00u) << 32;
 
 	flip_d7 = (OL >> 8) | (OH << 24);
@@ -4626,14 +3156,7 @@ static UINT64 flip_sse_B6(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_C6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d7;
@@ -4680,14 +3203,7 @@ static UINT64 flip_sse_C6(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_D6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d;
@@ -4704,16 +3220,7 @@ static UINT64 flip_D6(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 		& ((((PL & 0x22418000u) + (PH & 0x00000814u)) * 0x01010101u) >> 24);
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0000081422400000;	// A3D6H2
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= (UINT64) (OH & (((PH >> 9) & 0x00100000u) | ((PH >> 7) & 0x00040000u))) << 32;
-=======
-	flipped |= (UINT64) (OH &
-		(((PH >> 9) & 0x00100000u) | ((PH >> 7) & 0x00040000u))) << 32;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= (UINT64) (OH & (((PH >> 9) & 0x00100000u) | ((PH >> 7) & 0x00040000u))) << 32;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
@@ -4780,14 +3287,7 @@ static UINT64 flip_sse_D6(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_E6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d;
@@ -4804,16 +3304,7 @@ static UINT64 flip_E6(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 		& ((((PL & 0x44820100u) + (PH & 0x00001028u)) * 0x01010101u) >> 24);
 	flipped |= FLIPPED_4_H[outflank_d] & 0x0000102844020000;	// A2E6H3
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= (UINT64) (OH & (((PH >> 9) & 0x00200000u) | ((PH >> 7) & 0x00080000u))) << 32;
-=======
-	flipped |= (UINT64) (OH &
-		(((PH >> 9) & 0x00200000u) | ((PH >> 7) & 0x00080000u))) << 32;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= (UINT64) (OH & (((PH >> 9) & 0x00200000u) | ((PH >> 7) & 0x00080000u))) << 32;
->>>>>>> 6506166 (More SSE optimizations)
 
 	return flipped;
 }
@@ -4880,14 +3371,7 @@ static UINT64 flip_sse_E6(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_F6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d9;
@@ -4934,14 +3418,7 @@ static UINT64 flip_sse_F6(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_G6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, flip_d9;
@@ -4952,17 +3429,7 @@ static UINT64 flip_G6(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	flipped = FLIPPED_5_V[outflank_v] & 0x0040404040404000;
 
 	outflank_h = OUTFLANK_7[(OH >> 8) & 0x3e] & (PH >> 7);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= (UINT64) ((((-outflank_h) & 0x3e) << 8) | ((PH >> 7) & 0x00200000u & OH)) << 32;
-=======
-	flipped |= (UINT64) (((-outflank_h) & 0x3e) << 8) << 32;
-
-	flipped |= (UINT64) ((PH >> 7) & 0x00200000u & OH) << 32;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= (UINT64) ((((-outflank_h) & 0x3e) << 8) | ((PH >> 7) & 0x00200000u & OH)) << 32;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flip_d9 = (OL >> 8) | (OH << 24);
 	flip_d9 &= (flip_d9 >> 9) | 0x20000000u;
@@ -5037,14 +3504,7 @@ static UINT64 flip_sse_G6(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_H6(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, flip_d9;
@@ -5055,17 +3515,7 @@ static UINT64 flip_H6(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	flipped = FLIPPED_5_V[outflank_v] & 0x0080808080808000;
 
 	outflank_h = OUTFLANK_7[(OH >> 9) & 0x3f] & (PH >> 8);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	flipped |= (UINT64) ((((-outflank_h) & 0x3f) << 9) | ((PH >> 7) & 0x00400000u & OH)) << 32;
-=======
-	flipped |= (UINT64) (((-outflank_h) & 0x3f) << 9) << 32;
-
-	flipped |= (UINT64) ((PH >> 7) & 0x00400000u & OH) << 32;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	flipped |= (UINT64) ((((-outflank_h) & 0x3f) << 9) | ((PH >> 7) & 0x00400000u & OH)) << 32;
->>>>>>> 6506166 (More SSE optimizations)
 
 	flip_d9 = (OL >> 8) | (OH << 24);
 	flip_d9 &= (flip_d9 >> 9) | 0x40000000u;
@@ -5140,14 +3590,7 @@ static UINT64 flip_sse_H6(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_A7(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_v, outflank_h, outflank_d7, flippedH, flippedL;
@@ -5164,7 +3607,6 @@ static UINT64 flip_A7(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 		& ((((PL & 0x08102040u) + (PH & 0x00000004u)) * 0x01010101u) >> 24);
 	flippedH |= ((outflank_d7 * 0x0000f8f0u) >> 8) & 0x00000204u;
 	flippedL |= (outflank_d7 * 0x00e0c080u) & 0x08102000u;
-<<<<<<< HEAD
 
 	return ULL(flippedH, flippedL);
 }
@@ -5176,15 +3618,8 @@ static UINT64 flip_sse_A7(unsigned int PL, unsigned int PH, unsigned int OL, uns
 	UINT64 flipped;
 
 	FLIP_MS1B_AB78(49);
-<<<<<<< HEAD
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-
-	return ULL(flippedH, flippedL);
-=======
 
 	return flipped;
->>>>>>> 6506166 (More SSE optimizations)
 }
 #endif
 
@@ -5195,14 +3630,7 @@ static UINT64 flip_sse_A7(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_B7(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_v, outflank_h, outflank_d7, flippedH, flippedL;
@@ -5219,7 +3647,6 @@ static UINT64 flip_B7(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 		& ((PL & 0x10204080u) + (PH & 0x00000008u)) * 0x01010101u) >> 25;
 	flippedH |= ((outflank_d7 * 0x0000f8f0u) >> 7) & 0x00000408u;
 	flippedL |= (outflank_d7 * 0x01c18100u) & 0x10204000u;
-<<<<<<< HEAD
 
 	return ULL(flippedH, flippedL);
 }
@@ -5231,15 +3658,8 @@ static UINT64 flip_sse_B7(unsigned int PL, unsigned int PH, unsigned int OL, uns
 	UINT64 flipped;
 
 	FLIP_MS1B_AB78(50);
-<<<<<<< HEAD
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-
-	return ULL(flippedH, flippedL);
-=======
 
 	return flipped;
->>>>>>> 6506166 (More SSE optimizations)
 }
 #endif
 
@@ -5250,14 +3670,7 @@ static UINT64 flip_sse_B7(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_C7(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d;
@@ -5300,14 +3713,7 @@ static UINT64 flip_sse_C7(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_D7(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d;
@@ -5386,14 +3792,7 @@ static UINT64 flip_sse_D7(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_E7(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d;
@@ -5472,14 +3871,7 @@ static UINT64 flip_sse_E7(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_F7(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d;
@@ -5522,14 +3914,7 @@ static UINT64 flip_sse_F7(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_G7(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d9, flippedH, flippedL;
@@ -5548,7 +3933,6 @@ static UINT64 flip_G7(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	flippedL |= (outflank_d9 * 0x07030100u) & 0x08040200u;
 
 	return ULL(flippedH, flippedL);
-<<<<<<< HEAD
 }
 #endif
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
@@ -5598,11 +3982,6 @@ static UINT64 flip_sse_G7(unsigned int PL, unsigned int PH, unsigned int OL, uns
 	: "m" (PL), "m" (PH), "m" (OL), "m" (OH), "m" (mask1), "m" (mask2), "m" (expmask), "m" (minus0800), "m" (k1e52));
 #endif
 	return flipped;
-<<<<<<< HEAD
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
->>>>>>> 6506166 (More SSE optimizations)
 }
 #endif
 
@@ -5613,14 +3992,7 @@ static UINT64 flip_sse_G7(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_H7(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d9, flippedH, flippedL;
@@ -5698,14 +4070,7 @@ static UINT64 flip_sse_H7(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_A8(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_v, outflank_h, outflank_d7, flippedH, flippedL;
@@ -5715,22 +4080,13 @@ static UINT64 flip_A8(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	flippedH = ((outflank_v * 0x003f1f0fu) >> 7) & 0x00010101u;
 	flippedL = (outflank_v * 0x000e0602u) & 0x01010100u;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_h = (OH + 0x02000000u) & PH;
-=======
-	outflank_h = ((OH & 0x7e000000u) + 0x02000000u) & PH;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 	outflank_h = (OH + 0x02000000u) & PH;
->>>>>>> 6506166 (More SSE optimizations)
 	flippedH |= (outflank_h - (outflank_h >> 8)) & 0x7e000000u;
 
 	outflank_d7 = ((((OL & 0x10204000u) + (OH & 0x00020408u)) * 0x01010101u + 0x02000000u)
 		& ((PL & 0x10204080u) + (PH & 0x00000408u)) * 0x01010101u) >> 24;
 	flippedH |= ((outflank_d7 * 0x00fcf8f0u) >> 8) & 0x00020408u;
 	flippedL |= (outflank_d7 * 0x00e0c080u) & 0x10204000u;
-<<<<<<< HEAD
 
 	return ULL(flippedH, flippedL);
 }
@@ -5742,15 +4098,8 @@ static UINT64 flip_sse_A8(unsigned int PL, unsigned int PH, unsigned int OL, uns
 	UINT64 flipped;
 
 	FLIP_MS1B_AB78(57);
-<<<<<<< HEAD
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-
-	return ULL(flippedH, flippedL);
-=======
 
 	return flipped;
->>>>>>> 6506166 (More SSE optimizations)
 }
 #endif
 
@@ -5761,14 +4110,7 @@ static UINT64 flip_sse_A8(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_B8(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_v, outflank_h, outflank_d7, flippedH, flippedL;
@@ -5778,22 +4120,13 @@ static UINT64 flip_B8(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	flippedH = ((outflank_v * 0x003f1f0fu) >> 6) & 0x00020202u;
 	flippedL = (outflank_v * 0x001c0c04u) & 0x02020200u;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_h = (OH + 0x04000000u) & PH;
-=======
-	outflank_h = ((OH & 0x7c000000u) + 0x04000000u) & PH;
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-	outflank_h = (OH + 0x04000000u) & PH;
->>>>>>> 6506166 (More SSE optimizations)
 	flippedH |= (outflank_h - (outflank_h >> 8)) & 0x7c000000u;
 
 	outflank_d7 = ((((OL & 0x20400000u) + (OH & 0x00040810u)) * 0x01010101u + 0x04000000u)
 		& ((PL & 0x20408000u) + (PH & 0x00000810u)) * 0x01010101u) >> 25;
 	flippedH |= ((outflank_d7 * 0x00fcf8f0u) >> 7) & 0x00040810u;
 	flippedL |= (outflank_d7 * 0x01c18000u) & 0x20400000u;
-<<<<<<< HEAD
 
 	return ULL(flippedH, flippedL);
 }
@@ -5805,15 +4138,8 @@ static UINT64 flip_sse_B8(unsigned int PL, unsigned int PH, unsigned int OL, uns
 	UINT64 flipped;
 
 	FLIP_MS1B_AB78(58);
-<<<<<<< HEAD
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-
-	return ULL(flippedH, flippedL);
-=======
 
 	return flipped;
->>>>>>> 6506166 (More SSE optimizations)
 }
 #endif
 
@@ -5824,14 +4150,7 @@ static UINT64 flip_sse_B8(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_C8(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d;
@@ -5874,14 +4193,7 @@ static UINT64 flip_sse_C8(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_D8(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d;
@@ -5960,14 +4272,7 @@ static UINT64 flip_sse_D8(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_E8(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d;
@@ -6046,14 +4351,7 @@ static UINT64 flip_sse_E8(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_F8(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d;
@@ -6130,14 +4428,7 @@ static UINT64 flip_sse_F8(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 #ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_G8(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d9, flippedH, flippedL;
@@ -6156,7 +4447,6 @@ static UINT64 flip_G8(unsigned int PL, unsigned int PH, unsigned int OL, unsigne
 	flippedL |= (outflank_d9 * 0x03010000u) & 0x04020000u;
 
 	return ULL(flippedH, flippedL);
-<<<<<<< HEAD
 }
 #endif
 #if defined(hasSSE2) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
@@ -6207,11 +4497,6 @@ static UINT64 flip_sse_G8(unsigned int PL, unsigned int PH, unsigned int OL, uns
 	: "m" (PL), "m" (PH), "m" (OL), "m" (OH), "m" (mask1), "m" (mask2), "m" (expmask), "m" (minuseight), "m" (k1e52));
 #endif
 	return flipped;
-<<<<<<< HEAD
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
->>>>>>> 6506166 (More SSE optimizations)
 }
 #endif
 
@@ -6222,14 +4507,7 @@ static UINT64 flip_sse_G8(unsigned int PL, unsigned int PH, unsigned int OL, uns
  * @param O opponent's disc pattern.
  * @return flipped disc pattern.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef hasSSE2
-=======
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
-#ifndef hasSSE2
->>>>>>> 6506166 (More SSE optimizations)
 static UINT64 flip_H8(unsigned int PL, unsigned int PH, unsigned int OL, unsigned int OH)
 {
 	unsigned int outflank_h, outflank_v, outflank_d9, flippedH, flippedL;
@@ -6352,21 +4630,10 @@ UINT64 flip32(unsigned int pos, unsigned int bb[]) {
 	return (*flip[pos])(bb[0], bb[1], bb[2], bb[3]);
 }
 #endif
-<<<<<<< HEAD
-
-#if !defined(hasSSE2) && (defined(USE_GAS_MMX) || defined(USE_MSVC_X86))
-
-static UINT64 (*flip_sse[])(unsigned int, unsigned int, unsigned int, unsigned int) = {
-=======
 
 #if !defined(hasSSE2) && (defined(USE_GAS_MMX) || defined(USE_MSVC_X86))
 
-<<<<<<< HEAD
-static UINT64 (*flip_sse_123[])(unsigned int, unsigned int, unsigned int, unsigned int) = {
->>>>>>> dd6b636 (Bcc32 friendly and minor improvement on Flip_32.)
-=======
 static UINT64 (*flip_sse[])(unsigned int, unsigned int, unsigned int, unsigned int) = {
->>>>>>> 6506166 (More SSE optimizations)
 	flip_sse_A1, flip_sse_B1, flip_sse_C1, flip_sse_D1, flip_sse_E1, flip_sse_F1, flip_sse_G1, flip_sse_H1,
 	flip_sse_A2, flip_sse_B2, flip_sse_C2, flip_sse_D2, flip_sse_E2, flip_sse_F2, flip_sse_G2, flip_sse_H2,
 	flip_sse_A3, flip_sse_B3, flip_sse_C3, flip_sse_D3, flip_sse_E3, flip_sse_F3, flip_sse_G3, flip_sse_H3,
diff --git a/src/flip_neon_bitscan.c b/src/flip_neon_bitscan.c
index 6f32c87..7e2f019 100644
--- a/src/flip_neon_bitscan.c
+++ b/src/flip_neon_bitscan.c
@@ -34,13 +34,10 @@
  * returned to generate moves.
  *
  * If the OUTFLANK search is in LSB to MSB direction, carry propagation 
-<<<<<<< HEAD
-<<<<<<< HEAD
  * (with Neon if appropriate) can be used to determine contiguous opponent discs.
  * If the OUTFLANK search is in MSB to LSB direction, lzcnt64 is used.
  *
  * @date 1998 - 2022
-<<<<<<< HEAD
  * @author Richard Delorme
  * @author Toshihiko Okuhara
  * @version 4.5
@@ -50,36 +47,7 @@
 
 // included from board.c or linked in Android Arm32 dispatch build
 #if defined(flip_neon) || defined(DISPATCH_NEON)
-<<<<<<< HEAD
 
-=======
- * can be used to determine contiguous opponent discs.
- * If the OUTFLANK search is in MSB to LSB direction, lzcnt64 is used if 
- * available, or __builtin_bswap is used to use carry propagation backwards.
-=======
- * (with Neon if appropriate) can be used to determine contiguous opponent discs.
- * If the OUTFLANK search is in MSB to LSB direction, lzcnt64 is used.
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
- *
- * @date 1998 - 2020
-=======
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.5
- */
-
-#include "bit_intrinsics.h"
-
-<<<<<<< HEAD
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-// included from board.c or linked in Android Arm32 dispatch build
-#if defined(flip_neon) || (defined(ANDROID) && defined(__arm__) && !defined(hasNeon))
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 /** rotated outflank array (indexed with inner 6 bits) */
 static const unsigned char OUTFLANK_3[64] = {	// ...bahgf
 	0x00, 0x00, 0x10, 0x08, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x11, 0x09, 0x00, 0x00, 0x00, 0x00,
@@ -231,18 +199,8 @@ static unsigned long long flip_D1(const unsigned long long P, const unsigned lon
 	outflank_v = (outflank_v & -outflank_v) & 0x0808080808080800 & P;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080800;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_d = OUTFLANK_3[(((unsigned int) O & 0x40221408) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000008041221408) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-=======
-	outflank_d = ((P & 0x0000008041221408) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-	outflank_d = OUTFLANK_3[((O & 0x0000000040221408) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	outflank_d = OUTFLANK_3[(((unsigned int) O & 0x40221408) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000008041221408) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0000000040221408;	// A4D1H5
 
 	outflank_h = OUTFLANK_3[(O >> 1) & 0x3f] & rotl8(P, 3);
@@ -267,18 +225,8 @@ static unsigned long long flip_E1(const unsigned long long P, const unsigned lon
 	outflank_v = (outflank_v & -outflank_v) & 0x1010101010101000 & P;
 	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010101000;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_4[(((unsigned int) O & 0x02442810) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000000182442810) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-=======
-	outflank_d = ((P & 0x0000000182442810) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-	outflank_d = OUTFLANK_4[((O & 0x0000000002442810) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	outflank_d = OUTFLANK_4[(((unsigned int) O & 0x02442810) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000000182442810) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_4_H[outflank_d] & 0x0000000002442810;	// A5E1H4
 
 	outflank_h = OUTFLANK_4[(O >> 1) & 0x3f] & rotl8(P, 2);
@@ -451,18 +399,8 @@ static unsigned long long flip_D2(const unsigned long long P, const unsigned lon
 	outflank_v = (outflank_v & -outflank_v) & 0x0808080808080000 & P;
 	flipped = OutflankToFlipmask(outflank_v) & 0x0808080808080000;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 8) & 0x40221408) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000804122140800) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-=======
-	outflank_d = ((P & 0x0000804122140800) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-	outflank_d = OUTFLANK_3[((O & 0x0000004022140800) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 8) & 0x40221408) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000804122140800) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0000004022140800;	// A5D2H6
 
 	outflank_h = OUTFLANK_3[(O >> 9) & 0x3f] & rotl8(P >> 8, 3);
@@ -487,18 +425,8 @@ static unsigned long long flip_E2(const unsigned long long P, const unsigned lon
 	outflank_v = (outflank_v & -outflank_v) & 0x1010101010100000 & P;
 	flipped = OutflankToFlipmask(outflank_v) & 0x1010101010100000;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 8) & 0x02442810) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000018244281000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-=======
-	outflank_d = ((P & 0x0000018244281000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-	outflank_d = OUTFLANK_4[((O & 0x0000000244281000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 8) & 0x02442810) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000018244281000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_4_H[outflank_d] & 0x0000000244281000;	// A6E2H5
 
 	outflank_h = OUTFLANK_4[(O >> 9) & 0x3f] & rotl8(P >> 8, 2);
@@ -687,18 +615,8 @@ static unsigned long long flip_D3(const unsigned long long P, const unsigned lon
 	outflank_h = OUTFLANK_3[(O >> 17) & 0x3f] & rotl8(P >> 16, 3);
 	flipped |= (unsigned char) FLIPPED_3_H[outflank_h] << 16;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 16) & 0x40221408) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0080412214080000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-=======
-	outflank_d = ((P & 0x0080412214080000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-	outflank_d = OUTFLANK_3[((O & 0x0000402214080000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 16) & 0x40221408) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0080412214080000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0000402214080000;	// A6D3H7
 
 	flipped |= (((P << 7) & 0x0000000000001000) | ((P << 8) & 0x000000000000800) | ((P << 9) & 0x000000000000400)) & O;
@@ -725,18 +643,8 @@ static unsigned long long flip_E3(const unsigned long long P, const unsigned lon
 	outflank_h = OUTFLANK_4[(O >> 17) & 0x3f] & rotl8(P >> 16, 2);
 	flipped |= (unsigned char) FLIPPED_4_H[outflank_h] << 16;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 16) & 0x02442810) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0001824428100000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-=======
-	outflank_d = ((P & 0x0001824428100000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-	outflank_d = OUTFLANK_4[((O & 0x0000024428100000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 16) & 0x02442810) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0001824428100000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_4_H[outflank_d] & 0x0000024428100000;	// A7E3H6
 
 	flipped |= (((P << 7) & 0x0000000000002000) | ((P << 8) & 0x000000000001000) | ((P << 9) & 0x000000000000800)) & O;
@@ -759,34 +667,15 @@ static unsigned long long flip_F3(const unsigned long long P, const unsigned lon
 	uint32x4_t OL = vtrnq_u32(vreinterpretq_u32_u64(OO), vreinterpretq_u32_u64(OO)).val[0];
 	uint32x4_t outflankL;
 	uint64x2_t outflankH, flipped;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	const uint64x2_t maskL = { 0x001f000000002020, 0x0000408000001008 };
-=======
-	const uint32x4_t maskL = { 0x00002020, 0x001f0000, 0x00001008, 0x00004080 };
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	const uint64x2_t maskL = { 0x001f000000002020, 0x0000408000001008 };
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 	const uint64x2_t maskH = { 0x2020202020000000, 0x0102040810000000 };
 	const uint32x4_t msb = vdupq_n_u32(0x80000000);
 	const uint64x2_t one = vdupq_n_u64(1);
 	unsigned long long flipped_g3g4;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
-	flipped = vpaddlq_u32(vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-=======
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(vclzq_u32(vbicq_u32(maskL, OL))))), PL);
-	flipped = vpaddlq_u32(vandq_u32(maskL, vreinterpretq_u32_s32(
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
 		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
 	flipped = vpaddlq_u32(vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL))))));
 
 	outflankH = vbicq_u64(maskH, OO);
@@ -813,33 +702,14 @@ static unsigned long long flip_G3(const unsigned long long P, const unsigned lon
 	uint32x4_t OL = vtrnq_u32(vreinterpretq_u32_u64(OO), vreinterpretq_u32_u64(OO)).val[0];
 	uint32x4_t outflankL;
 	uint64x2_t outflankH, flipped;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	const uint64x2_t maskL = { 0x003f000000004040, 0x0000000000002010 };
-=======
-	const uint32x4_t maskL = { 0x00004040, 0x003f0000, 0x00002010, 0 };
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	const uint64x2_t maskL = { 0x003f000000004040, 0x0000000000002010 };
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 	const uint64x2_t maskH = { 0x4040404040000000, 0x0204081020000000 };
 	const uint32x4_t msb = vdupq_n_u32(0x80000000);
 	const uint64x2_t one = vdupq_n_u64(1);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
 		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
 	flipped = vpaddlq_u32(vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-=======
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(vclzq_u32(vbicq_u32(maskL, OL))))), PL);
-	flipped = vpaddlq_u32(vandq_u32(maskL, vreinterpretq_u32_s32(
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
-	flipped = vpaddlq_u32(vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL))))));
 
 	outflankH = vbicq_u64(maskH, OO);
@@ -864,33 +734,14 @@ static unsigned long long flip_H3(const unsigned long long P, const unsigned lon
 	uint32x4_t OL = vtrnq_u32(vreinterpretq_u32_u64(OO), vreinterpretq_u32_u64(OO)).val[0];
 	uint32x4_t outflankL;
 	uint64x2_t outflankH, flipped;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	const uint64x2_t maskL = { 0x007f000000008080, 0x0000000000004020 };
-=======
-	const uint32x4_t maskL = { 0x00008080, 0x007f0000, 0x00004020, 0 };
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	const uint64x2_t maskL = { 0x007f000000008080, 0x0000000000004020 };
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 	const uint64x2_t maskH = { 0x8080808080000000, 0x0408102040000000 };
 	const uint32x4_t msb = vdupq_n_u32(0x80000000);
 	const uint64x2_t one = vdupq_n_u64(1);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
-	flipped = vpaddlq_u32(vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-=======
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(vclzq_u32(vbicq_u32(maskL, OL))))), PL);
-	flipped = vpaddlq_u32(vandq_u32(maskL, vreinterpretq_u32_s32(
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
 		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
 	flipped = vpaddlq_u32(vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL))))));
 
 	outflankH = vbicq_u64(maskH, OO);
@@ -918,25 +769,11 @@ static unsigned long long flip_A4(const unsigned long long P, const unsigned lon
 	const uint32x4_t msb = vdupq_n_u32(0x80000000);
 	const uint32x4_t one = vdupq_n_u32(1);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflankL = vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
 		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OO)))));
 	outflankL = vandq_u32(outflankL, PP);
 	flipped = vandq_u64(maskL, vreinterpretq_u64_s32(
 		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-=======
-	outflankL = vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OO)))));
-	outflankL = vandq_u32(outflankL, PP);
-	flipped = vandq_u64(maskL, vreinterpretq_u64_s32(vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	outflankL = vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OO)))));
-	outflankL = vandq_u32(outflankL, PP);
-	flipped = vandq_u64(maskL, vreinterpretq_u64_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 
 	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), OO);
 	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PP);
@@ -963,25 +800,11 @@ static unsigned long long flip_B4(const unsigned long long P, const unsigned lon
 	const uint32x4_t msb = vdupq_n_u32(0x80000000);
 	const uint32x4_t one = vdupq_n_u32(1);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflankL = vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OO)))));
-	outflankL = vandq_u32(outflankL, PP);
-	flipped = vandq_u64(maskL, vreinterpretq_u64_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-=======
-	outflankL = vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OO)))));
-	outflankL = vandq_u32(outflankL, PP);
-	flipped = vandq_u64(maskL, vreinterpretq_u64_s32(vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	outflankL = vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
 		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OO)))));
 	outflankL = vandq_u32(outflankL, PP);
 	flipped = vandq_u64(maskL, vreinterpretq_u64_s32(
 		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 
 	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), OO);
 	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PP);
@@ -1004,8 +827,6 @@ static unsigned long long flip_C4(const unsigned long long P, const unsigned lon
 	uint32x4_t PH = vsetq_lane_u32(vgetq_lane_u32(PP, 1), PP, 2);	// HHHL
 	uint32x4_t OH = vsetq_lane_u32(vgetq_lane_u32(OO, 1), OO, 2);
 	uint32x4_t outflankL, outflankH, flippedL4, flippedH;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	uint32x2x2_t flippedLH;
 	uint64x1_t flipped;
 	const uint64x2_t maskL = { 0x0004040403000000, 0x0008102000020100 };
@@ -1024,33 +845,6 @@ static unsigned long long flip_C4(const unsigned long long P, const unsigned lon
 	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
 
 	flippedLH = vtrn_u32(vorr_u32(vget_low_u32(flippedL4), vget_high_u32(flippedL4)), vget_high_u32(flippedH));
-=======
-	uint32x2_t flippedL2;
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	uint32x2x2_t flippedLH;
-	uint64x1_t flipped;
-	const uint64x2_t maskL = { 0x0004040403000000, 0x0008102000020100 };
-	const uint64x2_t maskH = { 0x04040404f8000000, 0x0000010240201008 };
-	const uint32x4_t msb = vdupq_n_u32(0x80000000);
-	const uint32x4_t one = vdupq_n_u32(1);
-
-	outflankL = vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), vtrnq_u32(OO, OO).val[0])))));
-	outflankL = vandq_u32(outflankL, vtrnq_u32(PP, PP).val[0]);
-	flippedL4 = vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-
-	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), OH);
-	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PH);
-	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
-
-<<<<<<< HEAD
-	flippedLH = vtrn_u32(flippedL2, vget_high_u32(flippedH));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	flippedLH = vtrn_u32(vorr_u32(vget_low_u32(flippedL4), vget_high_u32(flippedL4)), vget_high_u32(flippedH));
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = vreinterpret_u64_u32(vorr_u32(vorr_u32(flippedLH.val[0], flippedLH.val[1]), vget_low_u32(flippedH)));
 
 	return vget_lane_u64(flipped, 0);
@@ -1070,30 +864,6 @@ static unsigned long long flip_D4(const unsigned long long P, const unsigned lon
 	uint32x4_t PH = vsetq_lane_u32(vgetq_lane_u32(PP, 1), PP, 2);	// HHHL
 	uint32x4_t OH = vsetq_lane_u32(vgetq_lane_u32(OO, 1), OO, 2);
 	uint32x4_t outflankL, outflankH, flippedL4, flippedH;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	uint32x2x2_t flippedLH;
-	uint64x1_t flipped;
-	const uint64x2_t maskL = { 0x0008080807000000, 0x0010204000040201 };
-	const uint64x2_t maskH = { 0x08080808f0000000, 0x0001020480402010 };
-	const uint32x4_t msb = vdupq_n_u32(0x80000000);
-	const uint32x4_t one = vdupq_n_u32(1);
-
-	outflankL = vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), vtrnq_u32(OO, OO).val[0])))));
-	outflankL = vandq_u32(outflankL, vtrnq_u32(PP, PP).val[0]);
-	flippedL4 = vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-
-	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), OH);
-	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PH);
-	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
-
-	flippedLH = vtrn_u32(vorr_u32(vget_low_u32(flippedL4), vget_high_u32(flippedL4)), vget_high_u32(flippedH));
-=======
-	uint32x2_t flippedL2;
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	uint32x2x2_t flippedLH;
 	uint64x1_t flipped;
 	const uint64x2_t maskL = { 0x0008080807000000, 0x0010204000040201 };
@@ -1111,12 +881,7 @@ static unsigned long long flip_D4(const unsigned long long P, const unsigned lon
 	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PH);
 	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
 
-<<<<<<< HEAD
-	flippedLH = vtrn_u32(flippedL2, vget_high_u32(flippedH));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	flippedLH = vtrn_u32(vorr_u32(vget_low_u32(flippedL4), vget_high_u32(flippedL4)), vget_high_u32(flippedH));
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = vreinterpret_u64_u32(vorr_u32(vorr_u32(flippedLH.val[0], flippedLH.val[1]), vget_low_u32(flippedH)));
 
 	return vget_lane_u64(flipped, 0);
@@ -1136,30 +901,6 @@ static unsigned long long flip_E4(const unsigned long long P, const unsigned lon
 	uint32x4_t PH = vsetq_lane_u32(vgetq_lane_u32(PP, 1), PP, 2);	// HHHL
 	uint32x4_t OH = vsetq_lane_u32(vgetq_lane_u32(OO, 1), OO, 2);
 	uint32x4_t outflankL, outflankH, flippedL4, flippedH;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	uint32x2x2_t flippedLH;
-	uint64x1_t flipped;
-	const uint64x2_t maskL = { 0x001010100f000000, 0x0020408000080402 };
-	const uint64x2_t maskH = { 0x10101010e0000000, 0x0102040800804020 };
-	const uint32x4_t msb = vdupq_n_u32(0x80000000);
-	const uint32x4_t one = vdupq_n_u32(1);
-
-	outflankL = vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), vtrnq_u32(OO, OO).val[0])))));
-	outflankL = vandq_u32(outflankL, vtrnq_u32(PP, PP).val[0]);
-	flippedL4 = vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-
-	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), OH);
-	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PH);
-	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
-
-	flippedLH = vtrn_u32(vorr_u32(vget_low_u32(flippedL4), vget_high_u32(flippedL4)), vget_high_u32(flippedH));
-=======
-	uint32x2_t flippedL2;
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	uint32x2x2_t flippedLH;
 	uint64x1_t flipped;
 	const uint64x2_t maskL = { 0x001010100f000000, 0x0020408000080402 };
@@ -1177,12 +918,7 @@ static unsigned long long flip_E4(const unsigned long long P, const unsigned lon
 	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PH);
 	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
 
-<<<<<<< HEAD
-	flippedLH = vtrn_u32(flippedL2, vget_high_u32(flippedH));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	flippedLH = vtrn_u32(vorr_u32(vget_low_u32(flippedL4), vget_high_u32(flippedL4)), vget_high_u32(flippedH));
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = vreinterpret_u64_u32(vorr_u32(vorr_u32(flippedLH.val[0], flippedLH.val[1]), vget_low_u32(flippedH)));
 
 	return vget_lane_u64(flipped, 0);
@@ -1202,8 +938,6 @@ static unsigned long long flip_F4(const unsigned long long P, const unsigned lon
 	uint32x4_t PH = vsetq_lane_u32(vgetq_lane_u32(PP, 1), PP, 2);	// HHHL
 	uint32x4_t OH = vsetq_lane_u32(vgetq_lane_u32(OO, 1), OO, 2);
 	uint32x4_t outflankL, outflankH, flippedL4, flippedH;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	uint32x2x2_t flippedLH;
 	uint64x1_t flipped;
 	const uint64x2_t maskL = { 0x002020201f000000, 0x0040800000100804 };
@@ -1222,33 +956,6 @@ static unsigned long long flip_F4(const unsigned long long P, const unsigned lon
 	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
 
 	flippedLH = vtrn_u32(vorr_u32(vget_low_u32(flippedL4), vget_high_u32(flippedL4)), vget_high_u32(flippedH));
-=======
-	uint32x2_t flippedL2;
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	uint32x2x2_t flippedLH;
-	uint64x1_t flipped;
-	const uint64x2_t maskL = { 0x002020201f000000, 0x0040800000100804 };
-	const uint64x2_t maskH = { 0x20202020c0000000, 0x0204081000008040 };
-	const uint32x4_t msb = vdupq_n_u32(0x80000000);
-	const uint32x4_t one = vdupq_n_u32(1);
-
-	outflankL = vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), vtrnq_u32(OO, OO).val[0])))));
-	outflankL = vandq_u32(outflankL, vtrnq_u32(PP, PP).val[0]);
-	flippedL4 = vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-
-	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), OH);
-	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PH);
-	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
-
-<<<<<<< HEAD
-	flippedLH = vtrn_u32(flippedL2, vget_high_u32(flippedH));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	flippedLH = vtrn_u32(vorr_u32(vget_low_u32(flippedL4), vget_high_u32(flippedL4)), vget_high_u32(flippedH));
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = vreinterpret_u64_u32(vorr_u32(vorr_u32(flippedLH.val[0], flippedLH.val[1]), vget_low_u32(flippedH)));
 
 	return vget_lane_u64(flipped, 0);
@@ -1269,33 +976,14 @@ static unsigned long long flip_G4(const unsigned long long P, const unsigned lon
 	uint32x4_t OL = vtrnq_u32(vreinterpretq_u32_u64(OO), vreinterpretq_u32_u64(OO)).val[0];
 	uint32x4_t outflankL;
 	uint64x2_t outflankH, flipped;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	const uint64x2_t maskL = { 0x3f00000000404040, 0x0000000000201008 };
-=======
-	const uint32x4_t maskL = { 0x00404040, 0x3f000000, 0x00201008, 0 };
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	const uint64x2_t maskL = { 0x3f00000000404040, 0x0000000000201008 };
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 	const uint64x2_t maskH = { 0x4040404000000000, 0x0408102000000000 };
 	const uint32x4_t msb = vdupq_n_u32(0x80000000);
 	const uint64x2_t one = vdupq_n_u64(1);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
-	flipped = vpaddlq_u32(vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-=======
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(vclzq_u32(vbicq_u32(maskL, OL))))), PL);
-	flipped = vpaddlq_u32(vandq_u32(maskL, vreinterpretq_u32_s32(
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
 		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
 	flipped = vpaddlq_u32(vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL))))));
 
 	outflankH = vbicq_u64(maskH, OO);
@@ -1320,33 +1008,14 @@ static unsigned long long flip_H4(const unsigned long long P, const unsigned lon
 	uint32x4_t OL = vtrnq_u32(vreinterpretq_u32_u64(OO), vreinterpretq_u32_u64(OO)).val[0];
 	uint32x4_t outflankL;
 	uint64x2_t outflankH, flipped;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	const uint64x2_t maskL = { 0x7f00000000808080, 0x0000000000402010 };
-=======
-	const uint32x4_t maskL = { 0x00808080, 0x7f000000, 0x00402010, 0 };
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	const uint64x2_t maskL = { 0x7f00000000808080, 0x0000000000402010 };
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 	const uint64x2_t maskH = { 0x8080808000000000, 0x0810204000000000 };
 	const uint32x4_t msb = vdupq_n_u32(0x80000000);
 	const uint64x2_t one = vdupq_n_u64(1);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
 		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
 	flipped = vpaddlq_u32(vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-=======
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(vclzq_u32(vbicq_u32(maskL, OL))))), PL);
-	flipped = vpaddlq_u32(vandq_u32(maskL, vreinterpretq_u32_s32(
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
-	flipped = vpaddlq_u32(vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL))))));
 
 	outflankH = vbicq_u64(maskH, OO);
@@ -1372,28 +1041,8 @@ static unsigned long long flip_A5(const unsigned long long P, const unsigned lon
 	uint32x4_t outflankL, outflankH, flippedL, flippedH;
 	uint32x4x2_t flippedLH;
 	uint64x2_t flipped;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	const uint64x2_t maskL = { 0x0000000001010101, 0x0000000002040810 };
-	const uint64x2_t maskH = { 0x01010100000000fe, 0x0804020000000000 };
-	const uint32x4_t msb = vdupq_n_u32(0x80000000);
-	const uint32x4_t one = vdupq_n_u32(1);
-
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OO))))), PP);
-	flippedL = vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-
-	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), OH);
-	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PH);
-	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
-=======
-	const uint32x4_t maskL = { 0x01010101, 0, 0x02040810, 0 };
-	const uint32x4_t maskH = { 0x000000fe, 0x01010100, 0, 0x08040200 };
-=======
 	const uint64x2_t maskL = { 0x0000000001010101, 0x0000000002040810 };
 	const uint64x2_t maskH = { 0x01010100000000fe, 0x0804020000000000 };
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 	const uint32x4_t msb = vdupq_n_u32(0x80000000);
 	const uint32x4_t one = vdupq_n_u32(1);
 
@@ -1404,12 +1053,7 @@ static unsigned long long flip_A5(const unsigned long long P, const unsigned lon
 
 	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), OH);
 	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PH);
-<<<<<<< HEAD
-	flippedH = vandq_u32(maskH, vqsubq_u32(outflankH, one));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 
 	flippedLH = vtrnq_u32(flippedL, flippedH);
 	flipped = vreinterpretq_u64_u32(vorrq_u32(flippedLH.val[0], flippedLH.val[1]));
@@ -1433,28 +1077,8 @@ static unsigned long long flip_B5(const unsigned long long P, const unsigned lon
 	uint32x4_t outflankL, outflankH, flippedL, flippedH;
 	uint32x4x2_t flippedLH;
 	uint64x2_t flipped;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	const uint64x2_t maskL = { 0x0000000002020202, 0x0000000004081020 };
-	const uint64x2_t maskH = { 0x02020200000000fc, 0x1008040000000000 };
-	const uint32x4_t msb = vdupq_n_u32(0x80000000);
-	const uint32x4_t one = vdupq_n_u32(1);
-
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OO))))), PP);
-	flippedL = vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-
-	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), OH);
-	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PH);
-	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
-=======
-	const uint32x4_t maskL = { 0x02020202, 0, 0x04081020, 0 };
-	const uint32x4_t maskH = { 0x000000fc, 0x02020200, 0, 0x10080400 };
-=======
 	const uint64x2_t maskL = { 0x0000000002020202, 0x0000000004081020 };
 	const uint64x2_t maskH = { 0x02020200000000fc, 0x1008040000000000 };
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 	const uint32x4_t msb = vdupq_n_u32(0x80000000);
 	const uint32x4_t one = vdupq_n_u32(1);
 
@@ -1465,12 +1089,7 @@ static unsigned long long flip_B5(const unsigned long long P, const unsigned lon
 
 	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), OH);
 	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), PH);
-<<<<<<< HEAD
-	flippedH = vandq_u32(maskH, vqsubq_u32(outflankH, one));
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	flippedH = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
 
 	flippedLH = vtrnq_u32(flippedL, flippedH);
 	flipped = vreinterpretq_u64_u32(vorrq_u32(flippedLH.val[0], flippedLH.val[1]));
@@ -1492,29 +1111,6 @@ static unsigned long long flip_C5(const unsigned long long P, const unsigned lon
 	uint32x4_t PL = vsetq_lane_u32(vgetq_lane_u32(PP, 0), PP, 3);	// LLHL
 	uint32x4_t OL = vsetq_lane_u32(vgetq_lane_u32(OO, 0), OO, 3);
 	uint32x4_t outflankL, outflankH, flippedH4, flippedL;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	uint32x2x2_t flippedLH;
-	uint64x1_t flipped;
-	const uint64x2_t maskL = { 0x0000000304040404, 0x0810204002010000 };
-	const uint64x2_t maskH = { 0x000000f804040400, 0x0001020020100800 };
-	const uint32x4_t msb = vdupq_n_u32(0x80000000);
-	const uint32x4_t one = vdupq_n_u32(1);
-
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
-	flippedL = vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-
-	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), vtrnq_u32(OO, OO).val[1]);
-	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), vtrnq_u32(PP, PP).val[1]);
-	flippedH4 = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
-
-	flippedLH = vtrn_u32(vget_high_u32(flippedL), vorr_u32(vget_low_u32(flippedH4), vget_high_u32(flippedH4)));
-=======
-	uint32x2_t flippedH2;
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	uint32x2x2_t flippedLH;
 	uint64x1_t flipped;
 	const uint64x2_t maskL = { 0x0000000304040404, 0x0810204002010000 };
@@ -1531,12 +1127,7 @@ static unsigned long long flip_C5(const unsigned long long P, const unsigned lon
 	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), vtrnq_u32(PP, PP).val[1]);
 	flippedH4 = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
 
-<<<<<<< HEAD
-	flippedLH = vtrn_u32(vget_high_u32(flippedL), flippedH2);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	flippedLH = vtrn_u32(vget_high_u32(flippedL), vorr_u32(vget_low_u32(flippedH4), vget_high_u32(flippedH4)));
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = vreinterpret_u64_u32(vorr_u32(vorr_u32(flippedLH.val[0], flippedLH.val[1]), vget_low_u32(flippedL)));
 
 	return vget_lane_u64(flipped, 0);
@@ -1556,29 +1147,6 @@ static unsigned long long flip_D5(const unsigned long long P, const unsigned lon
 	uint32x4_t PL = vsetq_lane_u32(vgetq_lane_u32(PP, 0), PP, 3);	// LLHL
 	uint32x4_t OL = vsetq_lane_u32(vgetq_lane_u32(OO, 0), OO, 3);
 	uint32x4_t outflankL, outflankH, flippedH4, flippedL;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	uint32x2x2_t flippedLH;
-	uint64x1_t flipped;
-	const uint64x2_t maskL = { 0x0000000708080808, 0x1020408004020100 };
-	const uint64x2_t maskH = { 0x000000f008080800, 0x0102040040201000 };
-	const uint32x4_t msb = vdupq_n_u32(0x80000000);
-	const uint32x4_t one = vdupq_n_u32(1);
-
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
-	flippedL = vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-
-	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), vtrnq_u32(OO, OO).val[1]);
-	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), vtrnq_u32(PP, PP).val[1]);
-	flippedH4 = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
-
-	flippedLH = vtrn_u32(vget_high_u32(flippedL), vorr_u32(vget_low_u32(flippedH4), vget_high_u32(flippedH4)));
-=======
-	uint32x2_t flippedH2;
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	uint32x2x2_t flippedLH;
 	uint64x1_t flipped;
 	const uint64x2_t maskL = { 0x0000000708080808, 0x1020408004020100 };
@@ -1595,12 +1163,7 @@ static unsigned long long flip_D5(const unsigned long long P, const unsigned lon
 	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), vtrnq_u32(PP, PP).val[1]);
 	flippedH4 = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
 
-<<<<<<< HEAD
-	flippedLH = vtrn_u32(vget_high_u32(flippedL), flippedH2);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	flippedLH = vtrn_u32(vget_high_u32(flippedL), vorr_u32(vget_low_u32(flippedH4), vget_high_u32(flippedH4)));
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = vreinterpret_u64_u32(vorr_u32(vorr_u32(flippedLH.val[0], flippedLH.val[1]), vget_low_u32(flippedL)));
 
 	return vget_lane_u64(flipped, 0);
@@ -1620,29 +1183,6 @@ static unsigned long long flip_E5(const unsigned long long P, const unsigned lon
 	uint32x4_t PL = vsetq_lane_u32(vgetq_lane_u32(PP, 0), PP, 3);	// LLHL
 	uint32x4_t OL = vsetq_lane_u32(vgetq_lane_u32(OO, 0), OO, 3);
 	uint32x4_t outflankL, outflankH, flippedH4, flippedL;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	uint32x2x2_t flippedLH;
-	uint64x1_t flipped;
-	const uint64x2_t maskL = { 0x0000000f10101010, 0x2040800008040201 };
-	const uint64x2_t maskH = { 0x000000e010101000, 0x0204080080402000 };
-	const uint32x4_t msb = vdupq_n_u32(0x80000000);
-	const uint32x4_t one = vdupq_n_u32(1);
-
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
-	flippedL = vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-
-	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), vtrnq_u32(OO, OO).val[1]);
-	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), vtrnq_u32(PP, PP).val[1]);
-	flippedH4 = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
-
-	flippedLH = vtrn_u32(vget_high_u32(flippedL), vorr_u32(vget_low_u32(flippedH4), vget_high_u32(flippedH4)));
-=======
-	uint32x2_t flippedH2;
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	uint32x2x2_t flippedLH;
 	uint64x1_t flipped;
 	const uint64x2_t maskL = { 0x0000000f10101010, 0x2040800008040201 };
@@ -1659,12 +1199,7 @@ static unsigned long long flip_E5(const unsigned long long P, const unsigned lon
 	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), vtrnq_u32(PP, PP).val[1]);
 	flippedH4 = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
 
-<<<<<<< HEAD
-	flippedLH = vtrn_u32(vget_high_u32(flippedL), flippedH2);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	flippedLH = vtrn_u32(vget_high_u32(flippedL), vorr_u32(vget_low_u32(flippedH4), vget_high_u32(flippedH4)));
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = vreinterpret_u64_u32(vorr_u32(vorr_u32(flippedLH.val[0], flippedLH.val[1]), vget_low_u32(flippedL)));
 
 	return vget_lane_u64(flipped, 0);
@@ -1684,29 +1219,6 @@ static unsigned long long flip_F5(const unsigned long long P, const unsigned lon
 	uint32x4_t PL = vsetq_lane_u32(vgetq_lane_u32(PP, 0), PP, 3);	// LLHL
 	uint32x4_t OL = vsetq_lane_u32(vgetq_lane_u32(OO, 0), OO, 3);
 	uint32x4_t outflankL, outflankH, flippedH4, flippedL;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	uint32x2x2_t flippedLH;
-	uint64x1_t flipped;
-	const uint64x2_t maskL = { 0x0000001f20202020, 0x4080000010080402 };
-	const uint64x2_t maskH = { 0x000000c020202000, 0x0408100000804000 };
-	const uint32x4_t msb = vdupq_n_u32(0x80000000);
-	const uint32x4_t one = vdupq_n_u32(1);
-
-	outflankL = vandq_u32(vshlq_u32(msb, vnegq_s32(vreinterpretq_s32_u32(
-		vclzq_u32(vbicq_u32(vreinterpretq_u32_u64(maskL), OL))))), PL);
-	flippedL = vandq_u32(vreinterpretq_u32_u64(maskL), vreinterpretq_u32_s32(
-		vnegq_s32(vreinterpretq_s32_u32(vaddq_u32(outflankL, outflankL)))));
-
-	outflankH = vbicq_u32(vreinterpretq_u32_u64(maskH), vtrnq_u32(OO, OO).val[1]);
-	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), vtrnq_u32(PP, PP).val[1]);
-	flippedH4 = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
-
-	flippedLH = vtrn_u32(vget_high_u32(flippedL), vorr_u32(vget_low_u32(flippedH4), vget_high_u32(flippedH4)));
-=======
-	uint32x2_t flippedH2;
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	uint32x2x2_t flippedLH;
 	uint64x1_t flipped;
 	const uint64x2_t maskL = { 0x0000001f20202020, 0x4080000010080402 };
@@ -1723,12 +1235,7 @@ static unsigned long long flip_F5(const unsigned long long P, const unsigned lon
 	outflankH = vandq_u32(vbicq_u32(outflankH, vsubq_u32(outflankH, one)), vtrnq_u32(PP, PP).val[1]);
 	flippedH4 = vandq_u32(vreinterpretq_u32_u64(maskH), vqsubq_u32(outflankH, one));
 
-<<<<<<< HEAD
-	flippedLH = vtrn_u32(vget_high_u32(flippedL), flippedH2);
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	flippedLH = vtrn_u32(vget_high_u32(flippedL), vorr_u32(vget_low_u32(flippedH4), vget_high_u32(flippedH4)));
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped = vreinterpret_u64_u32(vorr_u32(vorr_u32(flippedLH.val[0], flippedLH.val[1]), vget_low_u32(flippedL)));
 
 	return vget_lane_u64(flipped, 0);
@@ -1887,18 +1394,8 @@ static unsigned long long flip_D6(const unsigned long long P, const unsigned lon
 	outflank_h = OUTFLANK_3[(O >> 41) & 0x3f] & rotl8(P >> 40, 3);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_3_H[outflank_h] << 40;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 16) & 0x08142240) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000081422418000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-=======
-	outflank_d = ((P & 0x0000081422418000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-	outflank_d = OUTFLANK_3[((O & 0x0000081422400000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 16) & 0x08142240) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000081422418000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0000081422400000;	// A3D6H2
 
 	flipped |= (((P >> 9) & 0x0010000000000000) | ((P >> 8) & 0x0008000000000000) | ((P >> 7) & 0x0004000000000000)) & O;
@@ -1924,18 +1421,8 @@ static unsigned long long flip_E6(const unsigned long long P, const unsigned lon
 	outflank_h = OUTFLANK_4[(O >> 41) & 0x3f] & rotl8(P >> 40, 2);
 	flipped |= (unsigned long long)(unsigned char) FLIPPED_4_H[outflank_h] << 40;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 16) & 0x10284402) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0000102844820100) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-=======
-	outflank_d = ((P & 0x0000102844820100) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-	outflank_d = OUTFLANK_4[((O & 0x0000102844020000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 16) & 0x10284402) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0000102844820100) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_4_H[outflank_d] & 0x0000102844020000;	// A2E6H3
 
 	flipped |= (((P >> 9) & 0x0020000000000000) | ((P >> 8) & 0x0010000000000000) | ((P >> 7) & 0x0008000000000000)) & O;
@@ -2111,18 +1598,8 @@ static unsigned long long flip_D7(const unsigned long long P, const unsigned lon
 	outflank_v = outflank_right(O, 0x0000080808080808) & P;
 	flipped  = (outflank_v * -2) & 0x0000080808080808;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 24) & 0x08142240) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0008142241800000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-=======
-	outflank_d = ((P & 0x0008142241800000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
-	outflank_d = OUTFLANK_3[((O & 0x0008142240000000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 24) & 0x08142240) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0008142241800000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0008142240000000;	// A4D7H3
 
 	outflank_h = OUTFLANK_3[(O >> 49) & 0x3f] & rotl8(P >> 48, 3);
@@ -2146,18 +1623,8 @@ static unsigned long long flip_E7(const unsigned long long P, const unsigned lon
 	outflank_v = outflank_right(O, 0x0000101010101010) & P;
 	flipped  = (outflank_v * -2) & 0x0000101010101010;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 24) & 0x10284402) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0010284482010000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-=======
-	outflank_d = ((P & 0x0010284482010000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
-	outflank_d = OUTFLANK_4[((O & 0x0010284402000000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 24) & 0x10284402) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0010284482010000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_4_H[outflank_d] & 0x0010284402000000;	// A3E7H4
 
 	outflank_h = OUTFLANK_4[(O >> 49) & 0x3f] & rotl8(P >> 48, 2);
@@ -2326,18 +1793,8 @@ static unsigned long long flip_D8(const unsigned long long P, const unsigned lon
 	outflank_v = outflank_right(O, 0x0008080808080808) & P;
 	flipped  = (outflank_v * -2) & 0x0008080808080808;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 32) & 0x08142240) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x0814224180000000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]e0cba...
-=======
-	outflank_d = ((P & 0x0814224180000000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]e0cba...
-	outflank_d = OUTFLANK_3[((O & 0x0814224000000000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 	outflank_d = OUTFLANK_3[(((unsigned int) (O >> 32) & 0x08142240) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x0814224180000000) * 0x0101010101010101) >> 53;	// hgfedc[bahgf]e0cba...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_3_H[outflank_d] & 0x0814224000000000;	// A5D8H4
 
 	outflank_h = OUTFLANK_3[(O >> 57) & 0x3f] & rotl8(P >> 56, 3);
@@ -2361,18 +1818,8 @@ static unsigned long long flip_E8(const unsigned long long P, const unsigned lon
 	outflank_v = outflank_right(O, 0x0010101010101010) & P;
 	flipped  = (outflank_v * -2) & 0x0010101010101010;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 32) & 0x10284402) * 0x01010101) >> 25];
 	outflank_d &= ((P & 0x1028448201000000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]f0dcba...
-=======
-	outflank_d = ((P & 0x1028448201000000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]f0dcba...
-	outflank_d = OUTFLANK_4[((O & 0x1028440200000000) * 0x0101010101010101) >> 57] & outflank_d;
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	outflank_d = OUTFLANK_4[(((unsigned int) (O >> 32) & 0x10284402) * 0x01010101) >> 25];
-	outflank_d &= ((P & 0x1028448201000000) * 0x0101010101010101) >> 54;	// hgfed[cbahg]f0dcba...
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped |= FLIPPED_4_H[outflank_d] & 0x1028440200000000;	// A4E8H5
 
 	outflank_h = OUTFLANK_4[(O >> 57) & 0x3f] & rotl8(P >> 56, 2);
@@ -2469,19 +1916,8 @@ static unsigned long long flip_pass(const unsigned long long P, const unsigned l
 	return 0;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 /** Array of functions to compute flipped discs */
 unsigned long long (*flip_neon[])(const unsigned long long, const unsigned long long) = {
-=======
-
-/** Array of functions to compute flipped discs */
-unsigned long long (*flip[])(const unsigned long long, const unsigned long long) = {
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-/** Array of functions to compute flipped discs */
-unsigned long long (*flip_neon[])(const unsigned long long, const unsigned long long) = {
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flip_A1, flip_B1, flip_C1, flip_D1, flip_E1, flip_F1, flip_G1, flip_H1,
 	flip_A2, flip_B2, flip_C2, flip_D2, flip_E2, flip_F2, flip_G2, flip_H2,
 	flip_A3, flip_B3, flip_C3, flip_D3, flip_E3, flip_F3, flip_G3, flip_H3,
@@ -2493,11 +1929,4 @@ unsigned long long (*flip_neon[])(const unsigned long long, const unsigned long
 	flip_pass, flip_pass
 };
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-#endif
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
 #endif
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
diff --git a/src/flip_neon_lzcnt.c b/src/flip_neon_lzcnt.c
index ae7aa49..d5cf0f4 100644
--- a/src/flip_neon_lzcnt.c
+++ b/src/flip_neon_lzcnt.c
@@ -14,10 +14,6 @@
 
 #include "arm_neon.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 static const uint64x2_t lrmask_v4[66][4] = {
 	{{ 0x00000000000000fe, 0x0101010101010100 }, { 0x8040201008040200, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
 	{{ 0x00000000000000fc, 0x0202020202020200 }, { 0x0080402010080400, 0x0000000000000100 }, { 0x0000000000000001, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
@@ -85,147 +81,6 @@ static const uint64x2_t lrmask_v4[66][4] = {
 	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x7f00000000000000, 0x0080808080808080 }, { 0x0040201008040201, 0x0000000000000000 }},
 	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},	// pass
 	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }}
-<<<<<<< HEAD
-=======
-static const uint64x2_t lmask_v4[66][2] = {
-	{{ 0x00000000000000fe, 0x0101010101010100 }, { 0x8040201008040200, 0x0000000000000000 }},
-	{{ 0x00000000000000fc, 0x0202020202020200 }, { 0x0080402010080400, 0x0000000000000100 }},
-	{{ 0x00000000000000f8, 0x0404040404040400 }, { 0x0000804020100800, 0x0000000000010200 }},
-	{{ 0x00000000000000f0, 0x0808080808080800 }, { 0x0000008040201000, 0x0000000001020400 }},
-	{{ 0x00000000000000e0, 0x1010101010101000 }, { 0x0000000080402000, 0x0000000102040800 }},
-	{{ 0x00000000000000c0, 0x2020202020202000 }, { 0x0000000000804000, 0x0000010204081000 }},
-	{{ 0x0000000000000080, 0x4040404040404000 }, { 0x0000000000008000, 0x0001020408102000 }},
-	{{ 0x0000000000000000, 0x8080808080808000 }, { 0x0000000000000000, 0x0102040810204000 }},
-	{{ 0x000000000000fe00, 0x0101010101010000 }, { 0x4020100804020000, 0x0000000000000000 }},
-	{{ 0x000000000000fc00, 0x0202020202020000 }, { 0x8040201008040000, 0x0000000000010000 }},
-	{{ 0x000000000000f800, 0x0404040404040000 }, { 0x0080402010080000, 0x0000000001020000 }},
-	{{ 0x000000000000f000, 0x0808080808080000 }, { 0x0000804020100000, 0x0000000102040000 }},
-	{{ 0x000000000000e000, 0x1010101010100000 }, { 0x0000008040200000, 0x0000010204080000 }},
-	{{ 0x000000000000c000, 0x2020202020200000 }, { 0x0000000080400000, 0x0001020408100000 }},
-	{{ 0x0000000000008000, 0x4040404040400000 }, { 0x0000000000800000, 0x0102040810200000 }},
-	{{ 0x0000000000000000, 0x8080808080800000 }, { 0x0000000000000000, 0x0204081020400000 }},
-	{{ 0x0000000000fe0000, 0x0101010101000000 }, { 0x2010080402000000, 0x0000000000000000 }},
-	{{ 0x0000000000fc0000, 0x0202020202000000 }, { 0x4020100804000000, 0x0000000001000000 }},
-	{{ 0x0000000000f80000, 0x0404040404000000 }, { 0x8040201008000000, 0x0000000102000000 }},
-	{{ 0x0000000000f00000, 0x0808080808000000 }, { 0x0080402010000000, 0x0000010204000000 }},
-	{{ 0x0000000000e00000, 0x1010101010000000 }, { 0x0000804020000000, 0x0001020408000000 }},
-	{{ 0x0000000000c00000, 0x2020202020000000 }, { 0x0000008040000000, 0x0102040810000000 }},
-	{{ 0x0000000000800000, 0x4040404040000000 }, { 0x0000000080000000, 0x0204081020000000 }},
-	{{ 0x0000000000000000, 0x8080808080000000 }, { 0x0000000000000000, 0x0408102040000000 }},
-	{{ 0x00000000fe000000, 0x0101010100000000 }, { 0x1008040200000000, 0x0000000000000000 }},
-	{{ 0x00000000fc000000, 0x0202020200000000 }, { 0x2010080400000000, 0x0000000100000000 }},
-	{{ 0x00000000f8000000, 0x0404040400000000 }, { 0x4020100800000000, 0x0000010200000000 }},
-	{{ 0x00000000f0000000, 0x0808080800000000 }, { 0x8040201000000000, 0x0001020400000000 }},
-	{{ 0x00000000e0000000, 0x1010101000000000 }, { 0x0080402000000000, 0x0102040800000000 }},
-	{{ 0x00000000c0000000, 0x2020202000000000 }, { 0x0000804000000000, 0x0204081000000000 }},
-	{{ 0x0000000080000000, 0x4040404000000000 }, { 0x0000008000000000, 0x0408102000000000 }},
-	{{ 0x0000000000000000, 0x8080808000000000 }, { 0x0000000000000000, 0x0810204000000000 }},
-	{{ 0x000000fe00000000, 0x0101010000000000 }, { 0x0804020000000000, 0x0000000000000000 }},
-	{{ 0x000000fc00000000, 0x0202020000000000 }, { 0x1008040000000000, 0x0000010000000000 }},
-	{{ 0x000000f800000000, 0x0404040000000000 }, { 0x2010080000000000, 0x0001020000000000 }},
-	{{ 0x000000f000000000, 0x0808080000000000 }, { 0x4020100000000000, 0x0102040000000000 }},
-	{{ 0x000000e000000000, 0x1010100000000000 }, { 0x8040200000000000, 0x0204080000000000 }},
-	{{ 0x000000c000000000, 0x2020200000000000 }, { 0x0080400000000000, 0x0408100000000000 }},
-	{{ 0x0000008000000000, 0x4040400000000000 }, { 0x0000800000000000, 0x0810200000000000 }},
-	{{ 0x0000000000000000, 0x8080800000000000 }, { 0x0000000000000000, 0x1020400000000000 }},
-	{{ 0x0000fe0000000000, 0x0101000000000000 }, { 0x0402000000000000, 0x0000000000000000 }},
-	{{ 0x0000fc0000000000, 0x0202000000000000 }, { 0x0804000000000000, 0x0001000000000000 }},
-	{{ 0x0000f80000000000, 0x0404000000000000 }, { 0x1008000000000000, 0x0102000000000000 }},
-	{{ 0x0000f00000000000, 0x0808000000000000 }, { 0x2010000000000000, 0x0204000000000000 }},
-	{{ 0x0000e00000000000, 0x1010000000000000 }, { 0x4020000000000000, 0x0408000000000000 }},
-	{{ 0x0000c00000000000, 0x2020000000000000 }, { 0x8040000000000000, 0x0810000000000000 }},
-	{{ 0x0000800000000000, 0x4040000000000000 }, { 0x0080000000000000, 0x1020000000000000 }},
-	{{ 0x0000000000000000, 0x8080000000000000 }, { 0x0000000000000000, 0x2040000000000000 }},
-	{{ 0x00fe000000000000, 0x0100000000000000 }, { 0x0200000000000000, 0x0000000000000000 }},
-	{{ 0x00fc000000000000, 0x0200000000000000 }, { 0x0400000000000000, 0x0100000000000000 }},
-	{{ 0x00f8000000000000, 0x0400000000000000 }, { 0x0800000000000000, 0x0200000000000000 }},
-	{{ 0x00f0000000000000, 0x0800000000000000 }, { 0x1000000000000000, 0x0400000000000000 }},
-	{{ 0x00e0000000000000, 0x1000000000000000 }, { 0x2000000000000000, 0x0800000000000000 }},
-	{{ 0x00c0000000000000, 0x2000000000000000 }, { 0x4000000000000000, 0x1000000000000000 }},
-	{{ 0x0080000000000000, 0x4000000000000000 }, { 0x8000000000000000, 0x2000000000000000 }},
-	{{ 0x0000000000000000, 0x8000000000000000 }, { 0x0000000000000000, 0x4000000000000000 }},
-	{{ 0xfe00000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xfc00000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf800000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xe000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xc000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x8000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }}
-};
-
-static const uint64x2_t rmask_v4[66][2] = {
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000001, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000003, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000007, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000000f, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000001f, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000003f, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000007f, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000001 }, { 0x0000000000000000, 0x0000000000000002 }},
-	{{ 0x0000000000000100, 0x0000000000000002 }, { 0x0000000000000001, 0x0000000000000004 }},
-	{{ 0x0000000000000300, 0x0000000000000004 }, { 0x0000000000000002, 0x0000000000000008 }},
-	{{ 0x0000000000000700, 0x0000000000000008 }, { 0x0000000000000004, 0x0000000000000010 }},
-	{{ 0x0000000000000f00, 0x0000000000000010 }, { 0x0000000000000008, 0x0000000000000020 }},
-	{{ 0x0000000000001f00, 0x0000000000000020 }, { 0x0000000000000010, 0x0000000000000040 }},
-	{{ 0x0000000000003f00, 0x0000000000000040 }, { 0x0000000000000020, 0x0000000000000080 }},
-	{{ 0x0000000000007f00, 0x0000000000000080 }, { 0x0000000000000040, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000101 }, { 0x0000000000000000, 0x0000000000000204 }},
-	{{ 0x0000000000010000, 0x0000000000000202 }, { 0x0000000000000100, 0x0000000000000408 }},
-	{{ 0x0000000000030000, 0x0000000000000404 }, { 0x0000000000000201, 0x0000000000000810 }},
-	{{ 0x0000000000070000, 0x0000000000000808 }, { 0x0000000000000402, 0x0000000000001020 }},
-	{{ 0x00000000000f0000, 0x0000000000001010 }, { 0x0000000000000804, 0x0000000000002040 }},
-	{{ 0x00000000001f0000, 0x0000000000002020 }, { 0x0000000000001008, 0x0000000000004080 }},
-	{{ 0x00000000003f0000, 0x0000000000004040 }, { 0x0000000000002010, 0x0000000000008000 }},
-	{{ 0x00000000007f0000, 0x0000000000008080 }, { 0x0000000000004020, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000010101 }, { 0x0000000000000000, 0x0000000000020408 }},
-	{{ 0x0000000001000000, 0x0000000000020202 }, { 0x0000000000010000, 0x0000000000040810 }},
-	{{ 0x0000000003000000, 0x0000000000040404 }, { 0x0000000000020100, 0x0000000000081020 }},
-	{{ 0x0000000007000000, 0x0000000000080808 }, { 0x0000000000040201, 0x0000000000102040 }},
-	{{ 0x000000000f000000, 0x0000000000101010 }, { 0x0000000000080402, 0x0000000000204080 }},
-	{{ 0x000000001f000000, 0x0000000000202020 }, { 0x0000000000100804, 0x0000000000408000 }},
-	{{ 0x000000003f000000, 0x0000000000404040 }, { 0x0000000000201008, 0x0000000000800000 }},
-	{{ 0x000000007f000000, 0x0000000000808080 }, { 0x0000000000402010, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000001010101 }, { 0x0000000000000000, 0x0000000002040810 }},
-	{{ 0x0000000100000000, 0x0000000002020202 }, { 0x0000000001000000, 0x0000000004081020 }},
-	{{ 0x0000000300000000, 0x0000000004040404 }, { 0x0000000002010000, 0x0000000008102040 }},
-	{{ 0x0000000700000000, 0x0000000008080808 }, { 0x0000000004020100, 0x0000000010204080 }},
-	{{ 0x0000000f00000000, 0x0000000010101010 }, { 0x0000000008040201, 0x0000000020408000 }},
-	{{ 0x0000001f00000000, 0x0000000020202020 }, { 0x0000000010080402, 0x0000000040800000 }},
-	{{ 0x0000003f00000000, 0x0000000040404040 }, { 0x0000000020100804, 0x0000000080000000 }},
-	{{ 0x0000007f00000000, 0x0000000080808080 }, { 0x0000000040201008, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000101010101 }, { 0x0000000000000000, 0x0000000204081020 }},
-	{{ 0x0000010000000000, 0x0000000202020202 }, { 0x0000000100000000, 0x0000000408102040 }},
-	{{ 0x0000030000000000, 0x0000000404040404 }, { 0x0000000201000000, 0x0000000810204080 }},
-	{{ 0x0000070000000000, 0x0000000808080808 }, { 0x0000000402010000, 0x0000001020408000 }},
-	{{ 0x00000f0000000000, 0x0000001010101010 }, { 0x0000000804020100, 0x0000002040800000 }},
-	{{ 0x00001f0000000000, 0x0000002020202020 }, { 0x0000001008040201, 0x0000004080000000 }},
-	{{ 0x00003f0000000000, 0x0000004040404040 }, { 0x0000002010080402, 0x0000008000000000 }},
-	{{ 0x00007f0000000000, 0x0000008080808080 }, { 0x0000004020100804, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000010101010101 }, { 0x0000000000000000, 0x0000020408102040 }},
-	{{ 0x0001000000000000, 0x0000020202020202 }, { 0x0000010000000000, 0x0000040810204080 }},
-	{{ 0x0003000000000000, 0x0000040404040404 }, { 0x0000020100000000, 0x0000081020408000 }},
-	{{ 0x0007000000000000, 0x0000080808080808 }, { 0x0000040201000000, 0x0000102040800000 }},
-	{{ 0x000f000000000000, 0x0000101010101010 }, { 0x0000080402010000, 0x0000204080000000 }},
-	{{ 0x001f000000000000, 0x0000202020202020 }, { 0x0000100804020100, 0x0000408000000000 }},
-	{{ 0x003f000000000000, 0x0000404040404040 }, { 0x0000201008040201, 0x0000800000000000 }},
-	{{ 0x007f000000000000, 0x0000808080808080 }, { 0x0000402010080402, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0001010101010101 }, { 0x0000000000000000, 0x0002040810204080 }},
-	{{ 0x0100000000000000, 0x0002020202020202 }, { 0x0001000000000000, 0x0004081020408000 }},
-	{{ 0x0300000000000000, 0x0004040404040404 }, { 0x0002010000000000, 0x0008102040800000 }},
-	{{ 0x0700000000000000, 0x0008080808080808 }, { 0x0004020100000000, 0x0010204080000000 }},
-	{{ 0x0f00000000000000, 0x0010101010101010 }, { 0x0008040201000000, 0x0020408000000000 }},
-	{{ 0x1f00000000000000, 0x0020202020202020 }, { 0x0010080402010000, 0x0040800000000000 }},
-	{{ 0x3f00000000000000, 0x0040404040404040 }, { 0x0020100804020100, 0x0080000000000000 }},
-	{{ 0x7f00000000000000, 0x0080808080808080 }, { 0x0040201008040201, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }}
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 };
 
 /**
@@ -237,12 +92,8 @@ static const uint64x2_t rmask_v4[66][2] = {
  * @return flipped disc pattern.
  */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifndef HAS_CPU_64
 #define vceqzq_u32(x)	vmvnq_u32(vtstq_u32((x), (x)))
-<<<<<<< HEAD
 #endif
 
 uint64x2_t mm_Flip(uint64x2_t OP, int pos)
@@ -279,59 +130,3 @@ uint64x2_t mm_Flip(uint64x2_t OP, int pos)
 
 	return vorrq_u64(flip, vextq_u64(flip, flip, 1));
 }
-=======
-=======
-#ifndef __aarch64__
-=======
-#ifndef HAS_CPU_64
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-#define vceqzq_u32(x)	vmvnq_u32(vtstq_u32((x), (x)))
-#define	vnegq_s64(x)	vsubq_s64(vdupq_n_s64(0), (x))
-=======
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-#endif
-
-<<<<<<< HEAD
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-unsigned long long Flip(int pos, unsigned long long P, unsigned long long O)
-=======
-uint64x2_t mm_Flip(uint64x2_t OP, int pos)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-{
-	uint64x2_t	flip, oflank0, mask0;				uint64x2_t	oflank1, mask1;
-	int32x4_t	clz0;						int32x4_t	clz1;
-	uint32x4_t	msb0;						uint32x4_t	msb1;
-	const uint64x2_t one = vdupq_n_u64(1);
-	uint64x2_t PP = vdupq_lane_u64(vget_low_u64(OP), 0);
-	uint64x2_t OO = vdupq_lane_u64(vget_high_u64(OP), 0);
-
-	mask0 = lrmask_v4[pos][2];					mask1 = lrmask_v4[pos][3];
-		// isolate non-opponent MS1B
-	oflank0 = vbicq_u64(mask0, OO);					oflank1 = vbicq_u64(mask1, OO);
-		// outflank = (0x8000000000000000ULL >> lzcnt) & P
-	clz0 = vclzq_s32(vreinterpretq_s32_u64(oflank0));		clz1 = vclzq_s32(vreinterpretq_s32_u64(oflank1));
-		// set loword's MSB if hiword = 0
-	msb0 = vreinterpretq_u32_u64(vshrq_n_u64(oflank0, 32));		msb1 = vreinterpretq_u32_u64(vshrq_n_u64(oflank1, 32));
-	msb0 = vshlq_n_u32(vceqzq_u32(msb0), 31);			msb1 = vshlq_n_u32(vceqzq_u32(msb1), 31);
-	msb0 = vshlq_u32(msb0, vnegq_s32(clz0));			msb1 = vshlq_u32(msb1, vnegq_s32(clz1));
-		// 0 if outflank is P, otherwise oflank = msb
-	oflank0 = vbicq_u64(vreinterpretq_u64_u32(msb0), PP);		oflank1 = vbicq_u64(vreinterpretq_u64_u32(msb1), PP);
-		// set all bits higher than outflank
-	oflank0 = vsubq_u64(oflank0, vreinterpretq_u64_u32(msb0));	oflank1 = vsubq_u64(oflank1, vreinterpretq_u64_u32(msb1));
-	flip = vandq_u64(vbslq_u64(mask1, oflank1, vandq_u64(mask0, oflank0)), OO);
-
-	mask0 = lrmask_v4[pos][0];					mask1 = lrmask_v4[pos][1];
-		// get outflank with carry-propagation
-	oflank0 = vaddq_u64(vornq_u64(OO, mask0), one);			oflank1 = vaddq_u64(vornq_u64(OO, mask1), one);
-	oflank0 = vandq_u64(vandq_u64(PP, mask0), oflank0);		oflank1 = vandq_u64(vandq_u64(PP, mask1), oflank1);
-		// set all bits lower than oflank, using satulation if oflank = 0
-	oflank0 = vqsubq_u64(oflank0, one);				oflank1 = vqsubq_u64(oflank1, one);
-	flip = vbslq_u64(mask1, oflank1, vbslq_u64(mask0, oflank0, flip));
-
-	return vorrq_u64(flip, vextq_u64(flip, flip, 1));
-}
-<<<<<<< HEAD
-
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
diff --git a/src/flip_neon_ppfill.c b/src/flip_neon_ppfill.c
index 675a7b3..cd74513 100644
--- a/src/flip_neon_ppfill.c
+++ b/src/flip_neon_ppfill.c
@@ -15,10 +15,6 @@
 
 #include "arm_neon.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 static const uint64x2_t lrmask_v4[66][4] = {
 	{{ 0x00000000000000fe, 0x0101010101010100 }, { 0x8040201008040200, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
 	{{ 0x00000000000000fc, 0x0202020202020200 }, { 0x0080402010080400, 0x0000000000000100 }, { 0x0000000000000001, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
@@ -86,147 +82,6 @@ static const uint64x2_t lrmask_v4[66][4] = {
 	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x7f00000000000000, 0x0080808080808080 }, { 0x0040201008040201, 0x0000000000000000 }},
 	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},	// pass
 	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }}
-<<<<<<< HEAD
-=======
-static const uint64x2_t lmask_v4[66][2] = {
-	{{ 0x00000000000000fe, 0x0101010101010100 }, { 0x8040201008040200, 0x0000000000000000 }},
-	{{ 0x00000000000000fc, 0x0202020202020200 }, { 0x0080402010080400, 0x0000000000000100 }},
-	{{ 0x00000000000000f8, 0x0404040404040400 }, { 0x0000804020100800, 0x0000000000010200 }},
-	{{ 0x00000000000000f0, 0x0808080808080800 }, { 0x0000008040201000, 0x0000000001020400 }},
-	{{ 0x00000000000000e0, 0x1010101010101000 }, { 0x0000000080402000, 0x0000000102040800 }},
-	{{ 0x00000000000000c0, 0x2020202020202000 }, { 0x0000000000804000, 0x0000010204081000 }},
-	{{ 0x0000000000000080, 0x4040404040404000 }, { 0x0000000000008000, 0x0001020408102000 }},
-	{{ 0x0000000000000000, 0x8080808080808000 }, { 0x0000000000000000, 0x0102040810204000 }},
-	{{ 0x000000000000fe00, 0x0101010101010000 }, { 0x4020100804020000, 0x0000000000000000 }},
-	{{ 0x000000000000fc00, 0x0202020202020000 }, { 0x8040201008040000, 0x0000000000010000 }},
-	{{ 0x000000000000f800, 0x0404040404040000 }, { 0x0080402010080000, 0x0000000001020000 }},
-	{{ 0x000000000000f000, 0x0808080808080000 }, { 0x0000804020100000, 0x0000000102040000 }},
-	{{ 0x000000000000e000, 0x1010101010100000 }, { 0x0000008040200000, 0x0000010204080000 }},
-	{{ 0x000000000000c000, 0x2020202020200000 }, { 0x0000000080400000, 0x0001020408100000 }},
-	{{ 0x0000000000008000, 0x4040404040400000 }, { 0x0000000000800000, 0x0102040810200000 }},
-	{{ 0x0000000000000000, 0x8080808080800000 }, { 0x0000000000000000, 0x0204081020400000 }},
-	{{ 0x0000000000fe0000, 0x0101010101000000 }, { 0x2010080402000000, 0x0000000000000000 }},
-	{{ 0x0000000000fc0000, 0x0202020202000000 }, { 0x4020100804000000, 0x0000000001000000 }},
-	{{ 0x0000000000f80000, 0x0404040404000000 }, { 0x8040201008000000, 0x0000000102000000 }},
-	{{ 0x0000000000f00000, 0x0808080808000000 }, { 0x0080402010000000, 0x0000010204000000 }},
-	{{ 0x0000000000e00000, 0x1010101010000000 }, { 0x0000804020000000, 0x0001020408000000 }},
-	{{ 0x0000000000c00000, 0x2020202020000000 }, { 0x0000008040000000, 0x0102040810000000 }},
-	{{ 0x0000000000800000, 0x4040404040000000 }, { 0x0000000080000000, 0x0204081020000000 }},
-	{{ 0x0000000000000000, 0x8080808080000000 }, { 0x0000000000000000, 0x0408102040000000 }},
-	{{ 0x00000000fe000000, 0x0101010100000000 }, { 0x1008040200000000, 0x0000000000000000 }},
-	{{ 0x00000000fc000000, 0x0202020200000000 }, { 0x2010080400000000, 0x0000000100000000 }},
-	{{ 0x00000000f8000000, 0x0404040400000000 }, { 0x4020100800000000, 0x0000010200000000 }},
-	{{ 0x00000000f0000000, 0x0808080800000000 }, { 0x8040201000000000, 0x0001020400000000 }},
-	{{ 0x00000000e0000000, 0x1010101000000000 }, { 0x0080402000000000, 0x0102040800000000 }},
-	{{ 0x00000000c0000000, 0x2020202000000000 }, { 0x0000804000000000, 0x0204081000000000 }},
-	{{ 0x0000000080000000, 0x4040404000000000 }, { 0x0000008000000000, 0x0408102000000000 }},
-	{{ 0x0000000000000000, 0x8080808000000000 }, { 0x0000000000000000, 0x0810204000000000 }},
-	{{ 0x000000fe00000000, 0x0101010000000000 }, { 0x0804020000000000, 0x0000000000000000 }},
-	{{ 0x000000fc00000000, 0x0202020000000000 }, { 0x1008040000000000, 0x0000010000000000 }},
-	{{ 0x000000f800000000, 0x0404040000000000 }, { 0x2010080000000000, 0x0001020000000000 }},
-	{{ 0x000000f000000000, 0x0808080000000000 }, { 0x4020100000000000, 0x0102040000000000 }},
-	{{ 0x000000e000000000, 0x1010100000000000 }, { 0x8040200000000000, 0x0204080000000000 }},
-	{{ 0x000000c000000000, 0x2020200000000000 }, { 0x0080400000000000, 0x0408100000000000 }},
-	{{ 0x0000008000000000, 0x4040400000000000 }, { 0x0000800000000000, 0x0810200000000000 }},
-	{{ 0x0000000000000000, 0x8080800000000000 }, { 0x0000000000000000, 0x1020400000000000 }},
-	{{ 0x0000fe0000000000, 0x0101000000000000 }, { 0x0402000000000000, 0x0000000000000000 }},
-	{{ 0x0000fc0000000000, 0x0202000000000000 }, { 0x0804000000000000, 0x0001000000000000 }},
-	{{ 0x0000f80000000000, 0x0404000000000000 }, { 0x1008000000000000, 0x0102000000000000 }},
-	{{ 0x0000f00000000000, 0x0808000000000000 }, { 0x2010000000000000, 0x0204000000000000 }},
-	{{ 0x0000e00000000000, 0x1010000000000000 }, { 0x4020000000000000, 0x0408000000000000 }},
-	{{ 0x0000c00000000000, 0x2020000000000000 }, { 0x8040000000000000, 0x0810000000000000 }},
-	{{ 0x0000800000000000, 0x4040000000000000 }, { 0x0080000000000000, 0x1020000000000000 }},
-	{{ 0x0000000000000000, 0x8080000000000000 }, { 0x0000000000000000, 0x2040000000000000 }},
-	{{ 0x00fe000000000000, 0x0100000000000000 }, { 0x0200000000000000, 0x0000000000000000 }},
-	{{ 0x00fc000000000000, 0x0200000000000000 }, { 0x0400000000000000, 0x0100000000000000 }},
-	{{ 0x00f8000000000000, 0x0400000000000000 }, { 0x0800000000000000, 0x0200000000000000 }},
-	{{ 0x00f0000000000000, 0x0800000000000000 }, { 0x1000000000000000, 0x0400000000000000 }},
-	{{ 0x00e0000000000000, 0x1000000000000000 }, { 0x2000000000000000, 0x0800000000000000 }},
-	{{ 0x00c0000000000000, 0x2000000000000000 }, { 0x4000000000000000, 0x1000000000000000 }},
-	{{ 0x0080000000000000, 0x4000000000000000 }, { 0x8000000000000000, 0x2000000000000000 }},
-	{{ 0x0000000000000000, 0x8000000000000000 }, { 0x0000000000000000, 0x4000000000000000 }},
-	{{ 0xfe00000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xfc00000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf800000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xf000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xe000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0xc000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x8000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }}
-};
-
-static const uint64x2_t rmask_v4[66][2] = {
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000001, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000003, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000007, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000000f, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000001f, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000003f, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x000000000000007f, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000001 }, { 0x0000000000000000, 0x0000000000000002 }},
-	{{ 0x0000000000000100, 0x0000000000000002 }, { 0x0000000000000001, 0x0000000000000004 }},
-	{{ 0x0000000000000300, 0x0000000000000004 }, { 0x0000000000000002, 0x0000000000000008 }},
-	{{ 0x0000000000000700, 0x0000000000000008 }, { 0x0000000000000004, 0x0000000000000010 }},
-	{{ 0x0000000000000f00, 0x0000000000000010 }, { 0x0000000000000008, 0x0000000000000020 }},
-	{{ 0x0000000000001f00, 0x0000000000000020 }, { 0x0000000000000010, 0x0000000000000040 }},
-	{{ 0x0000000000003f00, 0x0000000000000040 }, { 0x0000000000000020, 0x0000000000000080 }},
-	{{ 0x0000000000007f00, 0x0000000000000080 }, { 0x0000000000000040, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000101 }, { 0x0000000000000000, 0x0000000000000204 }},
-	{{ 0x0000000000010000, 0x0000000000000202 }, { 0x0000000000000100, 0x0000000000000408 }},
-	{{ 0x0000000000030000, 0x0000000000000404 }, { 0x0000000000000201, 0x0000000000000810 }},
-	{{ 0x0000000000070000, 0x0000000000000808 }, { 0x0000000000000402, 0x0000000000001020 }},
-	{{ 0x00000000000f0000, 0x0000000000001010 }, { 0x0000000000000804, 0x0000000000002040 }},
-	{{ 0x00000000001f0000, 0x0000000000002020 }, { 0x0000000000001008, 0x0000000000004080 }},
-	{{ 0x00000000003f0000, 0x0000000000004040 }, { 0x0000000000002010, 0x0000000000008000 }},
-	{{ 0x00000000007f0000, 0x0000000000008080 }, { 0x0000000000004020, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000010101 }, { 0x0000000000000000, 0x0000000000020408 }},
-	{{ 0x0000000001000000, 0x0000000000020202 }, { 0x0000000000010000, 0x0000000000040810 }},
-	{{ 0x0000000003000000, 0x0000000000040404 }, { 0x0000000000020100, 0x0000000000081020 }},
-	{{ 0x0000000007000000, 0x0000000000080808 }, { 0x0000000000040201, 0x0000000000102040 }},
-	{{ 0x000000000f000000, 0x0000000000101010 }, { 0x0000000000080402, 0x0000000000204080 }},
-	{{ 0x000000001f000000, 0x0000000000202020 }, { 0x0000000000100804, 0x0000000000408000 }},
-	{{ 0x000000003f000000, 0x0000000000404040 }, { 0x0000000000201008, 0x0000000000800000 }},
-	{{ 0x000000007f000000, 0x0000000000808080 }, { 0x0000000000402010, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000001010101 }, { 0x0000000000000000, 0x0000000002040810 }},
-	{{ 0x0000000100000000, 0x0000000002020202 }, { 0x0000000001000000, 0x0000000004081020 }},
-	{{ 0x0000000300000000, 0x0000000004040404 }, { 0x0000000002010000, 0x0000000008102040 }},
-	{{ 0x0000000700000000, 0x0000000008080808 }, { 0x0000000004020100, 0x0000000010204080 }},
-	{{ 0x0000000f00000000, 0x0000000010101010 }, { 0x0000000008040201, 0x0000000020408000 }},
-	{{ 0x0000001f00000000, 0x0000000020202020 }, { 0x0000000010080402, 0x0000000040800000 }},
-	{{ 0x0000003f00000000, 0x0000000040404040 }, { 0x0000000020100804, 0x0000000080000000 }},
-	{{ 0x0000007f00000000, 0x0000000080808080 }, { 0x0000000040201008, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000101010101 }, { 0x0000000000000000, 0x0000000204081020 }},
-	{{ 0x0000010000000000, 0x0000000202020202 }, { 0x0000000100000000, 0x0000000408102040 }},
-	{{ 0x0000030000000000, 0x0000000404040404 }, { 0x0000000201000000, 0x0000000810204080 }},
-	{{ 0x0000070000000000, 0x0000000808080808 }, { 0x0000000402010000, 0x0000001020408000 }},
-	{{ 0x00000f0000000000, 0x0000001010101010 }, { 0x0000000804020100, 0x0000002040800000 }},
-	{{ 0x00001f0000000000, 0x0000002020202020 }, { 0x0000001008040201, 0x0000004080000000 }},
-	{{ 0x00003f0000000000, 0x0000004040404040 }, { 0x0000002010080402, 0x0000008000000000 }},
-	{{ 0x00007f0000000000, 0x0000008080808080 }, { 0x0000004020100804, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000010101010101 }, { 0x0000000000000000, 0x0000020408102040 }},
-	{{ 0x0001000000000000, 0x0000020202020202 }, { 0x0000010000000000, 0x0000040810204080 }},
-	{{ 0x0003000000000000, 0x0000040404040404 }, { 0x0000020100000000, 0x0000081020408000 }},
-	{{ 0x0007000000000000, 0x0000080808080808 }, { 0x0000040201000000, 0x0000102040800000 }},
-	{{ 0x000f000000000000, 0x0000101010101010 }, { 0x0000080402010000, 0x0000204080000000 }},
-	{{ 0x001f000000000000, 0x0000202020202020 }, { 0x0000100804020100, 0x0000408000000000 }},
-	{{ 0x003f000000000000, 0x0000404040404040 }, { 0x0000201008040201, 0x0000800000000000 }},
-	{{ 0x007f000000000000, 0x0000808080808080 }, { 0x0000402010080402, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0001010101010101 }, { 0x0000000000000000, 0x0002040810204080 }},
-	{{ 0x0100000000000000, 0x0002020202020202 }, { 0x0001000000000000, 0x0004081020408000 }},
-	{{ 0x0300000000000000, 0x0004040404040404 }, { 0x0002010000000000, 0x0008102040800000 }},
-	{{ 0x0700000000000000, 0x0008080808080808 }, { 0x0004020100000000, 0x0010204080000000 }},
-	{{ 0x0f00000000000000, 0x0010101010101010 }, { 0x0008040201000000, 0x0020408000000000 }},
-	{{ 0x1f00000000000000, 0x0020202020202020 }, { 0x0010080402010000, 0x0040800000000000 }},
-	{{ 0x3f00000000000000, 0x0040404040404040 }, { 0x0020100804020100, 0x0080000000000000 }},
-	{{ 0x7f00000000000000, 0x0080808080808080 }, { 0x0040201008040201, 0x0000000000000000 }},
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }},	// pass
-	{{ 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000 }}
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 };
 
 /**
@@ -238,44 +93,7 @@ static const uint64x2_t rmask_v4[66][2] = {
  * @return flipped disc pattern.
  */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-uint64x2_t mm_Flip(uint64x2_t OP, int pos)
-{
-	uint64x2_t	flip, oflank0, eraser0, mask0;			uint64x2_t	oflank1, eraser1, mask1;
-	const int64x2_t lshift18 = { 1, 8 };				const int64x2_t lshift79 = { 9, 7 };
-	int64x2_t rshift18 = { -1, -8 };				int64x2_t rshift79 = { -9, -7 };
-	const uint64x2_t one = vdupq_n_u64(1);
-	uint64x2_t PP = vdupq_lane_u64(vget_low_u64(OP), 0);
-	uint64x2_t OO = vdupq_lane_u64(vget_high_u64(OP), 0);
-
-	mask0 = lrmask_v4[pos][2];					mask1 = lrmask_v4[pos][3];
-		// isolate non-opponent MS1B by clearing lower bits
-	eraser0 = vbicq_u64(mask0, OO);					eraser1 = vbicq_u64(mask1, OO);
-		// clear valid bits only using variable shift
-	oflank0 = vshlq_u64(vandq_u64(PP, mask0), lshift18);		oflank1 = vshlq_u64(vandq_u64(PP, mask1), lshift79);
-	eraser0 = vorrq_u64(eraser0, vshlq_u64(eraser0, rshift18));	eraser1 = vorrq_u64(eraser1, vshlq_u64(eraser1, rshift79));
-	rshift18 = vaddq_s64(rshift18, rshift18);			rshift79 = vaddq_s64(rshift79, rshift79);
-	eraser0 = vorrq_u64(eraser0, vshlq_u64(eraser0, rshift18));	eraser1 = vorrq_u64(eraser1, vshlq_u64(eraser1, rshift79));
-	eraser0 = vorrq_u64(eraser0, vshlq_u64(eraser0, rshift18));	eraser1 = vorrq_u64(eraser1, vshlq_u64(eraser1, rshift79));
-	oflank0 = vbicq_u64(oflank0, eraser0);				oflank1 = vbicq_u64(oflank1, eraser1);
-		// set mask bits higher than oflank
-	flip = vbicq_u64(mask0, vsubq_u64(oflank0, one));		flip = vorrq_u64(flip, vbicq_u64(mask1, vsubq_u64(oflank1, one)));
-
-	mask0 = lrmask_v4[pos][0];					mask1 = lrmask_v4[pos][1];
-		// get outflank with carry-propagation
-	oflank0 = vaddq_u64(vornq_u64(OO, mask0), one);			oflank1 = vaddq_u64(vornq_u64(OO, mask1), one);
-	oflank0 = vandq_u64(vandq_u64(PP, mask0), oflank0);		oflank1 = vandq_u64(vandq_u64(PP, mask1), oflank1);
-		// set all bits lower than oflank, using satulation if oflank = 0
-	oflank0 = vqsubq_u64(oflank0, one);				oflank1 = vqsubq_u64(oflank1, one);
-	flip = vbslq_u64(mask1, oflank1, vbslq_u64(mask0, oflank0, flip));
-
-	return vorrq_u64(flip, vextq_u64(flip, flip, 1));
-=======
-unsigned long long Flip(int pos, unsigned long long P, unsigned long long O)
-=======
 uint64x2_t mm_Flip(uint64x2_t OP, int pos)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 {
 	uint64x2_t	flip, oflank0, eraser0, mask0;			uint64x2_t	oflank1, eraser1, mask1;
 	const int64x2_t lshift18 = { 1, 8 };				const int64x2_t lshift79 = { 9, 7 };
@@ -305,12 +123,6 @@ uint64x2_t mm_Flip(uint64x2_t OP, int pos)
 	oflank0 = vqsubq_u64(oflank0, one);				oflank1 = vqsubq_u64(oflank1, one);
 	flip = vbslq_u64(mask1, oflank1, vbslq_u64(mask0, oflank0, flip));
 
-<<<<<<< HEAD
-	flip0 = vorrq_u64(flip0, flip1);
-	return vget_lane_u64(vorr_u64(vget_low_u64(flip0), vget_high_u64(flip0)), 0);
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
 	return vorrq_u64(flip, vextq_u64(flip, flip, 1));
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 }
 
diff --git a/src/flip_sse.c b/src/flip_sse.c
index 35ee07e..2014779 100644
--- a/src/flip_sse.c
+++ b/src/flip_sse.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 80c96fb (adding back MSB to get flip mask)
 /**
  * @file flip_sse.c
  *
@@ -182,15 +178,7 @@ static inline __m128i MS1B_epu64(const __m128i x) {
  * https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx/41148578#41148578
  */
  static inline __m128i MS1B_epu52(const __m128i x) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	const __m128d k1e52 = _mm_set1_pd(0x0010000000000000);
-=======
-	const __m128d k1e52 = _mm_castsi128_pd(_mm_set1_epi64x((1023ULL + 52) << 52));
->>>>>>> 80c96fb (adding back MSB to get flip mask)
-=======
-	const __m128d k1e52 = _mm_set1_pd(0x0010000000000000);
->>>>>>> 593fff4 (use appropriate _mm_set1)
 	const __m128d exp_mask = _mm_castsi128_pd(_mm_set1_epi64x(0xfff0000000000000));
 	__m128d f;
 	f = _mm_or_pd(_mm_castsi128_pd(x), k1e52);	// construct double x + 2^52
@@ -1415,15 +1403,7 @@ static __m128i vectorcall flip_C6(const __m128i OP)
  */
 static __m128i vectorcall flip_D6(const __m128i OP)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	__m128i	flipped, flipped_c7e7, index_d;
-=======
-	__m128i	flipped, flipped_h_c7e7, index_d;
->>>>>>> 80c96fb (adding back MSB to get flip mask)
-=======
 	__m128i	flipped, flipped_c7e7, index_d;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	unsigned int outflank_h, outflank_v, outflank_d, index_v;
 	const __m128i mask = _mm_set_epi64x(0x0000081422418000, 0x0808080808080808);	// A3D6H2
 
@@ -1437,25 +1417,11 @@ static __m128i vectorcall flip_D6(const __m128i OP)
 
 	outflank_h = OUTFLANK_3[(_mm_extract_epi16(OP, 6) >> 9) & 0x3f] & rotl8(_mm_extract_epi16(OP, 2) >> 8, 3);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped_c7e7 = _mm_shuffle_epi32(OP, 0xf5);
 	flipped_c7e7 = _mm_and_si128(flipped_c7e7, _mm_set_epi32(0x00100000, 0x00040000, 0x20000000, 0x02000000));
 	flipped_c7e7 = _mm_min_epi16(flipped_c7e7, _mm_shuffle_epi32(flipped_c7e7, SWAP64));
 	flipped = _mm_or_si128(flipped, _mm_unpacklo_epi16(
 		_mm_slli_epi64(_mm_loadl_epi64((__m128i *) &FLIPPED_3_H[outflank_h]), 56), flipped_c7e7));
-<<<<<<< HEAD
-=======
-	flipped_h_c7e7 = _mm_unpacklo_epi64(_mm_srli_epi64(OP, 7), _mm_srli_epi64(OP, 9));
-	flipped_h_c7e7 = _mm_and_si128(flipped_h_c7e7, _mm_shuffle_epi32(OP, DUPHI));
-	flipped_h_c7e7 = _mm_insert_epi16(flipped_h_c7e7, FLIPPED_3_H[outflank_h], 2);
-	flipped_h_c7e7 = _mm_and_si128(flipped_h_c7e7, _mm_set_epi64x(0x0010000000000000, 0x0004ff0000000000));
-	flipped = _mm_or_si128(flipped, flipped_h_c7e7);
->>>>>>> 80c96fb (adding back MSB to get flip mask)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
 	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
 }
@@ -1469,15 +1435,7 @@ static __m128i vectorcall flip_D6(const __m128i OP)
  */
 static __m128i vectorcall flip_E6(const __m128i OP)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	__m128i	flipped, flipped_d7f7, index_d;
-=======
-	__m128i	flipped, flipped_h_d7f7, index_d;
->>>>>>> 80c96fb (adding back MSB to get flip mask)
-=======
 	__m128i	flipped, flipped_d7f7, index_d;
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	unsigned int outflank_h, outflank_v, outflank_d, index_v;
 	const __m128i mask = _mm_set_epi64x(0x0000102844820100, 0x1010101010101010);	// A2E6H3
 
@@ -1491,25 +1449,11 @@ static __m128i vectorcall flip_E6(const __m128i OP)
 
 	outflank_h = OUTFLANK_4[(_mm_extract_epi16(OP, 6) >> 9) & 0x3f] & rotl8(_mm_extract_epi16(OP, 2) >> 8, 2);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	flipped_d7f7 = _mm_shuffle_epi32(OP, 0xf5);
 	flipped_d7f7 = _mm_and_si128(flipped_d7f7, _mm_set_epi32(0x00200000, 0x00080000, 0x40000000, 0x04000000));
 	flipped_d7f7 = _mm_min_epi16(flipped_d7f7, _mm_shuffle_epi32(flipped_d7f7, SWAP64));
 	flipped = _mm_or_si128(flipped, _mm_unpacklo_epi16(
 		_mm_slli_epi64(_mm_loadl_epi64((__m128i *) &FLIPPED_4_H[outflank_h]), 56), flipped_d7f7));
-<<<<<<< HEAD
-=======
-	flipped_h_d7f7 = _mm_unpacklo_epi64(_mm_srli_epi64(OP, 7), _mm_srli_epi64(OP, 9));
-	flipped_h_d7f7 = _mm_and_si128(flipped_h_d7f7, _mm_shuffle_epi32(OP, DUPHI));
-	flipped_h_d7f7 = _mm_insert_epi16(flipped_h_d7f7, FLIPPED_4_H[outflank_h], 2);
-	flipped_h_d7f7 = _mm_and_si128(flipped_h_d7f7, _mm_set_epi64x(0x0020000000000000, 0x0008ff0000000000));
-	flipped = _mm_or_si128(flipped, flipped_h_d7f7);
->>>>>>> 80c96fb (adding back MSB to get flip mask)
-=======
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 
 	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
 }
@@ -2055,1971 +1999,3 @@ __m128i (vectorcall *mm_flip[])(const __m128i) = {
 	flip_pass, flip_pass
 };
 
-<<<<<<< HEAD
-=======
-/**
- * @file flip_sse.c
- *
- * This module deals with flipping discs.
- *
- * A function is provided for each square of the board. These functions are
- * gathered into an array of functions, so that a fast access to each function
- * is allowed. The generic form of the function take as input the player and
- * the opponent bitboards and return the flipped squares into a bitboard.
- *
- * Given the following notation:
- *  - x = square where we play,
- *  - P = player's disc pattern,
- *  - O = opponent's disc pattern,
- * the basic principle is to read into an array the result of a move. Doing
- * this is easier for a single line ; so we can use arrays of the form:
- *  - ARRAY[x][8-bits disc pattern].
- * The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by a simple
- * multiplication and to right-shift the result to scale it into a number
- * between 0 and 255.
- * Once we get our 8-bits disc patterns,a first array (OUTFLANK) is used to
- * get the player's discs that surround the opponent discs:
- *  - outflank = OUTFLANK[x][O] & P
- * Because neighbor-of-x bits in the P are not in interest here, outflank
- * is stored in bitwise rotated form to reduce the table size.
- * The result is then used as an index to access a second array giving the
- * flipped discs according to the surrounding player's discs:
- *  - flipped = FLIPPED[x][outflank].
- * (Flipped discs fall into inner 6-bits.)
- * Finally, a precomputed array transform the inner 6-bits disc pattern back into a
- * 64-bits disc pattern, and the flipped squares for each line are gathered and
- * returned to generate moves.
- *
- * If the OUTFLANK search is in LSB to MSB direction, LS1B or carry propagation 
- * can be used to determine contiguous opponent discs.
- * If the OUTFLANK search is in MSB to LSB direction, MS1B using int-float
- * conversion can be used.
- *
- * @date 1998 - 2020
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- */
-
-#include "bit.h"
-#include <stdio.h>
-
-/** rotated outflank array (indexed with inner 6 bits) */
-static const unsigned char OUTFLANK_2[64] = {	// ...ahgfe
-	0x00, 0x10, 0x00, 0x00, 0x01, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x02, 0x12, 0x00, 0x00,
-	0x00, 0x10, 0x00, 0x00, 0x01, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00,
-	0x00, 0x10, 0x00, 0x00, 0x01, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x02, 0x12, 0x00, 0x00,
-	0x00, 0x10, 0x00, 0x00, 0x01, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x08, 0x18, 0x00, 0x00
-};
-
-static const unsigned char OUTFLANK_3[64] = {	// ...bahgf
-	0x00, 0x00, 0x10, 0x08, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x11, 0x09, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x10, 0x08, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x12, 0x0a, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x10, 0x08, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x11, 0x09, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x10, 0x08, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x14, 0x0c, 0x00, 0x00, 0x00, 0x00
-};
-
-static const unsigned char OUTFLANK_4[64] = {	// ...cbahg
-	0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x01, 0x01, 0x01, 0x01, 0x11, 0x11, 0x09, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x02, 0x02, 0x02, 0x02, 0x12, 0x12, 0x0a, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
-
-static const unsigned char OUTFLANK_5[64] = {	// ...dcbah
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x10, 0x10, 0x08, 0x08, 0x04, 0x02,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x11, 0x11, 0x11, 0x09, 0x09, 0x05, 0x03,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
-
-static const unsigned char OUTFLANK_7[64] = {
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x08, 0x08, 0x08, 0x08, 0x04, 0x04, 0x02, 0x01
-};
-
-/** flip array (indexed with rotated outflank, returns inner 6 bits) */
-static const unsigned long long FLIPPED_2_V[25] = {	// ...ahgfe
-	0x0000000000000000, 0x00000000ff000000, 0x000000ffff000000, 0x0000000000000000,
-	0x0000ffffff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ffffffff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000000000ff00, 0x00000000ff00ff00, 0x000000ffff00ff00, 0x0000000000000000,
-	0x0000ffffff00ff00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ffffffff00ff00
-};
-
-static const unsigned long long FLIPPED_3_H[21] = {	// ...bahgf
-	0x0000000000000000, 0x1010101010101010, 0x3030303030303030, 0x0000000000000000,
-	0x7070707070707070, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0606060606060606, 0x1616161616161616, 0x3636363636363636, 0x0000000000000000,
-	0x7676767676767676, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0404040404040404, 0x1414141414141414, 0x3434343434343434, 0x0000000000000000,
-	0x7474747474747474
-};
-
-static const unsigned long long FLIPPED_4_H[19] = {	// ...cbahg
-	0x0000000000000000, 0x2020202020202020, 0x6060606060606060, 0x0000000000000000,
-	0x0e0e0e0e0e0e0e0e, 0x2e2e2e2e2e2e2e2e, 0x6e6e6e6e6e6e6e6e, 0x0000000000000000,
-	0x0c0c0c0c0c0c0c0c, 0x2c2c2c2c2c2c2c2c, 0x6c6c6c6c6c6c6c6c, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0808080808080808, 0x2828282828282828, 0x6868686868686868
-};
-
-static const unsigned long long FLIPPED_4_V[19] = {
-	0x0000000000000000, 0x0000ff0000000000, 0x00ffff0000000000, 0x0000000000000000,
-	0x00000000ffffff00, 0x0000ff00ffffff00, 0x00ffff00ffffff00, 0x0000000000000000,
-	0x00000000ffff0000, 0x0000ff00ffff0000, 0x00ffff00ffff0000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00000000ff000000, 0x0000ff00ff000000, 0x00ffff00ff000000
-};
-
-static const unsigned long long FLIPPED_7_V[38] = {
-	0x0000000000000000, 0x00ffffffffffff00, 0x00ffffffffff0000, 0x0000000000000000,
-	0x00ffffffff000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ffffff00000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ffff0000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	// static const unsigned long long FLIPPED_5_V[18] = {	// ...dcbah
-		0x0000000000000000, 0x00ff000000000000, 0x000000ffffffff00, 0x00ff00ffffffff00,
-		0x000000ffffff0000, 0x00ff00ffffff0000, 0x0000000000000000, 0x0000000000000000,
-		0x000000ffff000000, 0x00ff00ffff000000, 0x0000000000000000, 0x0000000000000000,
-	0x00ff000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-		0x000000ff00000000, 0x00ff00ff00000000
-};
-
-static const unsigned long long *FLIPPED_5_V = FLIPPED_7_V + 20;
-
-
-#define minusone	_mm_set1_epi32(-1)
-
-#define	SWAP64	0x4e	// for _mm_shuffle_epi32
-#define	SWAP32	0xb1
-#define	DUPLO	0x44
-#define	DUPHI	0xee
-
-
-/*
- * Extract most significant bit set from 4 x i32
- */
-static inline __m128i MS1B_epu31(const __m128i x) {
-	const __m128 exp_mask = _mm_castsi128_ps(_mm_set1_epi32(0xff800000));
-	return _mm_cvtps_epi32(_mm_and_ps(_mm_cvtepi32_ps(x), exp_mask));	// clear mantissa = non msb bits
-}
-
-/*
- * Extract most significant bit set
- * valid only for x < 0x000fffffffffffffULL
- *
- * https://software.intel.com/en-us/forums/intel-isa-extensions/topic/301988
- * https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx/41148578#41148578
- */
-static inline __m128i MS1B_epu52(const __m128i x) {
-	const __m128d k1e52 = _mm_castsi128_pd(_mm_set1_epi64x((1023ULL + 52) << 52));
-	const __m128d exp_mask = _mm_castsi128_pd(_mm_set1_epi64x(0xfff0000000000000));
-	__m128d f;
-	f = _mm_or_pd(_mm_castsi128_pd(x), k1e52);	// construct double x + 2^52
-	f = _mm_sub_pd(f, k1e52);	// extract 2^52 from double -- mantissa will be automatically normalized
-	f = _mm_and_pd(f, exp_mask);	// clear mantissa = non msb bits
-	f = _mm_add_pd(f, k1e52);	// add 2^52 to push back the msb
-	f = _mm_xor_pd(f, k1e52);	// remove exponent
-	return _mm_castpd_si128(f);
-}
-
-/**
- * Make inverted flip mask if opponent's disc are surrounded by player's.
- *
- * 0xffffffffffffffffULL (-1) if outflank is 0
- * 0x0000000000000000ULL ( 0) if a 1 is in 64 bit
- */
-static inline __m128i flipmask (const __m128i outflank) {
-	return _mm_cmpeq_epi32(_mm_shuffle_epi32(outflank, SWAP32), outflank);
-}
-
-/**
- * Load 2 unsigned long longs into xmm.
- */
-static inline __m128i load64x2 (const unsigned long long *x0, const unsigned long long *x1) {
-	return _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) x0)), (__m64 *) x1));
-}
-
-/**
- * Compute flipped discs when playing on square A1.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_A1(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped;
-	const __m128i mask = _mm_set_epi64x(0x8040201008040200, 0x0101010101010100);
-	const __m128i next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[1]);
-
-	outflank_vd = _mm_andnot_si128(OO, mask);
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_and_si128(mask, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));
-
-	outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);
-	flipped = _mm_or_si128(flipped, _mm_subs_epu8(outflank_h, next_h));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square B1.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_B1(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped;
-	const __m128i mask = _mm_set_epi64x(0x0080402010080400, 0x0202020202020200);
-	const __m128i next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[2]);
-
-	outflank_vd = _mm_andnot_si128(OO, mask);
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_and_si128(mask, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));
-
-	outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);
-	flipped = _mm_or_si128(flipped, _mm_subs_epu8(outflank_h, next_h));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square C1.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_C1(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, flipped, flipped_h_b1b2;
-	const __m128i mask = _mm_set_epi64x(0x0000804020100800, 0x0404040404040400);
-
-	outflank_vd = _mm_andnot_si128(OO, mask);
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_and_si128(mask, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));
-
-	flipped_h_b1b2 = _mm_and_si128(_mm_adds_epu8(OO, _mm_set_epi8(0, 0, 0, 0, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0x08)), PP);
-	flipped_h_b1b2 = _mm_srli_epi64(_mm_mullo_epi16(flipped_h_b1b2, _mm_set_epi16(0, 0, 0x0002, 0x0200, 0, 0, 0, 0x00ff)), 8);
-	flipped_h_b1b2 = _mm_and_si128(_mm_and_si128(flipped_h_b1b2, OO), _mm_set_epi16(0, 0, 0, 0x0202, 0, 0, 0, 0x0078));
-	flipped = _mm_or_si128(flipped, flipped_h_b1b2);
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square D1.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_D1(const __m128i OP)
-{
-	__m128i	outflank_v, flipped, index_d;
-	unsigned int outflank_h, outflank_d;
-	const __m128i mask = _mm_set_epi64x(0x0000008041221408, 0x0808080808080800);	// A4D1H5
-
-	outflank_v = _mm_andnot_si128(_mm_shuffle_epi32(OP, DUPHI), mask);
-	outflank_v = _mm_and_si128(_mm_and_si128(outflank_v, _mm_sub_epi64(_mm_setzero_si128(), outflank_v)), OP);
-	flipped = _mm_sub_epi64(outflank_v, _mm_sub_epi64(flipmask(outflank_v), minusone));
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_shuffle_epi32(mask, DUPHI)), _mm_setzero_si128());
-	outflank_d = OUTFLANK_3[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 3);
-
-	flipped = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(flipped), (__m64 *) &FLIPPED_3_H[outflank_d]));
-	flipped = _mm_and_si128(mask, flipped);
-
-	outflank_h = OUTFLANK_3[(_mm_extract_epi16(OP, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si64(OP), 3);
-	flipped = _mm_or_si128(flipped, _mm_srli_epi64(_mm_loadl_epi64((__m128i *) &FLIPPED_3_H[outflank_h]), 56));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square E1.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_E1(const __m128i OP)
-{
-	__m128i	outflank_v, flipped, index_d;
-	unsigned int outflank_h, outflank_d;
-	const __m128i mask = _mm_set_epi64x(0x0000000182442810, 0x1010101010101000);	// A5E1H4
-
-	outflank_v = _mm_andnot_si128(_mm_shuffle_epi32(OP, DUPHI), mask);
-	outflank_v = _mm_and_si128(_mm_and_si128(outflank_v, _mm_sub_epi64(_mm_setzero_si128(), outflank_v)), OP);
-	flipped = _mm_sub_epi64(outflank_v, _mm_sub_epi64(flipmask(outflank_v), minusone));
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_shuffle_epi32(mask, DUPHI)), _mm_setzero_si128());
-	outflank_d = OUTFLANK_4[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 2);
-
-	flipped = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(flipped), (__m64 *) &FLIPPED_4_H[outflank_d]));
-	flipped = _mm_and_si128(mask, flipped);
-
-	outflank_h = OUTFLANK_4[(_mm_extract_epi16(OP, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si64(OP), 2);
-	flipped = _mm_or_si128(flipped, _mm_srli_epi64(_mm_loadl_epi64((__m128i *) &FLIPPED_4_H[outflank_h]), 56));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square F1.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_F1(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, flipped, outflank_h, flipped_h_g1g2;
-	const __m128i mask = _mm_set_epi64x(0x0000010204081000, 0x2020202020202000);
-
-	outflank_h = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0, 0x0000001f))), PP);
-
-	flipped_h_g1g2 = _mm_unpacklo_epi64(outflank_h, PP);
-	flipped_h_g1g2 = _mm_srli_epi64(_mm_mullo_epi16(flipped_h_g1g2, _mm_set_epi16(0, 0, 1, 0x0100, 0, 0, 0, -0x0400)), 9);
-	flipped_h_g1g2 = _mm_and_si128(_mm_and_si128(flipped_h_g1g2, OO), _mm_set_epi16(0, 0, 0, 0x4040, 0, 0, 0, 0x001f));
-
-	outflank_vd = _mm_andnot_si128(OO, mask);
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	outflank_vd = _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone));
-	flipped = _mm_or_si128(flipped_h_g1g2, _mm_and_si128(mask, outflank_vd));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square G1.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_G1(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped;
-
-	outflank_vd = _mm_andnot_si128(OO, _mm_set_epi64x(0x0001020408102000, 0x4040404040404000));
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_sub_epi64(outflank_vd, _mm_andnot_si128(flipmask(outflank_vd), _mm_set1_epi64x(0x0000000000000100)));
-
-	outflank_h = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0, 0x0000003f))), PP);
-	flipped = _mm_sub_epi8(flipped, _mm_add_epi8(outflank_h, outflank_h));
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0001020408102000, 0x404040404040403e));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square H1.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_H1(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped;
-
-	outflank_vd = _mm_andnot_si128(OO, _mm_set_epi64x(0x0102040810204000, 0x8080808080808000));
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_sub_epi64(outflank_vd, _mm_andnot_si128(flipmask(outflank_vd), _mm_set1_epi64x(0x0000000000000100)));
-
-	outflank_h = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0, 0x0000007f))), PP);
-	flipped = _mm_sub_epi8(flipped, _mm_add_epi8(outflank_h, outflank_h));
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0102040810204000, 0x808080808080807e));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square A2.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_A2(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped;
-	const __m128i mask = _mm_set_epi64x(0x4020100804020000, 0x0101010101010000);
-	const __m128i next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[9]);
-
-	outflank_vd = _mm_andnot_si128(OO, mask);
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_and_si128(mask, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));
-
-	outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);
-	flipped = _mm_or_si128(flipped, _mm_subs_epu8(outflank_h, next_h));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square B2.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_B2(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped;
-	const __m128i mask = _mm_set_epi64x(0x8040201008040000, 0x0202020202020000);
-	const __m128i next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[10]);
-
-	outflank_vd = _mm_andnot_si128(OO, mask);
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_and_si128(mask, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));
-
-	outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);
-	flipped = _mm_or_si128(flipped, _mm_subs_epu8(outflank_h, next_h));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square C2.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_C2(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, flipped, flipped_h_b2b3;
-	const __m128i mask = _mm_set_epi64x(0x0080402010080000, 0x0404040404040000);
-
-	outflank_vd = _mm_andnot_si128(OO, mask);
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_and_si128(mask, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));
-
-	flipped_h_b2b3 = _mm_and_si128(_mm_adds_epu8(OO, _mm_set_epi8(0, 0, 0, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0x08, 0)), PP);
-	flipped_h_b2b3 = _mm_mullo_epi16(_mm_srli_epi64(flipped_h_b2b3, 8), _mm_set_epi16(0, 0, 0x0002, 0x0200, 0, 0, 0, 0x00ff));
-	flipped_h_b2b3 = _mm_and_si128(_mm_and_si128(flipped_h_b2b3, OO), _mm_set_epi16(0, 0, 0x0002, 0x0200, 0, 0, 0, 0x7800));
-	flipped = _mm_or_si128(flipped, flipped_h_b2b3);
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square D2.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_D2(const __m128i OP)
-{
-	__m128i	outflank_v, flipped, index_d;
-	unsigned int outflank_h, outflank_d;
-	const __m128i mask = _mm_set_epi64x(0x000080412214ff00, 0x0808080808080000);
-
-	outflank_v = _mm_andnot_si128(_mm_shuffle_epi32(OP, DUPHI), mask);
-	outflank_v = _mm_and_si128(_mm_and_si128(outflank_v, _mm_sub_epi64(_mm_setzero_si128(), outflank_v)), OP);
-	flipped = _mm_sub_epi64(outflank_v, _mm_sub_epi64(flipmask(outflank_v), minusone));
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_set1_epi64x(0x0000804122140800)), _mm_setzero_si128());	// A5D2H6
-	outflank_d = OUTFLANK_3[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 3);
-
-	flipped = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(flipped), (__m64 *) &FLIPPED_3_H[outflank_d]));
-
-	outflank_h = OUTFLANK_3[(_mm_extract_epi16(OP, 4) >> 9) & 0x3f] & rotl8(_mm_cvtsi128_si64(OP) >> 8, 3);
-	flipped = _mm_insert_epi16(flipped, FLIPPED_3_H[outflank_h], 4);
-	flipped = _mm_and_si128(flipped, mask);
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square E2.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_E2(const __m128i OP)
-{
-	__m128i	outflank_v, flipped, index_d;
-	unsigned int outflank_h, outflank_d;
-	const __m128i mask = _mm_set_epi64x(0x000001824428ff00, 0x1010101010100000);
-
-	outflank_v = _mm_andnot_si128(_mm_shuffle_epi32(OP, DUPHI), mask);
-	outflank_v = _mm_and_si128(_mm_and_si128(outflank_v, _mm_sub_epi64(_mm_setzero_si128(), outflank_v)), OP);
-	flipped = _mm_sub_epi64(outflank_v, _mm_sub_epi64(flipmask(outflank_v), minusone));
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_set1_epi64x(0x0000018244281000)), _mm_setzero_si128());	// A6E2H5
-	outflank_d = OUTFLANK_4[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 2);
-
-	flipped = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(flipped), (__m64 *) &FLIPPED_4_H[outflank_d]));
-
-	outflank_h = OUTFLANK_4[(_mm_extract_epi16(OP, 4) >> 9) & 0x3f] & rotl8(_mm_cvtsi128_si64(OP) >> 8, 2);
-	flipped = _mm_insert_epi16(flipped, FLIPPED_4_H[outflank_h], 4);
-	flipped = _mm_and_si128(flipped, mask);
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square F2.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_F2(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, flipped, outflank_h, flipped_h_g2g3;
-	const __m128i mask = _mm_set_epi64x(0x0001020408100000, 0x2020202020200000);
-
-	outflank_h = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0, 0x00001f00))), PP);
-
-	flipped_h_g2g3 = _mm_unpacklo_epi64(outflank_h, _mm_srli_epi64(PP, 9));
-	flipped_h_g2g3 = _mm_mullo_epi16(flipped_h_g2g3, _mm_set_epi16(0, 0, 1, 0x0100, 0, 0, 0, -2));
-	flipped_h_g2g3 = _mm_and_si128(_mm_and_si128(flipped_h_g2g3, OO), _mm_set_epi16(0, 0, 0x0040, 0x4000, 0, 0, 0, 0x1f00));
-
-	outflank_vd = _mm_andnot_si128(OO, mask);
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	outflank_vd = _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone));
-	flipped = _mm_or_si128(flipped_h_g2g3, _mm_and_si128(mask, outflank_vd));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square G2.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_G2(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped;
-
-	outflank_vd = _mm_andnot_si128(OO, _mm_set_epi64x(0x0102040810200000, 0x4040404040400000));	// low lane: G3..G8, high lane: F3..A8
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_sub_epi64(outflank_vd, _mm_andnot_si128(flipmask(outflank_vd), _mm_set1_epi64x(0x0000000000010000)));
-
-	outflank_h = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0, 0x00003f00))), PP);	// 0x3f00 = A2..F2
-	flipped = _mm_sub_epi8(flipped, _mm_add_epi8(outflank_h, outflank_h));
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0102040810200000, 0x4040404040403e00));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square H2.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_H2(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped;
-
-	outflank_vd = _mm_andnot_si128(OO, _mm_set_epi64x(0x0204081020400000, 0x8080808080800000));	// low lane: H3..H8, high lane: G3..B8
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_sub_epi64(outflank_vd, _mm_andnot_si128(flipmask(outflank_vd), _mm_set1_epi64x(0x0000000000010000)));
-
-	outflank_h = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0, 0x00007f00))), PP);	// 0x7f00 = A2..G2
-	flipped = _mm_sub_epi8(flipped, _mm_add_epi8(outflank_h, outflank_h));
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0204081020400000, 0x8080808080807e00));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square A3.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_A3(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped, flipped_h_a2b2;
-	const __m128i mask = _mm_set_epi64x(0x2010080402000000, 0x0101010101000000);	// low lane: A4..A8, high lane: B4..F8
-
-	outflank_vd = _mm_andnot_si128(OO, mask);
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_and_si128(mask, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));
-
-	outflank_h = _mm_and_si128(PP, _mm_adds_epu8(OO, _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 2, 0, -1)));
-	flipped_h_a2b2 = _mm_srli_epi16(_mm_mullo_epi16(outflank_h, _mm_set_epi16(0, 0, 0, 0x2000, 0, 0, 0x003f, 0x4000)), 6);
-	flipped_h_a2b2 = _mm_and_si128(flipped_h_a2b2, _mm_set_epi64x(0x0000000000000200, 0x00000000007e0100));
-	flipped = _mm_or_si128(flipped, _mm_and_si128(flipped_h_a2b2, OO));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square B3.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_B3(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped, flipped_h_b2c2;
-	const __m128i mask = _mm_set_epi64x(0x4020100804000000, 0x0202020202000000);	// low lane: B4..B8, high lane: C4..G8
-
-	outflank_vd = _mm_andnot_si128(OO, mask);
-	outflank_vd = _mm_and_si128(_mm_and_si128(outflank_vd, _mm_sub_epi64(_mm_setzero_si128(), outflank_vd)), PP);
-	flipped = _mm_and_si128(mask, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));
-
-	outflank_h = _mm_and_si128(PP, _mm_adds_epu8(OO, _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 4, 0, -1)));
-	flipped_h_b2c2 = _mm_srli_epi16(_mm_mullo_epi16(outflank_h, _mm_set_epi16(0, 0, 0, 0x1000, 0, 0, 0x001f, 0x2000)), 5);
-	flipped_h_b2c2 = _mm_and_si128(flipped_h_b2c2, _mm_set_epi64x(0x0000000000000400, 0x00000000007c0200));
-	flipped = _mm_or_si128(flipped, _mm_and_si128(flipped_h_b2c2, OO));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square C3.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_C3(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped, flipped_b4b3b2c2d2;
-	const __m128i mask = _mm_set_epi64x(~0x8040201008000000ULL, ~0x0404040404000000ULL);	// inverted masks; the cleared bits are C4..C8 (low lane) and D4..H8 (high lane)
-	const __m128i next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[19]);	// presumably the single bit of square index 19 (D3) -- confirm X_TO_BIT layout
-
-	outflank_vd = _mm_and_si128(_mm_andnot_si128(mask, _mm_sub_epi64(_mm_or_si128(OO, mask), minusone)), PP);
-	flipped = _mm_andnot_si128(mask, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));
-
-	outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);
-	flipped = _mm_or_si128(flipped, _mm_subs_epu8(outflank_h, next_h));
-
-	flipped_b4b3b2c2d2 = _mm_and_si128(_mm_shufflelo_epi16(PP, 0x90), _mm_set_epi16(0, 0, 0, 0x0001, 0x0001, 0x0001, 0x0004, 0x0010));	// ...a1a5a3c1e1
-	flipped_b4b3b2c2d2 = _mm_madd_epi16(flipped_b4b3b2c2d2, _mm_set_epi16(0, 0, 0, 0x0200, 0x0200, 0x0002, 0x0100, 0x0080));
-	flipped = _mm_or_si128(flipped, _mm_and_si128(_mm_shufflelo_epi16(flipped_b4b3b2c2d2, 0xf8), OO));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square D3.
- *
- * @param OP vector packing both bitboards; the opponent's discs are taken with DUPHI
- *           (presumably the high 64 bits, the player's being the low 64 -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_D3(const __m128i OP)
-{
-	__m128i	flipped, flipped_h_c2e2, index_d;
-	unsigned int outflank_h, outflank_v, outflank_d, index_v;
-	const __m128i mask = _mm_set_epi64x(0x0080412214080000, 0x0808080808080808);	// A6D3H7
-
-	index_v = _mm_movemask_epi8(_mm_slli_epi64(OP, 4));	// << 4 moves bit 3 of every byte (file D) to the byte's top bit: one bit per rank
-	outflank_v = OUTFLANK_2[(index_v >> 9) & 0x3f] & rotl8(index_v, 4);
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_shuffle_epi32(mask, DUPHI)), _mm_setzero_si128());	// byte sums of the masked diagonal squares, one sum per 64-bit lane
-	outflank_d = OUTFLANK_3[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 3);
-
-	flipped = _mm_and_si128(load64x2(&FLIPPED_2_V[outflank_v], &FLIPPED_3_H[outflank_d]), mask);
-
-	outflank_h = OUTFLANK_3[(_mm_extract_epi16(OP, 5) >> 1) & 0x3f] & rotl8(_mm_extract_epi16(OP, 1), 3);	// 16-bit words 5 and 1 each hold ranks 3-4 of one lane
-
-	flipped_h_c2e2 = _mm_unpacklo_epi64(_mm_slli_epi64(OP, 9), _mm_slli_epi64(OP, 7));
-	flipped_h_c2e2 = _mm_and_si128(flipped_h_c2e2, _mm_shuffle_epi32(OP, DUPHI));
-	flipped_h_c2e2 = _mm_insert_epi16(flipped_h_c2e2, FLIPPED_3_H[outflank_h], 1);
-	flipped_h_c2e2 = _mm_and_si128(flipped_h_c2e2, _mm_set_epi64x(0x0000000000001000, 0x0000000000ff0400));
-	flipped = _mm_or_si128(flipped, flipped_h_c2e2);
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square E3.
- *
- * @param OP vector packing both bitboards; the opponent's discs are taken with DUPHI
- *           (presumably the high 64 bits, the player's being the low 64 -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_E3(const __m128i OP)
-{
-	__m128i	flipped, flipped_h_d2f2, index_d;
-	unsigned int outflank_h, outflank_v, outflank_d, index_v;
-	const __m128i mask = _mm_set_epi64x(0x0001824428100000, 0x1010101010101010);	// A7E3H6
-
-	index_v = _mm_movemask_epi8(_mm_slli_epi64(OP, 3));	// << 3 moves bit 4 of every byte (file E) to the byte's top bit: one bit per rank
-	outflank_v = OUTFLANK_2[(index_v >> 9) & 0x3f] & rotl8(index_v, 4);
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_shuffle_epi32(mask, DUPHI)), _mm_setzero_si128());	// byte sums of the masked diagonal squares, one sum per 64-bit lane
-	outflank_d = OUTFLANK_4[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 2);
-
-	flipped = _mm_and_si128(load64x2(&FLIPPED_2_V[outflank_v], &FLIPPED_4_H[outflank_d]), mask);
-
-	outflank_h = OUTFLANK_4[(_mm_extract_epi16(OP, 5) >> 1) & 0x3f] & rotl8(_mm_extract_epi16(OP, 1), 2);	// 16-bit words 5 and 1 each hold ranks 3-4 of one lane
-
-	flipped_h_d2f2 = _mm_unpacklo_epi64(_mm_slli_epi64(OP, 9), _mm_slli_epi64(OP, 7));
-	flipped_h_d2f2 = _mm_and_si128(flipped_h_d2f2, _mm_shuffle_epi32(OP, DUPHI));
-	flipped_h_d2f2 = _mm_insert_epi16(flipped_h_d2f2, FLIPPED_4_H[outflank_h], 1);
-	flipped_h_d2f2 = _mm_and_si128(flipped_h_d2f2, _mm_set_epi64x(0x0000000000002000, 0x0000000000ff0800));
-	flipped = _mm_or_si128(flipped, flipped_h_d2f2);
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square F3.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_F3(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_h, outflank_vd, flipped, flipped_vd, flipped_g4g3g2f2e2;
-	const __m128i mask = _mm_set_epi64x(~0x0102040810000000ULL, ~0x2020202020000000ULL);	// inverted masks; the cleared bits are F4..F8 (low lane) and E4..A8 (high lane)
-
-	outflank_h = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0, 0x001f0000))), PP);	// 0x001f0000 = A3..E3
-	flipped = _mm_srli_epi16(_mm_mullo_epi16(outflank_h, _mm_set_epi16(0, 0, 0, 0, 0, 0, -0x1000, 0)), 11);
-
-	outflank_vd = _mm_and_si128(_mm_andnot_si128(mask, _mm_sub_epi64(_mm_or_si128(OO, mask), minusone)), PP);
-	flipped_vd = _mm_andnot_si128(mask, _mm_sub_epi64(outflank_vd, _mm_sub_epi64(flipmask(outflank_vd), minusone)));
-	flipped = _mm_or_si128(flipped, flipped_vd);
-
-	flipped_g4g3g2f2e2 = _mm_and_si128(_mm_shufflelo_epi16(PP, 0x90), _mm_set_epi16(0, 0, 0, 0x0080, 0x0080, 0x0080, 0x0020, 0x0008));	// ...h1h5h3f1d1
-	flipped_g4g3g2f2e2 = _mm_srli_epi16(_mm_madd_epi16(flipped_g4g3g2f2e2, _mm_set_epi16(0, 0, 0, 0x0100, 0x0100, 0x0001, 0x0200, 0x0400)), 1);
-	flipped = _mm_or_si128(flipped, _mm_and_si128(_mm_shufflelo_epi16(flipped_g4g3g2f2e2, 0xf8), OO));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square G3.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_G3(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped, flipped_g2f2;
-	const __m128i mask = _mm_set_epi64x(~0x0204081020000000, ~0x4040404040000000);	// inverted masks; the cleared bits are G4..G8 (low lane) and F4..B8 (high lane)
-
-	outflank_vd = _mm_and_si128(_mm_andnot_si128(mask, _mm_sub_epi64(_mm_or_si128(OO, mask), minusone)), PP);
-	flipped = _mm_sub_epi64(outflank_vd, _mm_andnot_si128(flipmask(outflank_vd), _mm_set1_epi64x(0x0000000001000000)));
-
-	outflank_h = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0, 0x003f0000))), PP);	// 0x003f0000 = A3..F3
-	flipped = _mm_sub_epi8(flipped, _mm_add_epi8(outflank_h, outflank_h));
-
-	flipped_g2f2 = _mm_and_si128(_mm_mullo_epi16(PP, _mm_set_epi16(0, 0, 0, 0x0200, 0, 0, 0, 0x0100)), OO);
-	flipped = _mm_and_si128(_mm_or_si128(flipped, flipped_g2f2), _mm_set_epi64x(0x0204081020002000, 0x40404040403f4000));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square H3.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_H3(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped, flipped_h2g2;
-	const __m128i mask = _mm_set_epi64x(~0x0408102040000000, ~0x8080808080000000);	// inverted masks; the cleared bits are H4..H8 (low lane) and G4..C8 (high lane)
-
-	outflank_vd = _mm_and_si128(_mm_andnot_si128(mask, _mm_sub_epi64(_mm_or_si128(OO, mask), minusone)), PP);
-	flipped = _mm_sub_epi64(outflank_vd, _mm_andnot_si128(flipmask(outflank_vd), _mm_set1_epi64x(0x0000000001000000)));
-
-	outflank_h = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0, 0x007f0000))), PP);	// 0x007f0000 = A3..G3
-	flipped = _mm_sub_epi8(flipped, _mm_add_epi8(outflank_h, outflank_h));
-
-	flipped_h2g2 = _mm_and_si128(_mm_mullo_epi16(PP, _mm_set_epi16(0, 0, 0, 0x0200, 0, 0, 0, 0x0100)), OO);
-	flipped = _mm_and_si128(_mm_or_si128(flipped, flipped_h2g2), _mm_set_epi64x(0x0408102040004000, 0x80808080807f8000));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square A4.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_A4(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflankL, outflankH, flipped;
-	const __m128i maskL = _mm_set_epi32(0, 0x00020408, 0, 0x00010101);	// dword 0: A1..A3, dword 2: B3..D1
-	const __m128i maskH = _mm_set_epi32(0x10080402, 0, 0x01010101, 0xfe000000);	// dword 0: B4..H4, dword 1: A5..A8, dword 3: B5..E8
-
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, maskL)), PP);
-	flipped = _mm_and_si128(maskL, _mm_mul_epu32(outflankL, _mm_set1_epi32(-2)));
-
-	outflankH = _mm_andnot_si128(OO, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PP);	// x & -x keeps the lowest set bit of each dword
-	outflankH = _mm_sub_epi32(outflankH, _mm_sub_epi32(_mm_cmpeq_epi32(outflankH, _mm_setzero_si128()), minusone));
-	flipped = _mm_or_si128(flipped, _mm_and_si128(maskH, outflankH));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square B4.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_B4(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflankL, outflankH, flipped;
-	const __m128i maskL = _mm_set_epi32(0, 0x00040810, 0, 0x00020202);	// dword 0: B1..B3, dword 2: C3..E1
-	const __m128i maskH = _mm_set_epi32(0x20100804, 0, 0x02020202, 0xfc000000);	// dword 0: C4..H4, dword 1: B5..B8, dword 3: C5..F8
-
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, maskL)), PP);
-	flipped = _mm_and_si128(maskL, _mm_mul_epu32(outflankL, _mm_set1_epi32(-2)));
-
-	outflankH = _mm_andnot_si128(OO, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PP);	// x & -x keeps the lowest set bit of each dword
-	outflankH = _mm_sub_epi32(outflankH, _mm_sub_epi32(_mm_cmpeq_epi32(outflankH, _mm_setzero_si128()), minusone));
-	flipped = _mm_or_si128(flipped, _mm_and_si128(maskH, outflankH));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square C4.
- *
- * @param OP vector packing both bitboards; the fixed shuffles below take PL / PH (player)
- *           from dwords 0-1 and OL / OH (opponent) from dwords 2-3.
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_C4(const __m128i OP)
-{
-	__m128i OL, OH, PL, PH, outflankL, outflankH, flippedL, flippedH, flipped;
-	const __m128i maskL = _mm_set_epi32(0x00081020, 0x00020100, 0x00040404, 0x03000000);	// ranks 1-4 in every dword: A4..B4, C1..C3, B3..A2, D3..F1
-	const __m128i maskH = _mm_set_epi32(0x00000102, 0x40201008, 0x04040404, 0xf8000000);	// dword 0 (ranks 1-4): D4..H4; dwords 1-3 (ranks 5-8): C5..C8, D5..G8, B5..A6
-
-	OH = _mm_shuffle_epi32(OP, 0xfe);	// dwords 2, 3, 3, 3 of OP
-	PH = _mm_shuffle_epi32(OP, 0x54);	// dwords 0, 1, 1, 1 of OP
-	outflankH = _mm_andnot_si128(OH, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PH);	// x & -x keeps the lowest set bit of each dword
-	flippedH = _mm_and_si128(maskH, _mm_sub_epi32(outflankH, _mm_sub_epi32(_mm_cmpeq_epi32(outflankH, _mm_setzero_si128()), minusone)));
-	flipped = _mm_or_si128(_mm_move_epi64(flippedH), _mm_shuffle_epi32(flippedH, 0xc8));
-
-	OL = _mm_shuffle_epi32(OP, 0xaa);	// dword 2 of OP in every lane
-	PL = _mm_shuffle_epi32(OP, 0x00);	// dword 0 of OP in every lane
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OL, maskL)), PL);
-	flippedL = _mm_andnot_si128(_mm_add_epi32(_mm_add_epi32(outflankL, outflankL), minusone), maskL);
-	flipped = _mm_or_si128(flipped, _mm_xor_si128(flippedL, _mm_shuffle_epi32(flippedL, 0xf5)));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square D4.
- *
- * @param OP vector packing both bitboards; the fixed shuffles below take PL / PH (player)
- *           from dwords 0-1 and OL / OH (opponent) from dwords 2-3.
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_D4(const __m128i OP)
-{
-	__m128i OL, OH, PL, PH, outflankL, outflankH, flippedL, flippedH, flipped;
-	const __m128i maskL = _mm_set_epi32(0x00102040, 0x00040201, 0x00080808, 0x07000000);	// ranks 1-4 in every dword: A4..C4, D1..D3, C3..A1, E3..G1
-	const __m128i maskH = _mm_set_epi32(0x00010204, 0x80402010, 0x08080808, 0xf0000000);	// dword 0 (ranks 1-4): E4..H4; dwords 1-3 (ranks 5-8): D5..D8, E5..H8, C5..A7
-
-	OH = _mm_shuffle_epi32(OP, 0xfe);	// dwords 2, 3, 3, 3 of OP
-	PH = _mm_shuffle_epi32(OP, 0x54);	// dwords 0, 1, 1, 1 of OP
-	outflankH = _mm_andnot_si128(OH, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PH);	// x & -x keeps the lowest set bit of each dword
-	flippedH = _mm_and_si128(maskH, _mm_sub_epi32(outflankH, _mm_sub_epi32(_mm_cmpeq_epi32(outflankH, _mm_setzero_si128()), minusone)));
-	flipped = _mm_or_si128(_mm_move_epi64(flippedH), _mm_shuffle_epi32(flippedH, 0xc8));
-
-	OL = _mm_shuffle_epi32(OP, 0xaa);	// dword 2 of OP in every lane
-	PL = _mm_shuffle_epi32(OP, 0x00);	// dword 0 of OP in every lane
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OL, maskL)), PL);
-	flippedL = _mm_andnot_si128(_mm_add_epi32(_mm_add_epi32(outflankL, outflankL), minusone), maskL);
-	flipped = _mm_or_si128(flipped, _mm_xor_si128(flippedL, _mm_shuffle_epi32(flippedL, 0xf5)));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square E4.
- *
- * @param OP vector packing both bitboards; the fixed shuffles below take PL / PH (player)
- *           from dwords 0-1 and OL / OH (opponent) from dwords 2-3.
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_E4(const __m128i OP)
-{
-	__m128i OL, OH, PL, PH, outflankL, outflankH, flippedL, flippedH, flipped;
-	const __m128i maskL = _mm_set_epi32(0x00204080, 0x00080402, 0x00101010, 0x0f000000);	// ranks 1-4 in every dword: A4..D4, E1..E3, D3..B1, F3..H1
-	const __m128i maskH = _mm_set_epi32(0x01020408, 0x00804020, 0x10101010, 0xe0000000);	// dword 0 (ranks 1-4): F4..H4; dwords 1-3 (ranks 5-8): E5..E8, F5..H7, D5..A8
-
-	OH = _mm_shuffle_epi32(OP, 0xfe);	// dwords 2, 3, 3, 3 of OP
-	PH = _mm_shuffle_epi32(OP, 0x54);	// dwords 0, 1, 1, 1 of OP
-	outflankH = _mm_andnot_si128(OH, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PH);	// x & -x keeps the lowest set bit of each dword
-	flippedH = _mm_and_si128(maskH, _mm_sub_epi32(outflankH, _mm_sub_epi32(_mm_cmpeq_epi32(outflankH, _mm_setzero_si128()), minusone)));
-	flipped = _mm_or_si128(_mm_move_epi64(flippedH), _mm_shuffle_epi32(flippedH, 0xc8));
-
-	OL = _mm_shuffle_epi32(OP, 0xaa);	// dword 2 of OP in every lane
-	PL = _mm_shuffle_epi32(OP, 0x00);	// dword 0 of OP in every lane
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OL, maskL)), PL);
-	flippedL = _mm_andnot_si128(_mm_add_epi32(_mm_add_epi32(outflankL, outflankL), minusone), maskL);
-	flipped = _mm_or_si128(flipped, _mm_xor_si128(flippedL, _mm_shuffle_epi32(flippedL, 0xf5)));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square F4.
- *
- * @param OP vector packing both bitboards; the fixed shuffles below take PL / PH (player)
- *           from dwords 0-1 and OL / OH (opponent) from dwords 2-3.
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_F4(const __m128i OP)
-{
-	__m128i OL, OH, PL, PH, outflankL, outflankH, flippedL, flippedH, flipped;
-	const __m128i maskL = _mm_set_epi32(0x00408000, 0x00100804, 0x00202020, 0x1f000000);	// ranks 1-4 in every dword: A4..E4, F1..F3, E3..C1, G3..H2
-	const __m128i maskH = _mm_set_epi32(0x02040810, 0x00008040, 0x20202020, 0xc0000000);	// dword 0 (ranks 1-4): G4..H4; dwords 1-3 (ranks 5-8): F5..F8, G5..H6, E5..B8
-
-	OH = _mm_shuffle_epi32(OP, 0xfe);	// dwords 2, 3, 3, 3 of OP
-	PH = _mm_shuffle_epi32(OP, 0x54);	// dwords 0, 1, 1, 1 of OP
-	outflankH = _mm_andnot_si128(OH, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PH);	// x & -x keeps the lowest set bit of each dword
-	flippedH = _mm_and_si128(maskH, _mm_sub_epi32(outflankH, _mm_sub_epi32(_mm_cmpeq_epi32(outflankH, _mm_setzero_si128()), minusone)));
-	flipped = _mm_or_si128(_mm_move_epi64(flippedH), _mm_shuffle_epi32(flippedH, 0xc8));
-
-	OL = _mm_shuffle_epi32(OP, 0xaa);	// dword 2 of OP in every lane
-	PL = _mm_shuffle_epi32(OP, 0x00);	// dword 0 of OP in every lane
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OL, maskL)), PL);
-	flippedL = _mm_andnot_si128(_mm_add_epi32(_mm_add_epi32(outflankL, outflankL), minusone), maskL);
-	flipped = _mm_or_si128(flipped, _mm_xor_si128(flippedL, _mm_shuffle_epi32(flippedL, 0xf5)));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square G4.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_G4(const __m128i OP)
-{
-	__m128i	OO, PP, OL, PL, outflankH, outflankL, flipped, flippedL;
-	const __m128i maskL = _mm_set_epi32(0, 0x3f000000, 0x00201008, 0x00404040);	// ranks 1-4 in every dword: G1..G3, F3..D1, A4..F4 (dword 3 unused)
-	const __m128i maskH = _mm_set_epi64x(~0x0408102000000000ULL, ~0x4040404000000000ULL);	// inverted masks; the cleared bits are G5..G8 (low lane) and F5..C8 (high lane)
-
-	OL = _mm_shuffle_epi32(OP, 0xaa);	// dword 2 of OP in every lane
-	PL = _mm_shuffle_epi32(OP, 0x00);	// dword 0 of OP in every lane
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OL, maskL)), PL);
-	flippedL = _mm_and_si128(_mm_sub_epi32(_mm_setzero_si128(), _mm_add_epi32(outflankL, outflankL)), maskL);
-	flipped = _mm_xor_si128(flippedL, _mm_shuffle_epi32(flippedL, 0xf5));
-
-	OO = _mm_shuffle_epi32(OP, DUPHI);
-	PP = _mm_shuffle_epi32(OP, DUPLO);
-	outflankH = _mm_and_si128(_mm_andnot_si128(maskH, _mm_sub_epi64(_mm_or_si128(OO, maskH), minusone)), PP);
-	flipped = _mm_or_si128(flipped, _mm_andnot_si128(maskH, _mm_sub_epi64(outflankH, _mm_shuffle_epi32(outflankH, SWAP32))));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square H4.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_H4(const __m128i OP)
-{
-	__m128i	OO, PP, OL, PL, outflankH, outflankL, flipped, flippedL;
-	const __m128i maskL = _mm_set_epi32(0, 0x7f000000, 0x00402010, 0x00808080);	// ranks 1-4 in every dword: H1..H3, G3..E1, A4..G4 (dword 3 unused)
-	const __m128i maskH = _mm_set_epi64x(~0x0810204000000000ULL, ~0x8080808000000000ULL);	// inverted masks; the cleared bits are H5..H8 (low lane) and G5..D8 (high lane)
-
-	OL = _mm_shuffle_epi32(OP, 0xaa);	// dword 2 of OP in every lane
-	PL = _mm_shuffle_epi32(OP, 0x00);	// dword 0 of OP in every lane
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OL, maskL)), PL);
-	flippedL = _mm_and_si128(_mm_sub_epi32(_mm_setzero_si128(), _mm_add_epi32(outflankL, outflankL)), maskL);
-	flipped = _mm_xor_si128(flippedL, _mm_shuffle_epi32(flippedL, 0xf5));
-
-	OO = _mm_shuffle_epi32(OP, DUPHI);
-	PP = _mm_shuffle_epi32(OP, DUPLO);
-	outflankH = _mm_and_si128(_mm_andnot_si128(maskH, _mm_sub_epi64(_mm_or_si128(OO, maskH), minusone)), PP);
-	flipped = _mm_or_si128(flipped, _mm_andnot_si128(maskH, _mm_sub_epi64(outflankH, _mm_shuffle_epi32(outflankH, SWAP32))));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square A5.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_A5(const __m128i OP)
-{
-	__m128i	OO, PP, OH, PH, outflankL, outflankH, flipped, flippedH;
-	const __m128i maskL = _mm_set_epi32(0, 0x02040810, 0, 0x01010101);	// dword 0: A1..A4, dword 2: B4..E1
-	const __m128i maskH = _mm_set_epi32(0x08040200, 0, 0x01010100, 0x000000fe);	// ranks 5-8 after the shuffles below: dword 0 B5..H5, dword 1 A6..A8, dword 3 B6..D8
-
-	OH = _mm_shuffle_epi32(OP, 0xef);	// dwords 3, 3, 2, 3 of OP
-	PH = _mm_shuffle_epi32(OP, 0x45);	// dwords 1, 1, 0, 1 of OP
-	outflankH = _mm_andnot_si128(OH, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PH);	// x & -x keeps the lowest set bit of each dword
-	flippedH = _mm_add_epi32(outflankH, minusone);
-	flippedH = _mm_and_si128(maskH, _mm_add_epi32(flippedH, _mm_srli_epi32(flippedH, 31)));
-	flipped = _mm_xor_si128(flippedH, _mm_shuffle_epi32(flippedH, 0xa0));
-
-	OO = _mm_shuffle_epi32(OP, DUPHI);
-	PP = _mm_shuffle_epi32(OP, DUPLO);
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, maskL)), PP);
-	flipped = _mm_or_si128(flipped, _mm_and_si128(maskL, _mm_mul_epu32(outflankL, _mm_set1_epi32(-2))));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square B5.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_B5(const __m128i OP)
-{
-	__m128i	OO, PP, OH, PH, outflankL, outflankH, flipped, flippedH;
-	const __m128i maskL = _mm_set_epi32(0, 0x04081020, 0, 0x02020202);	// dword 0: B1..B4, dword 2: C4..F1
-	const __m128i maskH = _mm_set_epi32(0x10080400, 0, 0x02020200, 0x000000fc);	// ranks 5-8 after the shuffles below: dword 0 C5..H5, dword 1 B6..B8, dword 3 C6..E8
-
-	OH = _mm_shuffle_epi32(OP, 0xef);	// dwords 3, 3, 2, 3 of OP
-	PH = _mm_shuffle_epi32(OP, 0x45);	// dwords 1, 1, 0, 1 of OP
-	outflankH = _mm_andnot_si128(OH, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PH);	// x & -x keeps the lowest set bit of each dword
-	flippedH = _mm_add_epi32(outflankH, minusone);
-	flippedH = _mm_and_si128(maskH, _mm_add_epi32(flippedH, _mm_srli_epi32(flippedH, 31)));
-	flipped = _mm_xor_si128(flippedH, _mm_shuffle_epi32(flippedH, 0xa0));
-
-	OO = _mm_shuffle_epi32(OP, DUPHI);
-	PP = _mm_shuffle_epi32(OP, DUPLO);
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, maskL)), PP);
-	flipped = _mm_or_si128(flipped, _mm_and_si128(maskL, _mm_mul_epu32(outflankL, _mm_set1_epi32(-2))));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square C5.
- *
- * @param OP vector packing both bitboards; the fixed shuffles below take PL / PH (player)
- *           from dwords 0-1 and OL / OH (opponent) from dwords 2-3.
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_C5(const __m128i OP)
-{
-	__m128i OL, OH, PL, PH, outflankL, outflankH, flippedL, flippedH, flipped;
-	const __m128i maskL = _mm_set_epi32(0x08102040, 0x02010000, 0x00000003, 0x04040404);	// dword 0: C1..C4, dword 1 (ranks 5-8): A5..B5, dword 2: B4..A3, dword 3: D4..G1
-	const __m128i maskH = _mm_set_epi32(0x00010200, 0x20100800, 0x000000f8, 0x04040400);	// ranks 5-8 in every dword: C6..C8, D5..H5, D6..F8, B6..A7
-
-	OL = _mm_shuffle_epi32(OP, 0xae);	// dwords 2, 3, 2, 2 of OP
-	PL = _mm_shuffle_epi32(OP, 0x04);	// dwords 0, 1, 0, 0 of OP
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OL, maskL)), PL);
-	flippedL = _mm_and_si128(_mm_sub_epi32(_mm_setzero_si128(), _mm_add_epi32(outflankL, outflankL)), maskL);
-	flipped = _mm_or_si128(_mm_move_epi64(flippedL), _mm_shuffle_epi32(flippedL, 0x76));
-
-	OH = _mm_shuffle_epi32(OP, 0xff);	// dword 3 of OP in every lane
-	PH = _mm_shuffle_epi32(OP, 0x55);	// dword 1 of OP in every lane
-	outflankH = _mm_andnot_si128(OH, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PH);	// x & -x keeps the lowest set bit of each dword
-	flippedH = _mm_and_si128(maskH, _mm_sub_epi32(outflankH, _mm_sub_epi32(_mm_cmpeq_epi32(outflankH, _mm_setzero_si128()), minusone)));
-	flipped = _mm_or_si128(flipped, _mm_xor_si128(flippedH, _mm_shuffle_epi32(flippedH, 0xa0)));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square D5.
- *
- * @param OP vector packing both bitboards; the fixed shuffles below take PL / PH (player)
- *           from dwords 0-1 and OL / OH (opponent) from dwords 2-3.
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_D5(const __m128i OP)
-{
-	__m128i OL, OH, PL, PH, outflankL, outflankH, flippedL, flippedH, flipped;
-	const __m128i maskL = _mm_set_epi32(0x10204080, 0x04020100, 0x00000007, 0x08080808);	// dword 0: D1..D4, dword 1 (ranks 5-8): A5..C5, dword 2: C4..A2, dword 3: E4..H1
-	const __m128i maskH = _mm_set_epi32(0x01020400, 0x40201000, 0x000000f0, 0x08080800);	// ranks 5-8 in every dword: D6..D8, E5..H5, E6..G8, C6..A8
-
-	OL = _mm_shuffle_epi32(OP, 0xae);	// dwords 2, 3, 2, 2 of OP
-	PL = _mm_shuffle_epi32(OP, 0x04);	// dwords 0, 1, 0, 0 of OP
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OL, maskL)), PL);
-	flippedL = _mm_and_si128(_mm_sub_epi32(_mm_setzero_si128(), _mm_add_epi32(outflankL, outflankL)), maskL);
-	flipped = _mm_or_si128(_mm_move_epi64(flippedL), _mm_shuffle_epi32(flippedL, 0x76));
-
-	OH = _mm_shuffle_epi32(OP, 0xff);	// dword 3 of OP in every lane
-	PH = _mm_shuffle_epi32(OP, 0x55);	// dword 1 of OP in every lane
-	outflankH = _mm_andnot_si128(OH, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PH);	// x & -x keeps the lowest set bit of each dword
-	flippedH = _mm_and_si128(maskH, _mm_sub_epi32(outflankH, _mm_sub_epi32(_mm_cmpeq_epi32(outflankH, _mm_setzero_si128()), minusone)));
-	flipped = _mm_or_si128(flipped, _mm_xor_si128(flippedH, _mm_shuffle_epi32(flippedH, 0xa0)));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square E5.
- *
- * @param OP vector packing both bitboards; the fixed shuffles below take PL / PH (player)
- *           from dwords 0-1 and OL / OH (opponent) from dwords 2-3.
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_E5(const __m128i OP)
-{
-	__m128i OL, OH, PL, PH, outflankL, outflankH, flippedL, flippedH, flipped;
-	const __m128i maskL = _mm_set_epi32(0x20408000, 0x08040201, 0x0000000f, 0x10101010);	// dword 0: E1..E4, dword 1 (ranks 5-8): A5..D5, dword 2: D4..A1, dword 3: F4..H2
-	const __m128i maskH = _mm_set_epi32(0x02040800, 0x80402000, 0x000000e0, 0x10101000);	// ranks 5-8 in every dword: E6..E8, F5..H5, F6..H8, D6..B8
-
-	OL = _mm_shuffle_epi32(OP, 0xae);	// dwords 2, 3, 2, 2 of OP
-	PL = _mm_shuffle_epi32(OP, 0x04);	// dwords 0, 1, 0, 0 of OP
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OL, maskL)), PL);
-	flippedL = _mm_and_si128(_mm_sub_epi32(_mm_setzero_si128(), _mm_add_epi32(outflankL, outflankL)), maskL);
-	flipped = _mm_or_si128(_mm_move_epi64(flippedL), _mm_shuffle_epi32(flippedL, 0x76));
-
-	OH = _mm_shuffle_epi32(OP, 0xff);	// dword 3 of OP in every lane
-	PH = _mm_shuffle_epi32(OP, 0x55);	// dword 1 of OP in every lane
-	outflankH = _mm_andnot_si128(OH, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PH);	// x & -x keeps the lowest set bit of each dword
-	flippedH = _mm_and_si128(maskH, _mm_sub_epi32(outflankH, _mm_sub_epi32(_mm_cmpeq_epi32(outflankH, _mm_setzero_si128()), minusone)));
-	flipped = _mm_or_si128(flipped, _mm_xor_si128(flippedH, _mm_shuffle_epi32(flippedH, 0xa0)));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square F5.
- *
- * @param OP vector packing both bitboards; the fixed shuffles below take PL / PH (player)
- *           from dwords 0-1 and OL / OH (opponent) from dwords 2-3.
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_F5(const __m128i OP)
-{
-	__m128i OL, OH, PL, PH, outflankL, outflankH, flippedL, flippedH, flipped;
-	const __m128i maskL = _mm_set_epi32(0x40800000, 0x10080402, 0x0000001f, 0x20202020);	// dword 0: F1..F4, dword 1 (ranks 5-8): A5..E5, dword 2: E4..B1, dword 3: G4..H3
-	const __m128i maskH = _mm_set_epi32(0x04081000, 0x00804000, 0x000000c0, 0x20202000);	// ranks 5-8 in every dword: F6..F8, G5..H5, G6..H7, E6..C8
-
-	OL = _mm_shuffle_epi32(OP, 0xae);	// dwords 2, 3, 2, 2 of OP
-	PL = _mm_shuffle_epi32(OP, 0x04);	// dwords 0, 1, 0, 0 of OP
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OL, maskL)), PL);
-	flippedL = _mm_and_si128(_mm_sub_epi32(_mm_setzero_si128(), _mm_add_epi32(outflankL, outflankL)), maskL);
-	flipped = _mm_or_si128(_mm_move_epi64(flippedL), _mm_shuffle_epi32(flippedL, 0x76));
-
-	OH = _mm_shuffle_epi32(OP, 0xff);	// dword 3 of OP in every lane
-	PH = _mm_shuffle_epi32(OP, 0x55);	// dword 1 of OP in every lane
-	outflankH = _mm_andnot_si128(OH, maskH);
-	outflankH = _mm_and_si128(_mm_and_si128(outflankH, _mm_sub_epi32(_mm_setzero_si128(), outflankH)), PH);	// x & -x keeps the lowest set bit of each dword
-	flippedH = _mm_and_si128(maskH, _mm_sub_epi32(outflankH, _mm_sub_epi32(_mm_cmpeq_epi32(outflankH, _mm_setzero_si128()), minusone)));
-	flipped = _mm_or_si128(flipped, _mm_xor_si128(flippedH, _mm_shuffle_epi32(flippedH, 0xa0)));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square G5.
- *
- * @param OP vector packing both bitboards; the player's discs are read through DUPLO
- *           and the opponent's through DUPHI (presumably low / high 64 bits -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_G5(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflankH, outflankL, flipped;
-	const __m128i maskL = _mm_set_epi32(0, 0x20100804, 0x0000003f, 0x40404040);	// dword 0: G1..G4, dword 1: A5..F5, dword 2: F4..C1
-	const __m128i maskH = _mm_set_epi64x(~0x0810200000000000ULL, ~0x4040400000000000ULL);	// inverted masks; the cleared bits are G6..G8 (low lane) and F6..D8 (high lane)
-
-	outflankH = _mm_and_si128(_mm_andnot_si128(maskH, _mm_sub_epi64(_mm_or_si128(OO, maskH), minusone)), PP);
-	flipped = _mm_andnot_si128(maskH, _mm_sub_epi64(outflankH, _mm_shuffle_epi32(outflankH, SWAP32)));
-
-	outflankL = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, maskL)), PP);
-	flipped = _mm_or_si128(flipped, _mm_and_si128(_mm_sub_epi32(_mm_setzero_si128(), _mm_add_epi32(outflankL, outflankL)), maskL));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square H5.
- *
- * @param OP vector packing both bitboards; the opponent's discs are taken with DUPHI
- *           (presumably the high 64 bits, the player's being the low 64 -- confirm).
- * @return flipped | shuffle(flipped, SWAP64); presumably every flipped disc, present in both 64-bit halves.
- */
-static __m128i vectorcall flip_H5(const __m128i OP)
-{
-	__m128i	outflank_h, flipped;
-	unsigned int outflank_v, outflank_d, index_v, index_d;
-	const __m128i mask_d = _mm_set1_epi64x(0x1020408040201008);	// D1H5E8
-
-	index_v = _mm_movemask_epi8(OP);	// top bit of every byte (file H): one bit per rank
-	outflank_v = OUTFLANK_4[(index_v >> 9) & 0x3f] & rotl8(index_v, 2);
-
-	index_d = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_and_si128(OP, mask_d), mask_d));	// one bit per rank: set where the rank's mask_d square is occupied
-	outflank_d = OUTFLANK_4[(index_d >> 9) & 0x3f] & rotl8(index_d, 2);
-
-	flipped = load64x2(&FLIPPED_4_V[outflank_v], &FLIPPED_4_V[outflank_d]);
-
-	outflank_h = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(_mm_shuffle_epi32(OP, DUPHI), _mm_set_epi32(0, 0, 0x0000007f, 0))), OP);	// dword 1, 0x7f = A5..G5
-	flipped = _mm_sub_epi8(flipped, _mm_add_epi8(outflank_h, outflank_h));
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x1020408040201008, 0x8080807e80808080));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square A6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_A6(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h_a7b7, flipped;
-	const __m128i mask1 = _mm_set_epi64x(0x0000000204081020, 0x0000000101010101);
-	const __m128i mask2 = _mm_set_epi64x(~0x0402000000000000ULL, ~0x0101fe0000000000ULL);
-
-	outflank_h_a7b7 = _mm_and_si128(_mm_andnot_si128(mask2, _mm_sub_epi16(_mm_or_si128(OO, mask2), minusone)), PP);
-	flipped = _mm_andnot_si128(mask2, _mm_mulhi_epu16(outflank_h_a7b7, minusone));
-
-	outflank_vd = _mm_and_si128(MS1B_epu52(_mm_andnot_si128(OO, mask1)), PP);
-	outflank_vd = _mm_sub_epi64(_mm_setzero_si128(), _mm_add_epi64(outflank_vd, outflank_vd));
-	flipped = _mm_or_si128(flipped, _mm_and_si128(outflank_vd, mask1));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square B6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_B6(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h_b7c7, flipped;
-	const __m128i mask1 = _mm_set_epi64x(0x0000000408102040, 0x0000000202020202);
-	const __m128i mask2 = _mm_set_epi64x(~0x0804000000000000ULL, ~0x0202fc0000000000ULL);
-
-	outflank_h_b7c7 = _mm_and_si128(_mm_andnot_si128(mask2, _mm_sub_epi16(_mm_or_si128(OO, mask2), minusone)), PP);
-	flipped = _mm_andnot_si128(mask2, _mm_mulhi_epu16(outflank_h_b7c7, minusone));
-
-	outflank_vd = _mm_and_si128(MS1B_epu52(_mm_andnot_si128(OO, mask1)), PP);
-	outflank_vd = _mm_sub_epi64(_mm_setzero_si128(), _mm_add_epi64(outflank_vd, outflank_vd));
-	flipped = _mm_or_si128(flipped, _mm_and_si128(outflank_vd, mask1));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square C6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_C6(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, outflank_h, flipped, flipped_b5b6b7c7d7;
-	const __m128i mask = _mm_set_epi64x(0x0000000810204080, 0x0000000404040404);
-	const __m128i next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[43]);
-
-	outflank_vd = _mm_and_si128(MS1B_epu52(_mm_andnot_si128(OO, mask)), PP);
-	flipped = _mm_and_si128(_mm_sub_epi64(_mm_setzero_si128(), _mm_add_epi64(outflank_vd, outflank_vd)), mask);
-
-	outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);
-	flipped = _mm_or_si128(flipped, _mm_subs_epu8(outflank_h, next_h));
-
-	flipped_b5b6b7c7d7 = _mm_and_si128(_mm_shufflehi_epi16(PP, 0xf4), _mm_set_epi64x(0x0400100001000000, 0x0100010000000000));	// c8e8a4.a8a6..
-	flipped_b5b6b7c7d7 = _mm_madd_epi16(flipped_b5b6b7c7d7, _mm_set_epi16(0x0100, 0x0080, 0x0200, 0, 0x0200, 2, 0, 0));
-	flipped = _mm_or_si128(flipped, _mm_and_si128(_mm_shufflehi_epi16(flipped_b5b6b7c7d7, 0xd0), OO));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square D6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_D6(const __m128i OP)
-{
-	__m128i	flipped, flipped_h_c7e7, index_d;
-	unsigned int outflank_h, outflank_v, outflank_d, index_v;
-	const __m128i mask = _mm_set_epi64x(0x0000081422418000, 0x0808080808080808);	// A3D6H2
-
-	index_v = _mm_movemask_epi8(_mm_slli_epi64(OP, 4));
-	outflank_v = OUTFLANK_5[(index_v >> 9) & 0x3f] & rotl8(index_v, 1);
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_shuffle_epi32(mask, DUPHI)), _mm_setzero_si128());
-	outflank_d = OUTFLANK_3[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 3);
-
-	flipped = _mm_and_si128(load64x2(&FLIPPED_5_V[outflank_v], &FLIPPED_3_H[outflank_d]), mask);
-
-	outflank_h = OUTFLANK_3[(_mm_extract_epi16(OP, 6) >> 9) & 0x3f] & rotl8(_mm_extract_epi16(OP, 2) >> 8, 3);
-
-	flipped_h_c7e7 = _mm_unpacklo_epi64(_mm_srli_epi64(OP, 7), _mm_srli_epi64(OP, 9));
-	flipped_h_c7e7 = _mm_and_si128(flipped_h_c7e7, _mm_shuffle_epi32(OP, DUPHI));
-	flipped_h_c7e7 = _mm_insert_epi16(flipped_h_c7e7, FLIPPED_3_H[outflank_h], 2);
-	flipped_h_c7e7 = _mm_and_si128(flipped_h_c7e7, _mm_set_epi64x(0x0010000000000000, 0x0004ff0000000000));
-	flipped = _mm_or_si128(flipped, flipped_h_c7e7);
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square E6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_E6(const __m128i OP)
-{
-	__m128i	flipped, flipped_h_d7f7, index_d;
-	unsigned int outflank_h, outflank_v, outflank_d, index_v;
-	const __m128i mask = _mm_set_epi64x(0x0000102844820100, 0x1010101010101010);	// A2E6H3
-
-	index_v = _mm_movemask_epi8(_mm_slli_epi64(OP, 3));
-	outflank_v = OUTFLANK_5[(index_v >> 9) & 0x3f] & rotl8(index_v, 1);
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_shuffle_epi32(mask, DUPHI)), _mm_setzero_si128());
-	outflank_d = OUTFLANK_4[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 2);
-
-	flipped = _mm_and_si128(load64x2(&FLIPPED_5_V[outflank_v], &FLIPPED_4_H[outflank_d]), mask);
-
-	outflank_h = OUTFLANK_4[(_mm_extract_epi16(OP, 6) >> 9) & 0x3f] & rotl8(_mm_extract_epi16(OP, 2) >> 8, 2);
-
-	flipped_h_d7f7 = _mm_unpacklo_epi64(_mm_srli_epi64(OP, 7), _mm_srli_epi64(OP, 9));
-	flipped_h_d7f7 = _mm_and_si128(flipped_h_d7f7, _mm_shuffle_epi32(OP, DUPHI));
-	flipped_h_d7f7 = _mm_insert_epi16(flipped_h_d7f7, FLIPPED_4_H[outflank_h], 2);
-	flipped_h_d7f7 = _mm_and_si128(flipped_h_d7f7, _mm_set_epi64x(0x0020000000000000, 0x0008ff0000000000));
-	flipped = _mm_or_si128(flipped, flipped_h_d7f7);
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square F6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_F6(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank, flipped, flipped_g5g6g7f7e7;
-
-	outflank = MS1B_epu52(_mm_andnot_si128(OO, _mm_set_epi64x(0x0000002020202020, 0x0000001008040201)));
-	outflank = _mm_or_si128(outflank, MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0x00001f00, 0))));
-	outflank = _mm_and_si128(outflank, PP);
-
-	flipped = _mm_sub_epi64(_mm_loadl_epi64((__m128i *) &X_TO_BIT[39]), _mm_add_epi64(outflank, outflank));
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0000002020202020, 0x00001e1008040201));
-
-	flipped_g5g6g7f7e7 = _mm_and_si128(_mm_shufflehi_epi16(PP, 0xf9), _mm_set_epi64x(0x2000080080008000, 0x8000000000000000));	// f8d8h6h4h8...
-	flipped_g5g6g7f7e7 = _mm_madd_epi16(flipped_g5g6g7f7e7, _mm_set_epi16(0x0100, 0x0200, -0x8000, -0x0080, -0x0080, 0, 0, 0));
-	flipped = _mm_or_si128(flipped, _mm_and_si128(_mm_shufflehi_epi16(flipped_g5g6g7f7e7, 0xd0), OO));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square G6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_G6(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank, flipped;
-
-	outflank = MS1B_epu52(_mm_andnot_si128(OO, _mm_set_epi64x(0x0000004040404040, 0x0000002010080402)));
-	outflank = _mm_or_si128(outflank, MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0x00003f00, 0))));
-	outflank = _mm_and_si128(outflank, PP);
-
-	flipped = _mm_sub_epi64(_mm_set_epi64x(0x0000800000000000, 0x0000808000000000), _mm_add_epi64(outflank, outflank));
-	flipped = _mm_or_si128(flipped, _mm_and_si128(_mm_mulhi_epu16(PP, _mm_set_epi16(0x0100, 0, 0, 0, 0x0200, 0, 0, 0)), OO));	// g7f7
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0040004040404040, 0x00203e2010080402));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square H6.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_H6(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank, flipped;
-
-	outflank = MS1B_epu52(_mm_andnot_si128(OO, _mm_set_epi64x(0x0000008080808080, 0x0000004020100804)));
-	outflank = _mm_or_si128(outflank, MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0x00007f00, 0))));
-	outflank = _mm_and_si128(outflank, PP);
-
-	flipped = _mm_sub_epi64(_mm_set_epi64x(0x0000800000000000, 0x0000808000000000), _mm_add_epi64(outflank, outflank));
-	flipped = _mm_or_si128(flipped, _mm_and_si128(_mm_mulhi_epu16(PP, _mm_set_epi16(0x0100, 0, 0, 0, 0x0200, 0, 0, 0)), OO));	// h7g7
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0080008080808080, 0x00407e4020100804));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square A7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_A7(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, flipped, outflank_h, next_h;
-	const __m128i mask = _mm_set_epi64x(0x0000020408102040, 0x0000010101010101);
-
-	outflank_vd = _mm_and_si128(MS1B_epu52(_mm_andnot_si128(OO, mask)), PP);
-	flipped = _mm_andnot_si128(_mm_add_epi64(_mm_add_epi64(outflank_vd, outflank_vd), minusone), mask);
-
-	next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[49]);
-	outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);
-	flipped = _mm_or_si128(flipped, _mm_subs_epu8(outflank_h, next_h));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square B7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_B7(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, flipped, outflank_h, next_h;
-	const __m128i mask = _mm_set_epi64x(0x0000040810204080, 0x0000020202020202);
-
-	outflank_vd = _mm_and_si128(MS1B_epu52(_mm_andnot_si128(OO, mask)), PP);
-	flipped = _mm_andnot_si128(_mm_add_epi64(_mm_add_epi64(outflank_vd, outflank_vd), minusone), mask);
-
-	next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[50]);
-	outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);
-	flipped = _mm_or_si128(flipped, _mm_subs_epu8(outflank_h, next_h));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square C7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_C7(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	flipped, outflank_vd, flipped_h_b6b7;
-	const __m128i mask = _mm_set_epi64x(0x0000081020408000, 0x0000040404040404);
-
-	flipped_h_b6b7 = _mm_and_si128(_mm_adds_epu8(OO, _mm_set_epi8(0, -1, 0, -1, 0, 0, 0, 0, 0, 0x08, 0, 0, 0, 0, 0, 0)), PP);
-	flipped_h_b6b7 = _mm_srli_epi64(_mm_mullo_epi16(flipped_h_b6b7, _mm_set_epi16(0x0020, 0x2000, 0, 0, 0x000f, 0, 0, 0)), 4);
-	flipped_h_b6b7 = _mm_and_si128(_mm_and_si128(flipped_h_b6b7, OO), _mm_set_epi16(0x0002, 0x0200, 0, 0, 0x0078, 0, 0, 0));
-
-	outflank_vd = _mm_and_si128(MS1B_epu52(_mm_andnot_si128(OO, mask)), PP);
-	flipped = _mm_or_si128(flipped_h_b6b7, _mm_and_si128(_mm_sub_epi64(_mm_setzero_si128(), _mm_add_epi64(outflank_vd, outflank_vd)), mask));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square D7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_D7(const __m128i OP)
-{
-	__m128i	flipped, index_d;
-	unsigned int outflank_h, outflank_d, outflank_v, index_v;
-	const __m128i mask = _mm_set_epi64x(0x0000080808080808, 0x00ff142241800000);
-
-	index_v = _mm_movemask_epi8(_mm_slli_epi64(OP, 4));
-	outflank_v = OUTFLANK_7[((index_v >> 9) & 0x1f) + 32] & index_v;
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_set1_epi64x(0x0008142241800000)), _mm_setzero_si128());	// A4D7H3
-	outflank_d = OUTFLANK_3[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 3);
-
-	flipped = load64x2(&FLIPPED_3_H[outflank_d], &FLIPPED_7_V[outflank_v]);
-
-	outflank_h = OUTFLANK_3[(_mm_extract_epi16(OP, 7) >> 1) & 0x3f] & rotl8(_mm_extract_epi16(OP, 3), 3);
-	flipped = _mm_and_si128(_mm_insert_epi16(flipped, FLIPPED_3_H[outflank_h], 3), mask);
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square E7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_E7(const __m128i OP)
-{
-	__m128i	flipped, index_d;
-	unsigned int outflank_h, outflank_d, outflank_v, index_v;
-	const __m128i mask = _mm_set_epi64x(0x0000101010101010, 0x00ff284482010000);
-
-	index_v = _mm_movemask_epi8(_mm_slli_epi64(OP, 3));
-	outflank_v = OUTFLANK_7[((index_v >> 9) & 0x1f) + 32] & index_v;
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_set1_epi64x(0x0010284482010000)), _mm_setzero_si128());	// A3E7H4
-	outflank_d = OUTFLANK_4[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 2);
-
-	flipped = load64x2(&FLIPPED_4_H[outflank_d], &FLIPPED_7_V[outflank_v]);
-
-	outflank_h = OUTFLANK_4[(_mm_extract_epi16(OP, 7) >> 1) & 0x3f] & rotl8(_mm_extract_epi16(OP, 3), 2);
-	flipped = _mm_and_si128(_mm_insert_epi16(flipped, FLIPPED_4_H[outflank_h], 3), mask);
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square F7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_F7(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank, flipped, flipped_g6g7;
-
-	outflank = MS1B_epu52(_mm_andnot_si128(OO, _mm_set_epi64x(0x0000202020202020, 0x0000100804020100)));
-	outflank = _mm_or_si128(outflank, MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0x001f0000, 0))));
-	outflank = _mm_and_si128(outflank, PP);
-
-	flipped = _mm_sub_epi64(_mm_loadl_epi64((__m128i *) &X_TO_BIT[47]), _mm_add_epi64(outflank, outflank));
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0000202020202020, 0x001e100804020100));
-
-	flipped_g6g7 = _mm_srli_epi64(_mm_and_si128(PP, _mm_set_epi64x(0x0080008000000000, 0)), 17);
-	flipped = _mm_or_si128(flipped, _mm_and_si128(_mm_packus_epi16(flipped_g6g7, flipped_g6g7), OO));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square G7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_G7(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i outflank, flipped;
-
-	outflank = MS1B_epu52(_mm_andnot_si128(OO, _mm_set_epi64x(0x0000404040404040, 0x0000201008040201)));
-	outflank = _mm_or_si128(outflank, MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0x003f0000, 0))));
-	outflank = _mm_and_si128(outflank, PP);
-
-	flipped = _mm_sub_epi64(_mm_loadl_epi64((__m128i *) &X_TO_BIT[47]), _mm_add_epi64(outflank, outflank));
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0000404040404040, 0x003e201008040201));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square H7.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_H7(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i outflank, flipped;
-
-	outflank = MS1B_epu52(_mm_andnot_si128(OO, _mm_set_epi64x(0x0000808080808080, 0x0000402010080402)));
-	outflank = _mm_or_si128(outflank, MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0x007f0000, 0))));
-	outflank = _mm_and_si128(outflank, PP);
-
-	flipped = _mm_sub_epi64(_mm_loadl_epi64((__m128i *) &X_TO_BIT[47]), _mm_add_epi64(outflank, outflank));
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0000808080808080, 0x007e402010080402));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square A8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_A8(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, flipped, outflank_h, next_h;
-	const __m128i mask = _mm_set_epi64x(0x0002040810204080, 0x0001010101010101);
-
-	outflank_vd = _mm_and_si128(MS1B_epu52(_mm_andnot_si128(OO, mask)), PP);
-	flipped = _mm_andnot_si128(_mm_add_epi64(_mm_add_epi64(outflank_vd, outflank_vd), minusone), mask);
-
-	next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[57]);
-	outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);
-	flipped = _mm_or_si128(flipped, _mm_subs_epu8(outflank_h, next_h));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square B8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_B8(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank_vd, flipped, outflank_h, next_h;
-	const __m128i mask = _mm_set_epi64x(0x0004081020408000, 0x0002020202020202);
-
-	outflank_vd = _mm_and_si128(MS1B_epu52(_mm_andnot_si128(OO, mask)), PP);
-	flipped = _mm_andnot_si128(_mm_add_epi64(_mm_add_epi64(outflank_vd, outflank_vd), minusone), mask);
-
-	next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[58]);
-	outflank_h = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);
-	flipped = _mm_or_si128(flipped, _mm_subs_epu8(outflank_h, next_h));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square C8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_C8(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	flipped, outflank_vd, flipped_h_b7b8;
-	const __m128i mask = _mm_set_epi64x(0x0008102040800000, 0x0004040404040404);
-
-	flipped_h_b7b8 = _mm_and_si128(_mm_adds_epu8(OO, _mm_set_epi8(-1, 0, -1, 0, 0, 0, 0, 0, 0x08, 0, 0, 0, 0, 0, 0, 0)), PP);
-	flipped_h_b7b8 = _mm_slli_epi64(_mm_mullo_epi16(_mm_srli_epi64(flipped_h_b7b8, 8), _mm_set_epi16(0x0020, 0x2000, 0, 0, 0x000f, 0, 0, 0)), 4);
-	flipped_h_b7b8 = _mm_and_si128(_mm_and_si128(flipped_h_b7b8, OO), _mm_set_epi16(0x0202, 0, 0, 0, 0x7800, 0, 0, 0));
-
-	outflank_vd = _mm_and_si128(MS1B_epu52(_mm_andnot_si128(OO, mask)), PP);
-	flipped = _mm_or_si128(flipped_h_b7b8, _mm_and_si128(_mm_sub_epi64(_mm_setzero_si128(), _mm_add_epi64(outflank_vd, outflank_vd)), mask));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square D8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_D8(const __m128i OP)
-{
-	__m128i	flipped, index_d;
-	unsigned int outflank_h, outflank_d;
-	const __m128i mask = _mm_set_epi64x(0x0008080808080808, 0x0814224180000000);	// A5D8H4
-#if 1	// TLU x 2 - 42(gcc)/45(VC)
-	unsigned int outflank_v, index_v;
-
-	index_v = _mm_movemask_epi8(_mm_slli_epi64(OP, 4));
-	outflank_v = OUTFLANK_7[(index_v >> 9) & 0x3f] & index_v;
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_shuffle_epi32(mask, DUPLO)), _mm_setzero_si128());
-	outflank_d = OUTFLANK_3[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 3);
-
-	flipped = _mm_and_si128(mask, load64x2(&FLIPPED_3_H[outflank_d], &FLIPPED_7_V[outflank_v]));
-
-#else	// TLU + MS1B - 45(gcc)/52(VC)
-	__m128i	outflank_v;
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_shuffle_epi32(mask, DUPLO)), _mm_setzero_si128());
-	outflank_d = OUTFLANK_3[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 3);
-	flipped = _mm_loadl_epi64((__m128i *) &FLIPPED_3_H[outflank_d]);
-
-	outflank_v = _mm_and_si128(MS1B_epu52(_mm_andnot_si128(OP, mask)), _mm_slli_si128(OP, 8));
-	flipped = _mm_and_si128(mask, _mm_sub_epi64(flipped, _mm_add_epi64(outflank_v, outflank_v)));
-#endif
-
-	outflank_h = OUTFLANK_3[(_mm_extract_epi16(OP, 7) >> 9) & 0x3f] & rotl8(_mm_extract_epi16(OP, 3) >> 8, 3);
-	flipped = _mm_or_si128(flipped, _mm_slli_epi64(_mm_loadl_epi64((__m128i *) &FLIPPED_3_H[outflank_h]), 56));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-#if 0	// MS1B x 2 - 51(gcc)/52(VC)
-static __m128i vectorcall flip_D8(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	flipped, outflank, next_h;
-	const __m128i mask1 = _mm_set_epi64x(0x0008080808080808, 0x0010204080000000);
-	const __m128i mask2 = _mm_set_epi64x(0x0700000000000000, 0x0004020100000000);
-
-	outflank = _mm_and_si128(_mm_slli_epi64(MS1B_epu52(_mm_srli_epi64(_mm_andnot_si128(OO, mask1), 1)), 1), PP);
-	flipped = _mm_and_si128(mask1, _mm_sub_epi64(_mm_setzero_si128(), _mm_add_epi64(outflank, outflank)));
-
-	outflank = _mm_and_si128(MS1B_epu31(_mm_andnot_si128(OO, mask2)), PP);
-	flipped = _mm_or_si128(flipped, _mm_and_si128(mask2, _mm_sub_epi64(_mm_setzero_si128(), _mm_add_epi64(outflank, outflank))));
-
-	next_h = _mm_loadl_epi64((__m128i *) &X_TO_BIT[60]);
-	outflank = _mm_and_si128(_mm_add_epi8(OO, next_h), PP);
-	flipped = _mm_or_si128(flipped, _mm_subs_epu8(outflank, next_h));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-#endif
-
-/**
- * Compute flipped discs when playing on square E8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_E8(const __m128i OP)
-{
-	__m128i	flipped, index_d;
-	unsigned int outflank_h, outflank_d, outflank_v, index_v;
-	const __m128i mask = _mm_set_epi64x(0x0010101010101010, 0x1028448201000000);	// A4E8H5
-
-	index_v = _mm_movemask_epi8(_mm_slli_epi64(OP, 3));
-	outflank_v = OUTFLANK_7[(index_v >> 9) & 0x3f] & index_v;
-
-	index_d = _mm_sad_epu8(_mm_and_si128(OP, _mm_shuffle_epi32(mask, DUPLO)), _mm_setzero_si128());
-	outflank_d = OUTFLANK_4[(_mm_extract_epi16(index_d, 4) >> 1) & 0x3f] & rotl8(_mm_cvtsi128_si32(index_d), 2);
-
-	flipped = _mm_and_si128(mask, load64x2(&FLIPPED_4_H[outflank_d], &FLIPPED_7_V[outflank_v]));
-
-	outflank_h = OUTFLANK_4[(_mm_extract_epi16(OP, 7) >> 9) & 0x3f] & rotl8(_mm_extract_epi16(OP, 3) >> 8, 2);
-	flipped = _mm_or_si128(flipped, _mm_slli_epi64(_mm_loadl_epi64((__m128i *) &FLIPPED_4_H[outflank_h]), 56));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square F8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_F8(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i	outflank, flipped, flipped_g7g8;
-
-	outflank = _mm_andnot_si128(OO, _mm_set_epi64x(0x0020202020202020, 0x0010080402010000));
-	outflank = _mm_slli_epi64(MS1B_epu52(_mm_srli_epi64(outflank, 4)), 4);
-	outflank = _mm_or_si128(outflank, MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0x1f000000, 0))));
-	outflank = _mm_and_si128(outflank, PP);
-
-	flipped = _mm_sub_epi64(_mm_loadl_epi64((__m128i *) &X_TO_BIT[55]), _mm_add_epi64(outflank, outflank));
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0020202020202020, 0x1e10080402010000));
-
-	flipped_g7g8 = _mm_srli_epi64(_mm_and_si128(PP, _mm_set_epi64x(0x8000800000000000, 0)), 9);
-	flipped = _mm_or_si128(flipped, _mm_and_si128(_mm_packus_epi16(flipped_g7g8, flipped_g7g8), OO));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square G8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_G8(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i outflank, flipped;
-
-	outflank = _mm_andnot_si128(OO, _mm_set_epi64x(0x0040404040404040, 0x0020100804020100));
-	outflank = _mm_slli_epi64(MS1B_epu52(_mm_srli_epi64(outflank, 4)), 4);
-	outflank = _mm_or_si128(outflank, MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0x3f000000, 0))));
-	outflank = _mm_and_si128(outflank, PP);
-
-	flipped = _mm_sub_epi64(_mm_loadl_epi64((__m128i *) &X_TO_BIT[55]), _mm_add_epi64(outflank, outflank));
-	flipped = _mm_and_si128(flipped, _mm_set_epi64x(0x0040404040404040, 0x3e20100804020100));
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute flipped discs when playing on square H8.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_H8(const __m128i OP)
-{
-	__m128i PP = _mm_shuffle_epi32(OP, DUPLO);
-	__m128i OO = _mm_shuffle_epi32(OP, DUPHI);
-	__m128i outflank, flipped;
-	__m128i mask = _mm_set_epi64x(0x0080808080808080, 0x7e40201008040201);
-
-	outflank = _mm_andnot_si128(OO, mask);
-	outflank = _mm_min_epu8(outflank, _mm_set_epi64x(0x0008080808080808, 0x0004020108040201));	// pack to 52 bits
-	outflank = _mm_mullo_epi16(MS1B_epu52(outflank), _mm_set_epi16(16, 16, 16, 16, 16, 16, 1, 1));	// unpack
-	outflank = _mm_or_si128(outflank, MS1B_epu31(_mm_andnot_si128(OO, _mm_set_epi32(0, 0, 0x7f000000, 0))));
-	outflank = _mm_and_si128(outflank, PP);
-
-	flipped = _mm_sub_epi64(_mm_loadl_epi64((__m128i *) &X_TO_BIT[55]), _mm_add_epi64(outflank, outflank));
-	flipped = _mm_and_si128(flipped, mask);
-
-	return _mm_or_si128(flipped, _mm_shuffle_epi32(flipped, SWAP64));
-}
-
-/**
- * Compute (zero-) flipped discs when plassing.
- *
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-static __m128i vectorcall flip_pass(const __m128i OP)
-{
-	(void) OP; // useless code to shut-up compiler warning
-	return _mm_setzero_si128();
-}
-
-
-/** Array of functions to compute flipped discs */
-__m128i (vectorcall *mm_flip[])(const __m128i) = {
-	flip_A1, flip_B1, flip_C1, flip_D1, flip_E1, flip_F1, flip_G1, flip_H1,
-	flip_A2, flip_B2, flip_C2, flip_D2, flip_E2, flip_F2, flip_G2, flip_H2,
-	flip_A3, flip_B3, flip_C3, flip_D3, flip_E3, flip_F3, flip_G3, flip_H3,
-	flip_A4, flip_B4, flip_C4, flip_D4, flip_E4, flip_F4, flip_G4, flip_H4,
-	flip_A5, flip_B5, flip_C5, flip_D5, flip_E5, flip_F5, flip_G5, flip_H5,
-	flip_A6, flip_B6, flip_C6, flip_D6, flip_E6, flip_F6, flip_G6, flip_H6,
-	flip_A7, flip_B7, flip_C7, flip_D7, flip_E7, flip_F7, flip_G7, flip_H7,
-	flip_A8, flip_B8, flip_C8, flip_D8, flip_E8, flip_F8, flip_G8, flip_H8,
-	flip_pass, flip_pass
-};
-
->>>>>>> b3f048d (copyright changes)
-=======
->>>>>>> 80c96fb (adding back MSB to get flip mask)
diff --git a/src/flip_sse_bitscan.c b/src/flip_sse_bitscan.c
index ec914ac..e084fbc 100644
--- a/src/flip_sse_bitscan.c
+++ b/src/flip_sse_bitscan.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file flip_sse_bitscan.c
  *
@@ -308,314 +304,3 @@ unsigned long long flip(const unsigned long long P, const unsigned long long O,
 
 	return flipped | _mm_cvtsi128_si64(outflank17);
 }
-<<<<<<< HEAD
-=======
-/**
- * @file flip_sse_bitscan.c
- *
- * This module deals with flipping discs.
- *
- * A function is provided for each square of the board. These functions are
- * gathered into an array of functions, so that a fast access to each function
- * is allowed. The generic form of the function take as input the player and
- * the opponent bitboards and return the flipped squares into a bitboard.
- *
- * Given the following notation:
- *  - x = square where we play,
- *  - P = player's disc pattern,
- *  - O = opponent's disc pattern,
- * the basic principle is to read into an array the result of a move. Doing
- * this is easier for a single line ; so we can use arrays of the form:
- *  - ARRAY[x][8-bits disc pattern].
- * The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by a simple
- * multiplication and to right-shift the result to scale it into a number
- * between 0 and 255.
- * Once we get our 8-bits disc patterns,a first array (OUTFLANK) is used to
- * get the player's discs that surround the opponent discs:
- *  - outflank = OUTFLANK[x][O] & P
- * (Only inner 6-bits of the P are in interest here.)
- * The result is then used as an index to access a second array giving the
- * flipped discs according to the surrounding player's discs:
- *  - flipped = FLIPPED[x][outflank].
- * (Flipped discs fall into inner 6-bits.)
- * Finally, a precomputed array transform the inner 6-bits disc pattern back into a
- * 64-bits disc pattern, and the flipped squares for each line are gathered and
- * returned to generate moves.
- *
- * If the OUTFLANK search is in LSB to MSB direction, carry propagation 
- * can be used to determine contiguous opponent discs.
- * If the OUTFLANK search is in MSB to LSB direction, CONTIG_X tables
- * are used to determine coutiguous opponent discs.
- *
- * @date 1998 - 2020
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- */
-
-#include "bit.h"
-
-static const V2DI maskl[64][2] = {
-	{ {{ ~0x00000000000000fe, ~0x0000000000000000 }}, {{ ~0x0101010101010100, ~0x8040201008040200 }} },
-	{ {{ ~0x00000000000000fc, ~0x0000000000000100 }}, {{ ~0x0202020202020200, ~0x0080402010080400 }} },
-	{ {{ ~0x00000000000000f8, ~0x0000000000010200 }}, {{ ~0x0404040404040400, ~0x0000804020100800 }} },
-	{ {{ ~0x00000000000000f0, ~0x0000000001020400 }}, {{ ~0x0808080808080800, ~0x0000008040201000 }} },
-	{ {{ ~0x00000000000000e0, ~0x0000000102040800 }}, {{ ~0x1010101010101000, ~0x0000000080402000 }} },
-	{ {{ ~0x00000000000000c0, ~0x0000010204081000 }}, {{ ~0x2020202020202000, ~0x0000000000804000 }} },
-	{ {{ ~0x0000000000000080, ~0x0001020408102000 }}, {{ ~0x4040404040404000, ~0x0000000000008000 }} },
-	{ {{ ~0x0000000000000000, ~0x0102040810204000 }}, {{ ~0x8080808080808000, ~0x0000000000000000 }} },
-	{ {{ ~0x000000000000fe00, ~0x0000000000000000 }}, {{ ~0x0101010101010000, ~0x4020100804020000 }} },
-	{ {{ ~0x000000000000fc00, ~0x0000000000010000 }}, {{ ~0x0202020202020000, ~0x8040201008040000 }} },
-	{ {{ ~0x000000000000f800, ~0x0000000001020000 }}, {{ ~0x0404040404040000, ~0x0080402010080000 }} },
-	{ {{ ~0x000000000000f000, ~0x0000000102040000 }}, {{ ~0x0808080808080000, ~0x0000804020100000 }} },
-	{ {{ ~0x000000000000e000, ~0x0000010204080000 }}, {{ ~0x1010101010100000, ~0x0000008040200000 }} },
-	{ {{ ~0x000000000000c000, ~0x0001020408100000 }}, {{ ~0x2020202020200000, ~0x0000000080400000 }} },
-	{ {{ ~0x0000000000008000, ~0x0102040810200000 }}, {{ ~0x4040404040400000, ~0x0000000000800000 }} },
-	{ {{ ~0x0000000000000000, ~0x0204081020400000 }}, {{ ~0x8080808080800000, ~0x0000000000000000 }} },
-	{ {{ ~0x0000000000fe0000, ~0x0000000000000000 }}, {{ ~0x0101010101000000, ~0x2010080402000000 }} },
-	{ {{ ~0x0000000000fc0000, ~0x0000000001000000 }}, {{ ~0x0202020202000000, ~0x4020100804000000 }} },
-	{ {{ ~0x0000000000f80000, ~0x0000000102000000 }}, {{ ~0x0404040404000000, ~0x8040201008000000 }} },
-	{ {{ ~0x0000000000f00000, ~0x0000010204000000 }}, {{ ~0x0808080808000000, ~0x0080402010000000 }} },
-	{ {{ ~0x0000000000e00000, ~0x0001020408000000 }}, {{ ~0x1010101010000000, ~0x0000804020000000 }} },
-	{ {{ ~0x0000000000c00000, ~0x0102040810000000 }}, {{ ~0x2020202020000000, ~0x0000008040000000 }} },
-	{ {{ ~0x0000000000800000, ~0x0204081020000000 }}, {{ ~0x4040404040000000, ~0x0000000080000000 }} },
-	{ {{ ~0x0000000000000000, ~0x0408102040000000 }}, {{ ~0x8080808080000000, ~0x0000000000000000 }} },
-	{ {{ ~0x00000000fe000000, ~0x0000000000000000 }}, {{ ~0x0101010100000000, ~0x1008040200000000 }} },
-	{ {{ ~0x00000000fc000000, ~0x0000000100000000 }}, {{ ~0x0202020200000000, ~0x2010080400000000 }} },
-	{ {{ ~0x00000000f8000000, ~0x0000010200000000 }}, {{ ~0x0404040400000000, ~0x4020100800000000 }} },
-	{ {{ ~0x00000000f0000000, ~0x0001020400000000 }}, {{ ~0x0808080800000000, ~0x8040201000000000 }} },
-	{ {{ ~0x00000000e0000000, ~0x0102040800000000 }}, {{ ~0x1010101000000000, ~0x0080402000000000 }} },
-	{ {{ ~0x00000000c0000000, ~0x0204081000000000 }}, {{ ~0x2020202000000000, ~0x0000804000000000 }} },
-	{ {{ ~0x0000000080000000, ~0x0408102000000000 }}, {{ ~0x4040404000000000, ~0x0000008000000000 }} },
-	{ {{ ~0x0000000000000000, ~0x0810204000000000 }}, {{ ~0x8080808000000000, ~0x0000000000000000 }} },
-	{ {{ ~0x000000fe00000000, ~0x0000000000000000 }}, {{ ~0x0101010000000000, ~0x0804020000000000 }} },
-	{ {{ ~0x000000fc00000000, ~0x0000010000000000 }}, {{ ~0x0202020000000000, ~0x1008040000000000 }} },
-	{ {{ ~0x000000f800000000, ~0x0001020000000000 }}, {{ ~0x0404040000000000, ~0x2010080000000000 }} },
-	{ {{ ~0x000000f000000000, ~0x0102040000000000 }}, {{ ~0x0808080000000000, ~0x4020100000000000 }} },
-	{ {{ ~0x000000e000000000, ~0x0204080000000000 }}, {{ ~0x1010100000000000, ~0x8040200000000000 }} },
-	{ {{ ~0x000000c000000000, ~0x0408100000000000 }}, {{ ~0x2020200000000000, ~0x0080400000000000 }} },
-	{ {{ ~0x0000008000000000, ~0x0810200000000000 }}, {{ ~0x4040400000000000, ~0x0000800000000000 }} },
-	{ {{ ~0x0000000000000000, ~0x1020400000000000 }}, {{ ~0x8080800000000000, ~0x0000000000000000 }} },
-	{ {{ ~0x0000fe0000000000, ~0x0000000000000000 }}, {{ ~0x0101000000000000, ~0x0402000000000000 }} },
-	{ {{ ~0x0000fc0000000000, ~0x0001000000000000 }}, {{ ~0x0202000000000000, ~0x0804000000000000 }} },
-	{ {{ ~0x0000f80000000000, ~0x0102000000000000 }}, {{ ~0x0404000000000000, ~0x1008000000000000 }} },
-	{ {{ ~0x0000f00000000000, ~0x0204000000000000 }}, {{ ~0x0808000000000000, ~0x2010000000000000 }} },
-	{ {{ ~0x0000e00000000000, ~0x0408000000000000 }}, {{ ~0x1010000000000000, ~0x4020000000000000 }} },
-	{ {{ ~0x0000c00000000000, ~0x0810000000000000 }}, {{ ~0x2020000000000000, ~0x8040000000000000 }} },
-	{ {{ ~0x0000800000000000, ~0x1020000000000000 }}, {{ ~0x4040000000000000, ~0x0080000000000000 }} },
-	{ {{ ~0x0000000000000000, ~0x2040000000000000 }}, {{ ~0x8080000000000000, ~0x0000000000000000 }} },
-	{ {{ ~0x00fe000000000000, ~0x0000000000000000 }}, {{ ~0x0100000000000000, ~0x0200000000000000 }} },
-	{ {{ ~0x00fc000000000000, ~0x0100000000000000 }}, {{ ~0x0200000000000000, ~0x0400000000000000 }} },
-	{ {{ ~0x00f8000000000000, ~0x0200000000000000 }}, {{ ~0x0400000000000000, ~0x0800000000000000 }} },
-	{ {{ ~0x00f0000000000000, ~0x0400000000000000 }}, {{ ~0x0800000000000000, ~0x1000000000000000 }} },
-	{ {{ ~0x00e0000000000000, ~0x0800000000000000 }}, {{ ~0x1000000000000000, ~0x2000000000000000 }} },
-	{ {{ ~0x00c0000000000000, ~0x1000000000000000 }}, {{ ~0x2000000000000000, ~0x4000000000000000 }} },
-	{ {{ ~0x0080000000000000, ~0x2000000000000000 }}, {{ ~0x4000000000000000, ~0x8000000000000000 }} },
-	{ {{ ~0x0000000000000000, ~0x4000000000000000 }}, {{ ~0x8000000000000000, ~0x0000000000000000 }} },
-	{ {{ ~0xfe00000000000000, ~0x0000000000000000 }}, {{ ~0x0000000000000000, ~0x0000000000000000 }} },
-	{ {{ ~0xfc00000000000000, ~0x0000000000000000 }}, {{ ~0x0000000000000000, ~0x0000000000000000 }} },
-	{ {{ ~0xf800000000000000, ~0x0000000000000000 }}, {{ ~0x0000000000000000, ~0x0000000000000000 }} },
-	{ {{ ~0xf000000000000000, ~0x0000000000000000 }}, {{ ~0x0000000000000000, ~0x0000000000000000 }} },
-	{ {{ ~0xe000000000000000, ~0x0000000000000000 }}, {{ ~0x0000000000000000, ~0x0000000000000000 }} },
-	{ {{ ~0xc000000000000000, ~0x0000000000000000 }}, {{ ~0x0000000000000000, ~0x0000000000000000 }} },
-	{ {{ ~0x8000000000000000, ~0x0000000000000000 }}, {{ ~0x0000000000000000, ~0x0000000000000000 }} },
-	{ {{ ~0x0000000000000000, ~0x0000000000000000 }}, {{ ~0x0000000000000000, ~0x0000000000000000 }} }
-};
-
-#if !defined(__LZCNT__) || !defined(__x86_64__)
-
-static const unsigned long long masko[64][4] = {
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000002, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000006, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000000000000e, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000000000001e, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000000000003e, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000000000007e, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000200, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000600, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000e00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000001e00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000003e00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000007e00, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000200, 0x0000000000000100, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000400, 0x0000000000000200, 0x0000000000000000,
-	0x0000000000020000, 0x0000000000000800, 0x0000000000000400, 0x0000000000000200,
-	0x0000000000060000, 0x0000000000001000, 0x0000000000000800, 0x0000000000000400,
-	0x00000000000e0000, 0x0000000000002000, 0x0000000000001000, 0x0000000000000800,
-	0x00000000001e0000, 0x0000000000004000, 0x0000000000002000, 0x0000000000001000,
-	0x00000000003e0000, 0x0000000000000000, 0x0000000000004000, 0x0000000000002000,
-	0x00000000007e0000, 0x0000000000000000, 0x0000000000008000, 0x0000000000004000,
-	0x0000000000000000, 0x0000000000020400, 0x0000000000010100, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000040800, 0x0000000000020200, 0x0000000000000000,
-	0x0000000002000000, 0x0000000000081000, 0x0000000000040400, 0x0000000000020000,
-	0x0000000006000000, 0x0000000000102000, 0x0000000000080800, 0x0000000000040200,
-	0x000000000e000000, 0x0000000000204000, 0x0000000000101000, 0x0000000000080400,
-	0x000000001e000000, 0x0000000000400000, 0x0000000000202000, 0x0000000000100800,
-	0x000000003e000000, 0x0000000000000000, 0x0000000000404000, 0x0000000000201000,
-	0x000000007e000000, 0x0000000000000000, 0x0000000000808000, 0x0000000000402000,
-	0x0000000000000000, 0x0000000002040800, 0x0000000001010100, 0x0000000000000000,
-	0x0000000000000000, 0x0000000004081000, 0x0000000002020200, 0x0000000000000000,
-	0x0000000200000000, 0x0000000008102000, 0x0000000004040400, 0x0000000002000000,
-	0x0000000600000000, 0x0000000010204000, 0x0000000008080800, 0x0000000004020000,
-	0x0000000e00000000, 0x0000000020400000, 0x0000000010101000, 0x0000000008040200,
-	0x0000001e00000000, 0x0000000040000000, 0x0000000020202000, 0x0000000010080400,
-	0x0000003e00000000, 0x0000000000000000, 0x0000000040404000, 0x0000000020100800,
-	0x0000007e00000000, 0x0000000000000000, 0x0000000080808000, 0x0000000040201000,
-	0x0000000000000000, 0x0000000204081000, 0x0000000101010100, 0x0000000000000000,
-	0x0000000000000000, 0x0000000408102000, 0x0000000202020200, 0x0000000000000000,
-	0x0000020000000000, 0x0000000810204000, 0x0000000404040400, 0x0000000200000000,
-	0x0000060000000000, 0x0000001020400000, 0x0000000808080800, 0x0000000402000000,
-	0x00000e0000000000, 0x0000002040000000, 0x0000001010101000, 0x0000000804020000,
-	0x00001e0000000000, 0x0000004000000000, 0x0000002020202000, 0x0000001008040200,
-	0x00003e0000000000, 0x0000000000000000, 0x0000004040404000, 0x0000002010080400,
-	0x00007e0000000000, 0x0000000000000000, 0x0000008080808000, 0x0000004020100800,
-	0x0000000000000000, 0x0000020408102000, 0x0000010101010100, 0x0000000000000000,
-	0x0000000000000000, 0x0000040810204000, 0x0000020202020200, 0x0000000000000000,
-	0x0002000000000000, 0x0000081020400000, 0x0000040404040400, 0x0000020000000000,
-	0x0006000000000000, 0x0000102040000000, 0x0000080808080800, 0x0000040200000000,
-	0x000e000000000000, 0x0000204000000000, 0x0000101010101000, 0x0000080402000000,
-	0x001e000000000000, 0x0000400000000000, 0x0000202020202000, 0x0000100804020000,
-	0x003e000000000000, 0x0000000000000000, 0x0000404040404000, 0x0000201008040200,
-	0x007e000000000000, 0x0000000000000000, 0x0000808080808000, 0x0000402010080400,
-	0x0000000000000000, 0x0002040810204000, 0x0001010101010100, 0x0000000000000000,
-	0x0000000000000000, 0x0004081020400000, 0x0002020202020200, 0x0000000000000000,
-	0x0200000000000000, 0x0008102040000000, 0x0004040404040400, 0x0002000000000000,
-	0x0600000000000000, 0x0010204000000000, 0x0008080808080800, 0x0004020000000000,
-	0x0e00000000000000, 0x0020400000000000, 0x0010101010101000, 0x0008040200000000,
-	0x1e00000000000000, 0x0040000000000000, 0x0020202020202000, 0x0010080402000000,
-	0x3e00000000000000, 0x0000000000000000, 0x0040404040404000, 0x0020100804020000,
-	0x7e00000000000000, 0x0000000000000000, 0x0080808080808000, 0x0040201008040200
-};
-#endif
-
-static const unsigned long long maskr[64][4] = {
-	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000003, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000007, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000000000000f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000000000001f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000000000003f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x000000000000007f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
-	0x0000000000000000, 0x0000000000000002, 0x0000000000000001, 0x0000000000000000,
-	0x0000000000000100, 0x0000000000000004, 0x0000000000000002, 0x0000000000000001,
-	0x0000000000000300, 0x0000000000000008, 0x0000000000000004, 0x0000000000000002,
-	0x0000000000000700, 0x0000000000000010, 0x0000000000000008, 0x0000000000000004,
-	0x0000000000000f00, 0x0000000000000020, 0x0000000000000010, 0x0000000000000008,
-	0x0000000000001f00, 0x0000000000000040, 0x0000000000000020, 0x0000000000000010,
-	0x0000000000003f00, 0x0000000000000080, 0x0000000000000040, 0x0000000000000020,
-	0x0000000000007f00, 0x0000000000000000, 0x0000000000000080, 0x0000000000000040,
-	0x0000000000000000, 0x0000000000000204, 0x0000000000000101, 0x0000000000000000,
-	0x0000000000010000, 0x0000000000000408, 0x0000000000000202, 0x0000000000000100,
-	0x0000000000030000, 0x0000000000000810, 0x0000000000000404, 0x0000000000000201,
-	0x0000000000070000, 0x0000000000001020, 0x0000000000000808, 0x0000000000000402,
-	0x00000000000f0000, 0x0000000000002040, 0x0000000000001010, 0x0000000000000804,
-	0x00000000001f0000, 0x0000000000004080, 0x0000000000002020, 0x0000000000001008,
-	0x00000000003f0000, 0x0000000000008000, 0x0000000000004040, 0x0000000000002010,
-	0x00000000007f0000, 0x0000000000000000, 0x0000000000008080, 0x0000000000004020,
-	0x0000000000000000, 0x0000000000020408, 0x0000000000010101, 0x0000000000000000,
-	0x0000000001000000, 0x0000000000040810, 0x0000000000020202, 0x0000000000010000,
-	0x0000000003000000, 0x0000000000081020, 0x0000000000040404, 0x0000000000020100,
-	0x0000000007000000, 0x0000000000102040, 0x0000000000080808, 0x0000000000040201,
-	0x000000000f000000, 0x0000000000204080, 0x0000000000101010, 0x0000000000080402,
-	0x000000001f000000, 0x0000000000408000, 0x0000000000202020, 0x0000000000100804,
-	0x000000003f000000, 0x0000000000800000, 0x0000000000404040, 0x0000000000201008,
-	0x000000007f000000, 0x0000000000000000, 0x0000000000808080, 0x0000000000402010,
-	0x0000000000000000, 0x0000000002040810, 0x0000000001010101, 0x0000000000000000,
-	0x0000000100000000, 0x0000000004081020, 0x0000000002020202, 0x0000000001000000,
-	0x0000000300000000, 0x0000000008102040, 0x0000000004040404, 0x0000000002010000,
-	0x0000000700000000, 0x0000000010204080, 0x0000000008080808, 0x0000000004020100,
-	0x0000000f00000000, 0x0000000020408000, 0x0000000010101010, 0x0000000008040201,
-	0x0000001f00000000, 0x0000000040800000, 0x0000000020202020, 0x0000000010080402,
-	0x0000003f00000000, 0x0000000080000000, 0x0000000040404040, 0x0000000020100804,
-	0x0000007f00000000, 0x0000000000000000, 0x0000000080808080, 0x0000000040201008,
-	0x0000000000000000, 0x0000000204081020, 0x0000000101010101, 0x0000000000000000,
-	0x0000010000000000, 0x0000000408102040, 0x0000000202020202, 0x0000000100000000,
-	0x0000030000000000, 0x0000000810204080, 0x0000000404040404, 0x0000000201000000,
-	0x0000070000000000, 0x0000001020408000, 0x0000000808080808, 0x0000000402010000,
-	0x00000f0000000000, 0x0000002040800000, 0x0000001010101010, 0x0000000804020100,
-	0x00001f0000000000, 0x0000004080000000, 0x0000002020202020, 0x0000001008040201,
-	0x00003f0000000000, 0x0000008000000000, 0x0000004040404040, 0x0000002010080402,
-	0x00007f0000000000, 0x0000000000000000, 0x0000008080808080, 0x0000004020100804,
-	0x0000000000000000, 0x0000020408102040, 0x0000010101010101, 0x0000000000000000,
-	0x0001000000000000, 0x0000040810204080, 0x0000020202020202, 0x0000010000000000,
-	0x0003000000000000, 0x0000081020408000, 0x0000040404040404, 0x0000020100000000,
-	0x0007000000000000, 0x0000102040800000, 0x0000080808080808, 0x0000040201000000,
-	0x000f000000000000, 0x0000204080000000, 0x0000101010101010, 0x0000080402010000,
-	0x001f000000000000, 0x0000408000000000, 0x0000202020202020, 0x0000100804020100,
-	0x003f000000000000, 0x0000800000000000, 0x0000404040404040, 0x0000201008040201,
-	0x007f000000000000, 0x0000000000000000, 0x0000808080808080, 0x0000402010080402,
-	0x0000000000000000, 0x0002040810204080, 0x0001010101010101, 0x0000000000000000,
-	0x0100000000000000, 0x0004081020408000, 0x0002020202020202, 0x0001000000000000,
-	0x0300000000000000, 0x0008102040800000, 0x0004040404040404, 0x0002010000000000,
-	0x0700000000000000, 0x0010204080000000, 0x0008080808080808, 0x0004020100000000,
-	0x0f00000000000000, 0x0020408000000000, 0x0010101010101010, 0x0008040201000000,
-	0x1f00000000000000, 0x0040800000000000, 0x0020202020202020, 0x0010080402010000,
-	0x3f00000000000000, 0x0080000000000000, 0x0040404040404040, 0x0020100804020100,
-	0x7f00000000000000, 0x0000000000000000, 0x0080808080808080, 0x0040201008040201
-};
-
-#if ((defined(__x86_64__) || defined(USE_GAS_X86)) && defined(__LZCNT__)) || defined(_MSC_VER)
-#define	count_opp_reverse(O,masko,maskr)	lzcnt_u64(~(O) & (maskr))
-#else
-// with guardian bit to avoid __builtin_clz(0)
-#define	count_opp_reverse(O,masko,maskr)	__builtin_clzll(((O) & (masko)) ^ (maskr))
-#endif
-
-#define	SWAP64	0x4e	// for _mm_shuffle_epi32
-#define	SWAP32	0xb1
-
-/**
- * Make inverted flip mask if opponent's disc are surrounded by player's.
- *
- * 0xffffffffffffffffULL (-1) if outflank is 0
- * 0x0000000000000000ULL ( 0) if a 1 is in 64 bit
- */
-static inline __m128i flipmask (__m128i outflank) {
-	return _mm_cmpeq_epi32(_mm_shuffle_epi32(outflank, SWAP32), outflank);
-}
-
-/**
- * Compute flipped discs when playing on square pos.
- *
- * @param pos player's move.
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-unsigned long long flip(const unsigned long long P, const unsigned long long O, int pos)
-{
-	__m128i	outflank17, outflank89, PP, OO;
-	unsigned long long flipped, outflankr1, outflankr7, outflankr8, outflankr9;
-	const __m128i minusone = _mm_set1_epi32(-1);
-
-	outflankr1 = (0x8000000000000000ULL >> count_opp_reverse(O, masko[pos][0], maskr[pos][0])) & P;
-	flipped  = (-outflankr1 * 2) & maskr[pos][0];
-	outflankr7 = (0x8000000000000000ULL >> count_opp_reverse(O, masko[pos][1], maskr[pos][1])) & P;
-	flipped |= (-outflankr7 * 2) & maskr[pos][1];
-	outflankr8 = (0x8000000000000000ULL >> count_opp_reverse(O, masko[pos][2], maskr[pos][2])) & P;
-	flipped |= (-outflankr8 * 2) & maskr[pos][2];
-	outflankr9 = (0x8000000000000000ULL >> count_opp_reverse(O, masko[pos][3], maskr[pos][3])) & P;
-	flipped |= (-outflankr9 * 2) & maskr[pos][3];
-
-	PP = _mm_set1_epi64x(P);
-	OO = _mm_and_si128(_mm_set1_epi64x(O), _mm_set_epi32(0x7e7e7e7e, 0x7e7e7e7e, -1, -1));
-	outflank89 = _mm_and_si128(_mm_andnot_si128(maskl[pos][1].v2, _mm_sub_epi64(_mm_or_si128(OO, maskl[pos][1].v2), minusone)), PP);
-	OO = _mm_unpackhi_epi64(OO, OO);
-	outflank17 = _mm_and_si128(_mm_andnot_si128(maskl[pos][0].v2, _mm_sub_epi64(_mm_or_si128(OO, maskl[pos][0].v2), minusone)), PP);
-	outflank89 = _mm_andnot_si128(maskl[pos][1].v2, _mm_sub_epi64(outflank89, _mm_sub_epi64(flipmask(outflank89), minusone)));
-	outflank17 = _mm_andnot_si128(maskl[pos][0].v2, _mm_sub_epi64(outflank17, _mm_sub_epi64(flipmask(outflank17), minusone)));
-	outflank17 = _mm_or_si128(outflank17, outflank89);
-	outflank17 = _mm_or_si128(outflank17, _mm_shuffle_epi32(outflank17, SWAP64));
-
-	return flipped | _mm_cvtsi128_si64(outflank17);
-}
->>>>>>> 11e7bb7 (filp_sse_bitscan.c (experimental) added; Makefile modified.)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/flip_sse_bswap.c b/src/flip_sse_bswap.c
index d4aeb14..bbed27d 100644
--- a/src/flip_sse_bswap.c
+++ b/src/flip_sse_bswap.c
@@ -1,7 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file flip_sse_bswap.c
  *
@@ -381,15 +377,7 @@ static inline __m128i flipmask (__m128i outflank) {
  * @return flipped disc pattern.
  */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 unsigned long long Flip(int pos, unsigned long long P, unsigned long long O)
-=======
-unsigned long long Flip(int pos, const unsigned long long P, const unsigned long long O)
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-unsigned long long Flip(int pos, unsigned long long P, unsigned long long O)
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 {
 	__m128i	outflank7, outflank8, outflank9, PP, OO;
 	int	x, y8, index_h;
@@ -445,443 +433,3 @@ unsigned long long Flip(int pos, unsigned long long P, unsigned long long O)
 
 	return flipped;
 }
-<<<<<<< HEAD
-=======
-/**
- * @file flip_sse_bswap.c
- *
- * This module deals with flipping discs.
- *
- * A function is provided for each square of the board. These functions are
- * gathered into an array of functions, so that a fast access to each function
- * is allowed. The generic form of the function take as input the player and
- * the opponent bitboards and return the flipped squares into a bitboard.
- *
- * Given the following notation:
- *  - x = square where we play,
- *  - P = player's disc pattern,
- *  - O = opponent's disc pattern,
- * the basic principle is to read into an array the result of a move. Doing
- * this is easier for a single line ; so we can use arrays of the form:
- *  - ARRAY[x][8-bits disc pattern].
- * The problem is thus to convert any line of a 64-bits disc pattern into an
- * 8-bits disc pattern. A fast way to do this is to select the right line,
- * with a bit-mask, to gather the masked-bits into a continuous set by a simple
- * multiplication and to right-shift the result to scale it into a number
- * between 0 and 255.
- * Once we get our 8-bits disc patterns,a first array (OUTFLANK) is used to
- * get the player's discs that surround the opponent discs:
- *  - outflank = OUTFLANK[x][O] & P
- * (Only inner 6-bits of the P are in interest here.)
- * The result is then used as an index to access a second array giving the
- * flipped discs according to the surrounding player's discs:
- *  - flipped = FLIPPED[x][outflank].
- * (Flipped discs fall into inner 6-bits.)
- * Finally, a precomputed array transform the inner 6-bits disc pattern back into a
- * 64-bits disc pattern, and the flipped squares for each line are gathered and
- * returned to generate moves.
- *
- * If the OUTFLANK search is in LSB to MSB direction, carry propagation 
- * can be used to determine contiguous opponent discs.
- * If the OUTFLANK search is in MSB to LSB direction, CONTIG_X tables
- * are used to determine coutiguous opponent discs.
- *
- * @date 1998 - 2018
- * @author Richard Delorme
- * @author Toshihiko Okuhara
- * @version 4.4
- */
-
-#include "bit.h"
-
-/** outflank array (indexed with inner 6 bits) */
-static const unsigned char OUTFLANK[64][8] = {
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x08, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x08, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00 },
-	{ 0x04, 0x00, 0x11, 0x00, 0x04, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x10, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 },
-	{ 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x20, 0x00, 0x08, 0x00, 0x00 },
-	{ 0x04, 0x00, 0x01, 0x20, 0x00, 0x08, 0x00, 0x00 },
-	{ 0x00, 0x08, 0x00, 0x22, 0x00, 0x08, 0x00, 0x00 },
-	{ 0x08, 0x00, 0x00, 0x21, 0x00, 0x08, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x20, 0x00, 0x00, 0x04, 0x00, 0x00 },
-	{ 0x04, 0x00, 0x21, 0x00, 0x00, 0x04, 0x00, 0x00 },
-	{ 0x00, 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00 },
-	{ 0x20, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x10, 0x00 },
-	{ 0x04, 0x00, 0x01, 0x00, 0x40, 0x00, 0x10, 0x00 },
-	{ 0x00, 0x08, 0x00, 0x02, 0x40, 0x00, 0x10, 0x00 },
-	{ 0x08, 0x00, 0x00, 0x01, 0x40, 0x00, 0x10, 0x00 },
-	{ 0x00, 0x00, 0x10, 0x00, 0x44, 0x00, 0x10, 0x00 },
-	{ 0x04, 0x00, 0x11, 0x00, 0x44, 0x00, 0x10, 0x00 },
-	{ 0x00, 0x10, 0x00, 0x00, 0x42, 0x00, 0x10, 0x00 },
-	{ 0x10, 0x00, 0x00, 0x00, 0x41, 0x00, 0x10, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x08, 0x00 },
-	{ 0x04, 0x00, 0x01, 0x40, 0x00, 0x00, 0x08, 0x00 },
-	{ 0x00, 0x08, 0x00, 0x42, 0x00, 0x00, 0x08, 0x00 },
-	{ 0x08, 0x00, 0x00, 0x41, 0x00, 0x00, 0x08, 0x00 },
-	{ 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00 },
-	{ 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, 0x04, 0x00 },
-	{ 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00 },
-	{ 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x20 },
-	{ 0x04, 0x00, 0x01, 0x00, 0x00, 0x80, 0x00, 0x20 },
-	{ 0x00, 0x08, 0x00, 0x02, 0x00, 0x80, 0x00, 0x20 },
-	{ 0x08, 0x00, 0x00, 0x01, 0x00, 0x80, 0x00, 0x20 },
-	{ 0x00, 0x00, 0x10, 0x00, 0x04, 0x80, 0x00, 0x20 },
-	{ 0x04, 0x00, 0x11, 0x00, 0x04, 0x80, 0x00, 0x20 },
-	{ 0x00, 0x10, 0x00, 0x00, 0x02, 0x80, 0x00, 0x20 },
-	{ 0x10, 0x00, 0x00, 0x00, 0x01, 0x80, 0x00, 0x20 },
-	{ 0x00, 0x00, 0x00, 0x20, 0x00, 0x88, 0x00, 0x20 },
-	{ 0x04, 0x00, 0x01, 0x20, 0x00, 0x88, 0x00, 0x20 },
-	{ 0x00, 0x08, 0x00, 0x22, 0x00, 0x88, 0x00, 0x20 },
-	{ 0x08, 0x00, 0x00, 0x21, 0x00, 0x88, 0x00, 0x20 },
-	{ 0x00, 0x00, 0x20, 0x00, 0x00, 0x84, 0x00, 0x20 },
-	{ 0x04, 0x00, 0x21, 0x00, 0x00, 0x84, 0x00, 0x20 },
-	{ 0x00, 0x20, 0x00, 0x00, 0x00, 0x82, 0x00, 0x20 },
-	{ 0x20, 0x00, 0x00, 0x00, 0x00, 0x81, 0x00, 0x20 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x10 },
-	{ 0x04, 0x00, 0x01, 0x00, 0x80, 0x00, 0x00, 0x10 },
-	{ 0x00, 0x08, 0x00, 0x02, 0x80, 0x00, 0x00, 0x10 },
-	{ 0x08, 0x00, 0x00, 0x01, 0x80, 0x00, 0x00, 0x10 },
-	{ 0x00, 0x00, 0x10, 0x00, 0x84, 0x00, 0x00, 0x10 },
-	{ 0x04, 0x00, 0x11, 0x00, 0x84, 0x00, 0x00, 0x10 },
-	{ 0x00, 0x10, 0x00, 0x00, 0x82, 0x00, 0x00, 0x10 },
-	{ 0x10, 0x00, 0x00, 0x00, 0x81, 0x00, 0x00, 0x10 },
-	{ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x08 },
-	{ 0x04, 0x00, 0x01, 0x80, 0x00, 0x00, 0x00, 0x08 },
-	{ 0x00, 0x08, 0x00, 0x82, 0x00, 0x00, 0x00, 0x08 },
-	{ 0x08, 0x00, 0x00, 0x81, 0x00, 0x00, 0x00, 0x08 },
-	{ 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x04 },
-	{ 0x04, 0x00, 0x81, 0x00, 0x00, 0x00, 0x00, 0x04 },
-	{ 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 },
-	{ 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }
-};
-
-/** flip array (indexed with outflank) */
-static const unsigned char FLIPPED[137][8] = {
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x02, 0x06, 0x0e, 0x1e, 0x3e, 0x7e },
-	{ 0x00, 0x00, 0x00, 0x04, 0x0c, 0x1c, 0x3c, 0x7c },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x02, 0x00, 0x00, 0x00, 0x08, 0x18, 0x38, 0x78 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x06, 0x04, 0x00, 0x00, 0x00, 0x10, 0x30, 0x70 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x0e, 0x0c, 0x08, 0x00, 0x00, 0x00, 0x20, 0x60 },
-	{ 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x1e, 0x1c, 0x18, 0x10, 0x00, 0x00, 0x00, 0x40 },
-	{ 0x00, 0x00, 0x1a, 0x16, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x3a, 0x36, 0x2e, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x34, 0x2c, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x7a, 0x76, 0x6e, 0x5e, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x74, 0x6c, 0x5c, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x68, 0x58, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
-	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00 }
-};
-
-static const V2DI mask[3][64] = { {
-	{{ ~0x0000000000000000, ~0x0000000000000000 }}, {{ ~0x0000000000000100, ~0x0000000000000000 }},
-	{{ ~0x0000000000010200, ~0x0000000000000000 }}, {{ ~0x0000000001020400, ~0x0000000000000000 }},
-	{{ ~0x0000000102040800, ~0x0000000000000000 }}, {{ ~0x0000010204081000, ~0x0000000000000000 }},
-	{{ ~0x0001020408102000, ~0x0000000000000000 }}, {{ ~0x0102040810204000, ~0x0000000000000000 }},
-	{{ ~0x0000000000000000, ~0x0200000000000000 }}, {{ ~0x0000000000010000, ~0x0400000000000000 }},
-	{{ ~0x0000000001020000, ~0x0800000000000000 }}, {{ ~0x0000000102040000, ~0x1000000000000000 }},
-	{{ ~0x0000010204080000, ~0x2000000000000000 }}, {{ ~0x0001020408100000, ~0x4000000000000000 }},
-	{{ ~0x0102040810200000, ~0x8000000000000000 }}, {{ ~0x0204081020400000, ~0x0000000000000000 }},
-	{{ ~0x0000000000000000, ~0x0402000000000000 }}, {{ ~0x0000000001000000, ~0x0804000000000000 }},
-	{{ ~0x0000000102000000, ~0x1008000000000000 }}, {{ ~0x0000010204000000, ~0x2010000000000000 }},
-	{{ ~0x0001020408000000, ~0x4020000000000000 }}, {{ ~0x0102040810000000, ~0x8040000000000000 }},
-	{{ ~0x0204081020000000, ~0x0080000000000000 }}, {{ ~0x0408102040000000, ~0x0000000000000000 }},
-	{{ ~0x0000000000000000, ~0x0804020000000000 }}, {{ ~0x0000000100000000, ~0x1008040000000000 }},
-	{{ ~0x0000010200000000, ~0x2010080000000000 }}, {{ ~0x0001020400000000, ~0x4020100000000000 }},
-	{{ ~0x0102040800000000, ~0x8040200000000000 }}, {{ ~0x0204081000000000, ~0x0080400000000000 }},
-	{{ ~0x0408102000000000, ~0x0000800000000000 }}, {{ ~0x0810204000000000, ~0x0000000000000000 }},
-	{{ ~0x0000000000000000, ~0x1008040200000000 }}, {{ ~0x0000010000000000, ~0x2010080400000000 }},
-	{{ ~0x0001020000000000, ~0x4020100800000000 }}, {{ ~0x0102040000000000, ~0x8040201000000000 }},
-	{{ ~0x0204080000000000, ~0x0080402000000000 }}, {{ ~0x0408100000000000, ~0x0000804000000000 }},
-	{{ ~0x0810200000000000, ~0x0000008000000000 }}, {{ ~0x1020400000000000, ~0x0000000000000000 }},
-	{{ ~0x0000000000000000, ~0x2010080402000000 }}, {{ ~0x0001000000000000, ~0x4020100804000000 }},
-	{{ ~0x0102000000000000, ~0x8040201008000000 }}, {{ ~0x0204000000000000, ~0x0080402010000000 }},
-	{{ ~0x0408000000000000, ~0x0000804020000000 }}, {{ ~0x0810000000000000, ~0x0000008040000000 }},
-	{{ ~0x1020000000000000, ~0x0000000080000000 }}, {{ ~0x2040000000000000, ~0x0000000000000000 }},
-	{{ ~0x0000000000000000, ~0x4020100804020000 }}, {{ ~0x0100000000000000, ~0x8040201008040000 }},
-	{{ ~0x0200000000000000, ~0x0080402010080000 }}, {{ ~0x0400000000000000, ~0x0000804020100000 }},
-	{{ ~0x0800000000000000, ~0x0000008040200000 }}, {{ ~0x1000000000000000, ~0x0000000080400000 }},
-	{{ ~0x2000000000000000, ~0x0000000000800000 }}, {{ ~0x4000000000000000, ~0x0000000000000000 }},
-	{{ ~0x0000000000000000, ~0x8040201008040200 }}, {{ ~0x0000000000000000, ~0x0080402010080400 }},
-	{{ ~0x0000000000000000, ~0x0000804020100800 }}, {{ ~0x0000000000000000, ~0x0000008040201000 }},
-	{{ ~0x0000000000000000, ~0x0000000080402000 }}, {{ ~0x0000000000000000, ~0x0000000000804000 }},
-	{{ ~0x0000000000000000, ~0x0000000000008000 }}, {{ ~0x0000000000000000, ~0x0000000000000000 }}
-}, {
-	{{ ~0x0101010101010100, ~0x0000000000000000 }}, {{ ~0x0202020202020200, ~0x0000000000000000 }},
-	{{ ~0x0404040404040400, ~0x0000000000000000 }}, {{ ~0x0808080808080800, ~0x0000000000000000 }},
-	{{ ~0x1010101010101000, ~0x0000000000000000 }}, {{ ~0x2020202020202000, ~0x0000000000000000 }},
-	{{ ~0x4040404040404000, ~0x0000000000000000 }}, {{ ~0x8080808080808000, ~0x0000000000000000 }},
-	{{ ~0x0101010101010000, ~0x0100000000000000 }}, {{ ~0x0202020202020000, ~0x0200000000000000 }},
-	{{ ~0x0404040404040000, ~0x0400000000000000 }}, {{ ~0x0808080808080000, ~0x0800000000000000 }},
-	{{ ~0x1010101010100000, ~0x1000000000000000 }}, {{ ~0x2020202020200000, ~0x2000000000000000 }},
-	{{ ~0x4040404040400000, ~0x4000000000000000 }}, {{ ~0x8080808080800000, ~0x8000000000000000 }},
-	{{ ~0x0101010101000000, ~0x0101000000000000 }}, {{ ~0x0202020202000000, ~0x0202000000000000 }},
-	{{ ~0x0404040404000000, ~0x0404000000000000 }}, {{ ~0x0808080808000000, ~0x0808000000000000 }},
-	{{ ~0x1010101010000000, ~0x1010000000000000 }}, {{ ~0x2020202020000000, ~0x2020000000000000 }},
-	{{ ~0x4040404040000000, ~0x4040000000000000 }}, {{ ~0x8080808080000000, ~0x8080000000000000 }},
-	{{ ~0x0101010100000000, ~0x0101010000000000 }}, {{ ~0x0202020200000000, ~0x0202020000000000 }},
-	{{ ~0x0404040400000000, ~0x0404040000000000 }}, {{ ~0x0808080800000000, ~0x0808080000000000 }},
-	{{ ~0x1010101000000000, ~0x1010100000000000 }}, {{ ~0x2020202000000000, ~0x2020200000000000 }},
-	{{ ~0x4040404000000000, ~0x4040400000000000 }}, {{ ~0x8080808000000000, ~0x8080800000000000 }},
-	{{ ~0x0101010000000000, ~0x0101010100000000 }}, {{ ~0x0202020000000000, ~0x0202020200000000 }},
-	{{ ~0x0404040000000000, ~0x0404040400000000 }}, {{ ~0x0808080000000000, ~0x0808080800000000 }},
-	{{ ~0x1010100000000000, ~0x1010101000000000 }}, {{ ~0x2020200000000000, ~0x2020202000000000 }},
-	{{ ~0x4040400000000000, ~0x4040404000000000 }}, {{ ~0x8080800000000000, ~0x8080808000000000 }},
-	{{ ~0x0101000000000000, ~0x0101010101000000 }}, {{ ~0x0202000000000000, ~0x0202020202000000 }},
-	{{ ~0x0404000000000000, ~0x0404040404000000 }}, {{ ~0x0808000000000000, ~0x0808080808000000 }},
-	{{ ~0x1010000000000000, ~0x1010101010000000 }}, {{ ~0x2020000000000000, ~0x2020202020000000 }},
-	{{ ~0x4040000000000000, ~0x4040404040000000 }}, {{ ~0x8080000000000000, ~0x8080808080000000 }},
-	{{ ~0x0100000000000000, ~0x0101010101010000 }}, {{ ~0x0200000000000000, ~0x0202020202020000 }},
-	{{ ~0x0400000000000000, ~0x0404040404040000 }}, {{ ~0x0800000000000000, ~0x0808080808080000 }},
-	{{ ~0x1000000000000000, ~0x1010101010100000 }}, {{ ~0x2000000000000000, ~0x2020202020200000 }},
-	{{ ~0x4000000000000000, ~0x4040404040400000 }}, {{ ~0x8000000000000000, ~0x8080808080800000 }},
-	{{ ~0x0000000000000000, ~0x0101010101010100 }}, {{ ~0x0000000000000000, ~0x0202020202020200 }},
-	{{ ~0x0000000000000000, ~0x0404040404040400 }}, {{ ~0x0000000000000000, ~0x0808080808080800 }},
-	{{ ~0x0000000000000000, ~0x1010101010101000 }}, {{ ~0x0000000000000000, ~0x2020202020202000 }},
-	{{ ~0x0000000000000000, ~0x4040404040404000 }}, {{ ~0x0000000000000000, ~0x8080808080808000 }}
-}, {
-	{{ ~0x8040201008040200, ~0x0000000000000000 }}, {{ ~0x0080402010080400, ~0x0000000000000000 }},
-	{{ ~0x0000804020100800, ~0x0000000000000000 }}, {{ ~0x0000008040201000, ~0x0000000000000000 }},
-	{{ ~0x0000000080402000, ~0x0000000000000000 }}, {{ ~0x0000000000804000, ~0x0000000000000000 }},
-	{{ ~0x0000000000008000, ~0x0000000000000000 }}, {{ ~0x0000000000000000, ~0x0000000000000000 }},
-	{{ ~0x4020100804020000, ~0x0000000000000000 }}, {{ ~0x8040201008040000, ~0x0100000000000000 }},
-	{{ ~0x0080402010080000, ~0x0200000000000000 }}, {{ ~0x0000804020100000, ~0x0400000000000000 }},
-	{{ ~0x0000008040200000, ~0x0800000000000000 }}, {{ ~0x0000000080400000, ~0x1000000000000000 }},
-	{{ ~0x0000000000800000, ~0x2000000000000000 }}, {{ ~0x0000000000000000, ~0x4000000000000000 }},
-	{{ ~0x2010080402000000, ~0x0000000000000000 }}, {{ ~0x4020100804000000, ~0x0001000000000000 }},
-	{{ ~0x8040201008000000, ~0x0102000000000000 }}, {{ ~0x0080402010000000, ~0x0204000000000000 }},
-	{{ ~0x0000804020000000, ~0x0408000000000000 }}, {{ ~0x0000008040000000, ~0x0810000000000000 }},
-	{{ ~0x0000000080000000, ~0x1020000000000000 }}, {{ ~0x0000000000000000, ~0x2040000000000000 }},
-	{{ ~0x1008040200000000, ~0x0000000000000000 }}, {{ ~0x2010080400000000, ~0x0000010000000000 }},
-	{{ ~0x4020100800000000, ~0x0001020000000000 }}, {{ ~0x8040201000000000, ~0x0102040000000000 }},
-	{{ ~0x0080402000000000, ~0x0204080000000000 }}, {{ ~0x0000804000000000, ~0x0408100000000000 }},
-	{{ ~0x0000008000000000, ~0x0810200000000000 }}, {{ ~0x0000000000000000, ~0x1020400000000000 }},
-	{{ ~0x0804020000000000, ~0x0000000000000000 }}, {{ ~0x1008040000000000, ~0x0000000100000000 }},
-	{{ ~0x2010080000000000, ~0x0000010200000000 }}, {{ ~0x4020100000000000, ~0x0001020400000000 }},
-	{{ ~0x8040200000000000, ~0x0102040800000000 }}, {{ ~0x0080400000000000, ~0x0204081000000000 }},
-	{{ ~0x0000800000000000, ~0x0408102000000000 }}, {{ ~0x0000000000000000, ~0x0810204000000000 }},
-	{{ ~0x0402000000000000, ~0x0000000000000000 }}, {{ ~0x0804000000000000, ~0x0000000001000000 }},
-	{{ ~0x1008000000000000, ~0x0000000102000000 }}, {{ ~0x2010000000000000, ~0x0000010204000000 }},
-	{{ ~0x4020000000000000, ~0x0001020408000000 }}, {{ ~0x8040000000000000, ~0x0102040810000000 }},
-	{{ ~0x0080000000000000, ~0x0204081020000000 }}, {{ ~0x0000000000000000, ~0x0408102040000000 }},
-	{{ ~0x0200000000000000, ~0x0000000000000000 }}, {{ ~0x0400000000000000, ~0x0000000000010000 }},
-	{{ ~0x0800000000000000, ~0x0000000001020000 }}, {{ ~0x1000000000000000, ~0x0000000102040000 }},
-	{{ ~0x2000000000000000, ~0x0000010204080000 }}, {{ ~0x4000000000000000, ~0x0001020408100000 }},
-	{{ ~0x8000000000000000, ~0x0102040810200000 }}, {{ ~0x0000000000000000, ~0x0204081020400000 }},
-	{{ ~0x0000000000000000, ~0x0000000000000000 }}, {{ ~0x0000000000000000, ~0x0000000000000100 }},
-	{{ ~0x0000000000000000, ~0x0000000000010200 }}, {{ ~0x0000000000000000, ~0x0000000001020400 }},
-	{{ ~0x0000000000000000, ~0x0000000102040800 }}, {{ ~0x0000000000000000, ~0x0000010204081000 }},
-	{{ ~0x0000000000000000, ~0x0001020408102000 }}, {{ ~0x0000000000000000, ~0x0102040810204000 }}
-} };
-
-#define	SWAP64	0x4e	// for _mm_shuffle_epi32
-#define	SWAP32	0xb1
-
-/**
- * Make inverted flip mask if opponent's disc are surrounded by player's.
- *
- * -1 if outflank is 0
- *  0 if a 1 is in 64 bit
- */
-static inline __m128i flipmask (__m128i outflank) {
-	return _mm_cmpeq_epi32(_mm_shuffle_epi32(outflank, SWAP32), outflank);
-}
-
-/**
- * Compute flipped discs when playing on square pos.
- *
- * @param pos player's move.
- * @param P player's disc pattern.
- * @param O opponent's disc pattern.
- * @return flipped disc pattern.
- */
-
-unsigned long long Flip(int pos, const unsigned long long P, const unsigned long long O)
-{
-	__m128i	outflank7, outflank8, outflank9, PP, OO;
-	int	x, y8, index_h;
-	unsigned long long	flipped;
-	static const V2DI minusone = {{ -1LL, -1LL }};
-
-	if (pos >= 64)	// pass
-		return 0;
-
-#ifdef __x86_64__
-	PP = _mm_set_epi64x(vertical_mirror(P), P);
-	OO = _mm_set_epi64x(vertical_mirror(O), O);
-#else
-	PP = _mm_set_epi32(bswap_int(P), bswap_int(P >> 32), (P >> 32), P);
-	OO = _mm_set_epi32(bswap_int(O), bswap_int(O >> 32), (O >> 32), O);
-#endif
-
-#if (defined (USE_GAS_X64) || defined(USE_GAS_X86)) && !defined(__AVX__)
-	__asm__ (
-						"movdqa	%4, %1\n\t"		"movdqa	%4, %2\n\t"
-		"movdqa	(%5), %0\n\t"		"movdqa	1024(%5), %%xmm3\n\t"	"movdqa	2048(%5), %%xmm5\n\t"
-		"por	%0, %4\n\t"		"por	%%xmm3, %1\n\t"		"por	%%xmm5, %2\n\t"
-		"psubq	%6, %4\n\t"		"psubq	%6, %1\n\t"		"psubq	%6, %2\n\t"
-		"pand	%3, %4\n\t"		"pand	%3, %1\n\t"		"pand	%3, %2\n\t"
-		"movdqa	%0, %3\n\t"
-		"pandn	%4, %3\n\t"		"pandn	%1, %%xmm3\n\t"		"pandn	%2, %%xmm5\n\t"
-		"pshufd	$177, %3, %4\n\t"	"pshufd	$177, %%xmm3, %1\n\t"	"pshufd	$177, %%xmm5, %2\n\t"
-		"pcmpeqd %3, %4\n\t"		"pcmpeqd %%xmm3, %1\n\t"	"pcmpeqd %%xmm5, %2\n\t"
-		"paddq	%6, %3\n\t"		"paddq	%6, %%xmm3\n\t"		"paddq	%6, %%xmm5\n\t"
-		"psubq	%4, %3\n\t"		"psubq	%1, %%xmm3\n\t"		"psubq	%2, %%xmm5\n\t"
-						"movdqa	1024(%5), %1\n\t"	"movdqa	2048(%5), %2\n\t"
-		"pandn	%3, %0\n\t"		"pandn	%%xmm3, %1\n\t"		"pandn	%%xmm5, %2\n\t"
-						"por	%1, %0\n\t"		"por	%2, %0"
-	: "=&x" (outflank7), "=&x" (outflank8), "=&x" (outflank9)
-	: "x" (PP), "x" (OO), "r" (&mask[0][pos]), "xm" (minusone)
-	: "xmm3", "xmm5");
-
-#else
-	outflank7 = _mm_andnot_si128(mask[0][pos].v2, _mm_and_si128(_mm_sub_epi64(_mm_or_si128(OO, mask[0][pos].v2), minusone.v2), PP));
-	outflank8 = _mm_andnot_si128(mask[1][pos].v2, _mm_and_si128(_mm_sub_epi64(_mm_or_si128(OO, mask[1][pos].v2), minusone.v2), PP));
-	outflank9 = _mm_andnot_si128(mask[2][pos].v2, _mm_and_si128(_mm_sub_epi64(_mm_or_si128(OO, mask[2][pos].v2), minusone.v2), PP));
-	outflank7 = _mm_andnot_si128(mask[0][pos].v2, _mm_sub_epi64(outflank7, _mm_sub_epi64(flipmask(outflank7), minusone.v2)));
-	outflank8 = _mm_andnot_si128(mask[1][pos].v2, _mm_sub_epi64(outflank8, _mm_sub_epi64(flipmask(outflank8), minusone.v2)));
-	outflank9 = _mm_andnot_si128(mask[2][pos].v2, _mm_sub_epi64(outflank9, _mm_sub_epi64(flipmask(outflank9), minusone.v2)));
-	outflank7 = _mm_or_si128(_mm_or_si128(outflank7, outflank8), outflank9);
-#endif
-	flipped = _mm_cvtsi128_si64(outflank7) | vertical_mirror(_mm_cvtsi128_si64(_mm_unpackhi_epi64(outflank7, outflank7)));
-
-	x = pos & 7;
-	y8 = pos & 0x38;
-	index_h = OUTFLANK[((unsigned int) (O >> y8) & 0x7E) >> 1][x] & (P >> y8);
-	flipped |= ((unsigned long long) FLIPPED[index_h][x]) << y8;
-
-	return flipped;
-}
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/flip_sve_lzcnt.c b/src/flip_sve_lzcnt.c
index b0b4ddd..d4d1cf3 100644
--- a/src/flip_sve_lzcnt.c
+++ b/src/flip_sve_lzcnt.c
@@ -5,15 +5,7 @@
  *
  * For LSB to MSB directions, isolate LS1B can be used to determine
  * contiguous opponent discs.
-<<<<<<< HEAD
-<<<<<<< HEAD
  * For MSB to LSB directions, CLZ is used to isolate MS1B.
-=======
- * For MSB to LSB directions, LZCNT is used to isolate MS1B.
->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build)
-=======
- * For MSB to LSB directions, CLZ is used to isolate MS1B.
->>>>>>> ddb5d3f (Add SVE SIMULLASTFLIP to endgame_neon (but not enabled))
  *
  * @date 2024
  * @author Toshihiko Okuhara
@@ -23,15 +15,7 @@
 #include "arm_sve.h"
 #include "arm_neon.h"
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-const uint64_t lrmask[66][8] = {
-=======
-static const uint64_t lrmask[66][8] = {
->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build)
-=======
 const uint64_t lrmask[66][8] = {
->>>>>>> ddb5d3f (Add SVE SIMULLASTFLIP to endgame_neon (but not enabled))
 	{ 0x00000000000000fe, 0x0101010101010100, 0x8040201008040200, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
 	{ 0x00000000000000fc, 0x0202020202020200, 0x0080402010080400, 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
 	{ 0x00000000000000f8, 0x0404040404040400, 0x0000804020100800, 0x0000000000010200, 0x0000000000000003, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
diff --git a/src/game.c b/src/game.c
index 4916724..9b6261d 100644
--- a/src/game.c
+++ b/src/game.c
@@ -3,19 +3,7 @@
  *
  * Game management
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2020
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
- * @date 1998 - 2022
->>>>>>> f33d573 (Fix 'nboard pass not parsed' bug, crc32c for game hash too)
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -170,15 +158,7 @@ bool game_update_board(Board *board, int x)
 	if (!can_move(board->player, board->opponent)) {
 		board_pass(board);
 	}
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if (board_get_move_flip(board, x, &move) == 0) return false;
-=======
-	if (board_get_move(board, x, &move) == 0) return false;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	if (board_get_move_flip(board, x, &move) == 0) return false;
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 	board_update(board, &move);
 
 	return true;
@@ -198,15 +178,7 @@ static bool game_update_player(Board *board, int x)
 			board_pass(board);
 			swap = !swap;
 		}
-<<<<<<< HEAD
-<<<<<<< HEAD
-		if (board_get_move_flip(board, x, &move) == 0) swap = !swap;
-=======
-		if (board_get_move(board, x, &move) == 0) swap = !swap;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		if (board_get_move_flip(board, x, &move) == 0) swap = !swap;
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 	}
 	
 	return swap;
@@ -306,15 +278,7 @@ void text_to_game(const char *line, Game *game)
 		s = parse_move(line, &board, &move);
 		if (s == line && move.x == NOMOVE) return;
 		if (move.x != PASS) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 			game->hash = crc32c_u8(game->hash, move.x);
-=======
-			game->hash ^= hash_move[move.x][i];
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			game->hash = crc32c_u8(game->hash, move.x);
->>>>>>> f33d573 (Fix 'nboard pass not parsed' bug, crc32c for game hash too)
 			game->move[i++] = move.x;
 		}
 		board_update(&board, &move);
@@ -430,15 +394,7 @@ void game_append_line(Game *game, const Line *line, const int from)
 		for (i = 0, j = from; i < line->n_moves && j < 60; ++i) {
 			if (line->move[i] != PASS) {
 				if (game_update_board(&board, line->move[i])) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-					game->hash = crc32c_u8(game->hash, line->move[i]);
-=======
-					game->hash ^= hash_move[(int)line->move[i]][j];
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 					game->hash = crc32c_u8(game->hash, line->move[i]);
->>>>>>> f33d573 (Fix 'nboard pass not parsed' bug, crc32c for game hash too)
 					game->move[j++] = line->move[i];
 				} else {
 					break;
@@ -775,27 +731,11 @@ void game_export_ggf(const Game *game, FILE *f)
 	fputs("BO[8 ", f);
 
 	if (game->player == BLACK) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 		bk = game->initial_board.player;
 		wh = game->initial_board.opponent;
 	} else {
 		bk = game->initial_board.opponent;
 		wh = game->initial_board.player;
-=======
-		bk = game->initial_board->player;
-		wh = game->initial_board->opponent;
-	} else {
-		bk = game->initial_board->opponent;
-		wh = game->initial_board->player;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-		bk = game->initial_board.player;
-		wh = game->initial_board.opponent;
-	} else {
-		bk = game->initial_board.opponent;
-		wh = game->initial_board.player;
->>>>>>> 0a166fd (Remove 1 element array coding style)
 	}
 	for (x = 0; x < 64; ++x) {
 		square = 2 - (wh & 1) - 2 * (bk & 1);
@@ -1444,18 +1384,7 @@ void game_export_eps(const Game *game, FILE *f)
 		"\t(8)  14  15 moveto show\n"
 		"}def\n"
 		"%%EndProlog\n\n"
-<<<<<<< HEAD
-=======
-
-		"% do the drawing\n"
-		"gsave\n"
-		"\n\t% draw an empty board\n"
-		"\tboard_coord\n"
-		"\tboard_grid\n"
-		"\n\t% draw the discs\n", f);
->>>>>>> f33d573 (Fix 'nboard pass not parsed' bug, crc32c for game hash too)
 
-<<<<<<< HEAD
 		"% do the drawing\n"
 		"gsave\n"
 		"\n\t% draw an empty board\n"
@@ -1463,8 +1392,6 @@ void game_export_eps(const Game *game, FILE *f)
 		"\tboard_grid\n"
 		"\n\t% draw the discs\n", f);
 
-=======
->>>>>>> 0a166fd (Remove 1 element array coding style)
 	board = game->initial_board;
 	for (i = A1; i <= H8; i++) {
 		color = board_get_square_color(&board, i);
@@ -1588,38 +1515,16 @@ void game_rand(Game *game, int n_ply, Random *r)
 	game_init(game);
 	board_init(&board);
 	for (ply = 0; ply < n_ply; ply++) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 		moves = board_get_moves(&board);
 		if (!moves) {
 			board_pass(&board);
 			moves = board_get_moves(&board);
-=======
-		moves = get_moves(board.player, board.opponent);
-		if (!moves) {
-			board_pass(&board);
-			moves = get_moves(board.player, board.opponent);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		moves = board_get_moves(&board);
-		if (!moves) {
-			board_pass(&board);
-			moves = board_get_moves(&board);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 			if (!moves) {
 				break;
 			}
 		}
 		;
-<<<<<<< HEAD
-<<<<<<< HEAD
 		board_get_move_flip(&board, get_rand_bit(moves, r), &move);
-=======
-		board_get_move(&board, get_rand_bit(moves, r), &move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		board_get_move_flip(&board, get_rand_bit(moves, r), &move);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 		game->move[ply] = move.x;
 		board_update(&board, &move);
 	}
@@ -1663,10 +1568,6 @@ int game_analyze(Game *game, Search *search, const int n_empties, const bool app
 			board_pass(&board);
 			player = !player;
 		} 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 		if (!board_is_occupied(&board, game->move[i]) && board_get_move_flip(&board, game->move[i], &stack[n_move].played)) {
 			stack[n_move].best = MOVE_INIT;
 			line_init(&stack[n_move].pv, player);
@@ -1674,21 +1575,6 @@ int game_analyze(Game *game, Search *search, const int n_empties, const bool app
 			search_set_level(search, 60, search->eval.n_empties);
 			stack[n_move].n_empties = search->eval.n_empties;
 			if (search->movelist.n_moves > 1 && search->eval.n_empties <= n_empties) {
-=======
-		if (!board_is_occupied(&board, game->move[i]) && board_get_move(&board, game->move[i], &stack[n_move].played)) {
-			stack[n_move].best = MOVE_INIT;
-			line_init(&stack[n_move].pv, player);
-			search_set_board(search, &board, player);
-<<<<<<< HEAD
-			search_set_level(search, 60, search->n_empties);
-			stack[n_move].n_empties = search->n_empties;
-			if (search->movelist.n_moves > 1 && search->n_empties <= n_empties) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			search_set_level(search, 60, search->eval.n_empties);
-			stack[n_move].n_empties = search->eval.n_empties;
-			if (search->movelist.n_moves > 1 && search->eval.n_empties <= n_empties) {
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 				movelist_exclude(&search->movelist, game->move[i]);
 				search_run(search);
 				stack[n_move].best = *(movelist_first(&search->movelist));
@@ -1708,18 +1594,8 @@ int game_analyze(Game *game, Search *search, const int n_empties, const bool app
 	}
 
 	search_set_board(search, &board, player);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if (search->eval.n_empties <= n_empties) {
-		search_set_level(search, 60, search->eval.n_empties);
-=======
-	if (search->n_empties <= n_empties) {
-		search_set_level(search, 60, search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	if (search->eval.n_empties <= n_empties) {
 		search_set_level(search, 60, search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 		search_run(search);
 		score = search->result->score;
 		
@@ -1785,15 +1661,7 @@ int game_complete(Game *game, Search *search)
 
 		search_set_board(search, &board, player);
 		search_run(search);
-<<<<<<< HEAD
-<<<<<<< HEAD
-		if (search->result->depth == search->eval.n_empties && search->result->selectivity == NO_SELECTIVITY) {
-=======
-		if (search->result->depth == search->n_empties && search->result->selectivity == NO_SELECTIVITY) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		if (search->result->depth == search->eval.n_empties && search->result->selectivity == NO_SELECTIVITY) {
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 			game_append_line(game, &search->result->pv, i);
 		} else {
 			game->move[i] = search->result->move;
diff --git a/src/ggs.c b/src/ggs.c
index de4e080..c2f4c67 100644
--- a/src/ggs.c
+++ b/src/ggs.c
@@ -1222,15 +1222,7 @@ static void ui_ggs_play(UI *ui, int turn) {
 			ui->ggs->me, move, ui->ggs->board->id,
 			result->score
 		);
-<<<<<<< HEAD
-<<<<<<< HEAD
 	} else if (play->search.eval.n_empties >= 15) { //avoid noisy display
-=======
-	} else if (play->search.n_empties >= 15) { //avoid noisy display
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	} else if (play->search.eval.n_empties >= 15) { //avoid noisy display
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 		const char *bound;
 		char s_nodes[16], s_speed[16];
 
diff --git a/src/hash.c b/src/hash.c
index e9832e6..2384d12 100644
--- a/src/hash.c
+++ b/src/hash.c
@@ -14,23 +14,7 @@
  * When doing parallel search with a shared hashtable, a locked implementation
  * avoid concurrency collisions.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2020
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
- * @date 1998 - 2021
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
- * @date 1998 - 2022
->>>>>>> 11a54a6 (Revise get_corner_stability and hash_cleanup)
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -47,74 +31,10 @@
 #include <stdio.h>
 #include <assert.h>
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-/** hashing global data */
-unsigned long long hash_move[64][60];
-
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
->>>>>>> f33d573 (Fix 'nboard pass not parsed' bug, crc32c for game hash too)
-=======
-// use vectored board if vectorcall available and hboard_equal is efficient enough
-#ifdef _M_X64
-	#define	store_hboard(p,b)	_mm_storeu_si128((__m128i *) (p), (b))
-  #if defined(__SSE4__) || defined(__AVX__)
-	inline bool hboard_equal(__m128i b1, Board *b2)
-	{
-		b1 = _mm_xor_si128(b1, _mm_loadu_si128((__m128i *) b2));
-		return _mm_testz_si128(b1, b1);
-	}
-  #else
-	#define	hboard_equal(b1,b2)	(_mm_movemask_epi8(_mm_cmpeq_epi8(b1, _mm_loadu_si128((__m128i *) b2))) == 0xffff)
-  #endif
-
-#elif defined(__aarch64__) || defined(_M_ARM64)
-	#define	store_hboard(p,b)	vst1q_u64((uint64_t *) (p), (b))
-  #ifdef _M_ARM64	// https://stackoverflow.com/questions/15389539/fastest-way-to-test-a-128-bit-neon-register-for-a-value-of-0-using-intrinsics
-	#define	hboard_equal(b1,b2)	(neon_umaxvq32(veorq_u64((b1), vld1q_u64((uint64_t *) (b2)))) == 0)
-  #else
-	#define	hboard_equal(b1,b2)	(vmaxvq_u32(vreinterpretq_u32_u64(veorq_u64((b1), vld1q_u64((uint64_t *) (b2))))) == 0)
-  #endif
-
-#else
-	#define	store_hboard(p,b)	*(p) = *(b)
-	#define	hboard_equal(b1,b2)	board_equal(b1, b2)
-#endif
-
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 /** HashData init value */
 const HashData HASH_DATA_INIT = {{{ 0, 0, 0, 0 }}, -SCORE_INF, SCORE_INF, { NOMOVE, NOMOVE }};
-<<<<<<< HEAD
-=======
-
-/**
-<<<<<<< HEAD
- * @brief Initialize global hash move data.
- */
-void hash_move_init(void)
-{
-	int i, j;
-	Random r;
-
-	random_seed(&r, 0x5DEECE66Dull);
-	for (i = 0; i < 64; ++i)
-	for (j = 0; j < 60; ++j) {
-		do {
-			hash_move[i][j] = random_get(&r);
-		} while (bit_count(hash_move[i][j]) < 8); 
-	}
-}
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 
 /**
-=======
->>>>>>> f33d573 (Fix 'nboard pass not parsed' bug, crc32c for game hash too)
  * @brief Initialise the hashtable.
  *
  * Allocate the hash table entries and initialise the hash masks.
@@ -126,15 +46,7 @@ void hash_init(HashTable *hash_table, const unsigned long long size)
 {
 	int i, n_way;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	for (n_way = 1; n_way < HASH_N_WAY; n_way <<= 1);	// round up HASH_N_WAY to 2 ^ n
-=======
-	for (n_way = 1; n_way < HASH_N_WAY; n_way <<= 1);	// 2 ^ n, at leaset HASH_N_WAY
->>>>>>> 494a38b (AVX/SSE optimized hash_cleanup)
-=======
 	for (n_way = 1; n_way < HASH_N_WAY; n_way <<= 1);	// round up HASH_N_WAY to 2 ^ n
->>>>>>> 42dc349 (add sfence to be sure; correct comments)
 
 	assert(hash_table != NULL);
 	assert((n_way & -n_way) == n_way);
@@ -147,23 +59,8 @@ void hash_init(HashTable *hash_table, const unsigned long long size)
 	}
 
 	if (HASH_ALIGNED) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 		size_t alignment = n_way * sizeof (Hash);	// (4 * 24)
 		alignment = (alignment & -alignment) - 1;	// LS1B - 1 (0x1f)
-=======
-		size_t alignment = n_way * sizeof (Hash);
-		alignment = (alignment & -alignment) - 1;	// LS1B - 1
->>>>>>> c7739ca (Clearer Hash align for non-pow-2 sizeof(HASH))
-=======
-		size_t alignment = n_way * sizeof (Hash);	// (4 * 48)
-		alignment = (alignment & -alignment) - 1;	// LS1B - 1 (0x3f)
->>>>>>> 494a38b (AVX/SSE optimized hash_cleanup)
-=======
-		size_t alignment = n_way * sizeof (Hash);	// (4 * 24)
-		alignment = (alignment & -alignment) - 1;	// LS1B - 1 (0x1f)
->>>>>>> 42dc349 (add sfence to be sure; correct comments)
 		hash_table->hash = (Hash*) (((size_t) hash_table->memory + alignment) & ~alignment);
 		hash_table->hash_mask = size - n_way;
 	} else {
@@ -189,39 +86,15 @@ void hash_init(HashTable *hash_table, const unsigned long long size)
  */
 void hash_cleanup(HashTable *hash_table)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	unsigned int i = 0, imax = hash_table->hash_mask + HASH_N_WAY;
 	Hash *pHash = hash_table->hash;
-=======
-	unsigned int i;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	unsigned int i, imax = hash_table->hash_mask + HASH_N_WAY;
-=======
-	unsigned int i = 0, imax = hash_table->hash_mask + HASH_N_WAY;
->>>>>>> 494a38b (AVX/SSE optimized hash_cleanup)
-	Hash *pHash = hash_table->hash;
->>>>>>> 11a54a6 (Revise get_corner_stability and hash_cleanup)
 
 	assert(hash_table != NULL && hash_table->hash != NULL);
 
 	info("< cleaning hashtable >\n");
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-  #if defined(hasSSE2) || defined(USE_MSVC_X86)
-	if (hasSSE2 && (sizeof(Hash) == 24) && (((size_t) pHash & 0x1f) == 0) && (imax >= 7)) {
-=======
 
   #if defined(hasSSE2) || defined(USE_MSVC_X86)
-<<<<<<< HEAD
-	if (hasSSE2 && (sizeof(Hash) == 24) && (((uintptr_t) pHash & 0x1f) == 0) && (imax >= 7)) {
->>>>>>> 494a38b (AVX/SSE optimized hash_cleanup)
-=======
 	if (hasSSE2 && (sizeof(Hash) == 24) && (((size_t) pHash & 0x1f) == 0) && (imax >= 7)) {
->>>>>>> 47c2589 (Fix w32-modern build and gcc build)
 		for (; i < 4; ++i, ++pHash) {
 			HASH_COLLISIONS(pHash->key = 0;)
 			pHash->board.player = pHash->board.opponent = 0;
@@ -246,22 +119,10 @@ void hash_cleanup(HashTable *hash_table)
 			_mm_stream_si128((__m128i *) pHash + 2, d2);
 		}
     #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 42dc349 (add sfence to be sure; correct comments)
 		_mm_sfence();
 	}
   #endif
 	for (; i <= imax; ++i, ++pHash) {
-=======
-	for (i = 0; i <= imax; ++i, ++pHash) {
->>>>>>> 11a54a6 (Revise get_corner_stability and hash_cleanup)
-=======
-	}
-  #endif
-	for (; i <= imax; ++i, ++pHash) {
->>>>>>> 494a38b (AVX/SSE optimized hash_cleanup)
 		HASH_COLLISIONS(pHash->key = 0;)
 		pHash->board.player = pHash->board.opponent = 0; 
 		pHash->data = HASH_DATA_INIT;
@@ -336,27 +197,11 @@ static void data_update(HashData *data, HashStoreData *storedata)
 
 	if (score < storedata->beta && score < data->upper) data->upper = (signed char) score;
 	if (score > storedata->alpha && score > data->lower) data->lower = (signed char) score;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	if ((score > storedata->alpha || score == SCORE_MIN) && data->move[0] != storedata->data.move[0]) {
 		data->move[1] = data->move[0];
 		data->move[0] = storedata->data.move[0];
 	}
 	data->wl.c.cost = (unsigned char) MAX(storedata->data.wl.c.cost, data->wl.c.cost);
-=======
-	if ((score > storedata->alpha || score == SCORE_MIN) && data->move[0] != storedata->move) {
-=======
-	if ((score > storedata->alpha || score == SCORE_MIN) && data->move[0] != storedata->data.move[0]) {
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-		data->move[1] = data->move[0];
-		data->move[0] = storedata->data.move[0];
-	}
-<<<<<<< HEAD
-	data->cost = (unsigned char) MAX(storedata->data.cost, data->cost);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-	data->wl.c.cost = (unsigned char) MAX(storedata->data.wl.c.cost, data->wl.c.cost);
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 	HASH_STATS(++statistics.n_hash_update;)
 }
 
@@ -381,31 +226,12 @@ static void data_upgrade(HashData *data, HashStoreData *storedata)
 
 	if (score < storedata->beta) data->upper = (signed char) score; else data->upper = SCORE_MAX;
 	if (score > storedata->alpha) data->lower = (signed char) score; else data->lower = SCORE_MIN;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if ((score > storedata->alpha || score == SCORE_MIN) && data->move[0] != storedata->data.move[0]) {
-		data->move[1] = data->move[0];
-		data->move[0] = storedata->data.move[0];
-	}
-	data->wl.us.selectivity_depth = storedata->data.wl.us.selectivity_depth;
-	data->wl.c.cost = (unsigned char) MAX(storedata->data.wl.c.cost, data->wl.c.cost);  // this may not work well in parallel search.
-=======
-	if ((score > storedata->alpha || score == SCORE_MIN) && data->move[0] != storedata->move) {
-=======
 	if ((score > storedata->alpha || score == SCORE_MIN) && data->move[0] != storedata->data.move[0]) {
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 		data->move[1] = data->move[0];
 		data->move[0] = storedata->data.move[0];
 	}
-<<<<<<< HEAD
-	data->depth = storedata->data.depth;
-	data->selectivity = storedata->data.selectivity;
-	data->cost = (unsigned char) MAX(storedata->data.cost, data->cost);  // this may not work well in parallel search.
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
 	data->wl.us.selectivity_depth = storedata->data.wl.us.selectivity_depth;
 	data->wl.c.cost = (unsigned char) MAX(storedata->data.wl.c.cost, data->wl.c.cost);  // this may not work well in parallel search.
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 	HASH_STATS(++statistics.n_hash_upgrade;)
 
 	assert(data->upper >= data->lower);
@@ -428,27 +254,12 @@ static void data_new(HashData *data, HashStoreData *storedata)
 {
 	int score = storedata->score;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 	if (score < storedata->beta) data->upper = (signed char) score; else data->upper = SCORE_MAX;
 	if (score > storedata->alpha) data->lower = (signed char) score; else data->lower = SCORE_MIN;
 	if (score > storedata->alpha || score == SCORE_MIN) data->move[0] = storedata->data.move[0];
 	else data->move[0] = NOMOVE;
 	data->move[1] = NOMOVE;
 	data->wl = storedata->data.wl;
-<<<<<<< HEAD
-=======
-	if (score < storedata->beta) storedata->data.upper = (signed char) score; else storedata->data.upper = SCORE_MAX;
-	if (score > storedata->alpha) storedata->data.lower = (signed char) score; else storedata->data.lower = SCORE_MIN;
-	if (score > storedata->alpha || score == SCORE_MIN) storedata->data.move[0] = storedata->move;
-	else storedata->data.move[0] = NOMOVE;
-	storedata->data.move[1] = NOMOVE;
-	*data = storedata->data;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 	assert(data->upper >= data->lower);
 }
 
@@ -472,19 +283,7 @@ static void data_new(HashData *data, HashStoreData *storedata)
  * @param storedata.score Best score.
  * @param storedata.move Best move.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 static void hash_new(Hash *hash, HashLock *lock, const Board *board, HashStoreData *storedata)
-=======
-static void hash_new(Hash *hash, HashLock *lock, const Board* board, HashStoreData *storedata)
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-static void vectorcall hash_new(Hash *hash, HashLock *lock, HBOARD board, HashStoreData *storedata)
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-static void hash_new(Hash *hash, HashLock *lock, const Board *board, HashStoreData *storedata)
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 {
 	spin_lock(lock);
 	HASH_STATS(if (date == hash->data.date) ++statistics.n_hash_remove;)
@@ -521,23 +320,7 @@ static void hash_set(Hash *hash, HashLock *lock, const Board *board, HashStoreDa
 	HASH_STATS(if (date == hash->data.date) ++statistics.n_hash_remove;)
 	HASH_STATS(++statistics.n_hash_new;)
 	HASH_COLLISIONS(hash->key = storedata->hash_code;)
-<<<<<<< HEAD
-<<<<<<< HEAD
-	hash->board = *board;
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	storedata->data.move[0] = storedata->move;
-	storedata->data.move[1] = NOMOVE;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-=======
-	store_hboard(&hash->board, board);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
 	hash->board = *board;
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 	hash->data = storedata->data;
 	assert(hash->data.upper >= hash->data.lower);
 	spin_unlock(lock);
@@ -568,57 +351,16 @@ static void hash_set(Hash *hash, HashLock *lock, const Board *board, HashStoreDa
 static bool hash_update(Hash *hash, HashLock *lock, const Board *board, HashStoreData *storedata)
 {
 	bool ok = false;
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	HashData *const data = &hash->data;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 
 	if (board_equal(&hash->board, board)) {
 		spin_lock(lock);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> de58f52 (AVX2 board_equal; delayed hash lock code)
 		if (board_equal(&hash->board, board)) {
-=======
-		if (hboard_equal(board, &hash->board)) {
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-		if (board_equal(board, &hash->board)) {
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-=======
-		if (board_equal(&hash->board, board)) {
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
-			if (hash->data.wl.us.selectivity_depth == storedata->data.wl.us.selectivity_depth)
-				data_update(&hash->data, storedata);
-			else	data_upgrade(&hash->data, storedata);
-			hash->data.wl.c.date = storedata->data.wl.c.date;
-			if (hash->data.lower > hash->data.upper) { // reset the hash-table...
-				data_new(&hash->data, storedata);
-=======
-		if (hash->board.player == board->player && hash->board.opponent == board->opponent) {
-<<<<<<< HEAD
-			if (data->selectivity == storedata->data.selectivity && data->depth == storedata->data.depth)
-				data_update(data, storedata);
-			else	data_upgrade(data, storedata);
-			data->date = storedata->data.date;
-			if (data->lower > data->upper) { // reset the hash-table...
-				data_new(data, storedata);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
 			if (hash->data.wl.us.selectivity_depth == storedata->data.wl.us.selectivity_depth)
 				data_update(&hash->data, storedata);
 			else	data_upgrade(&hash->data, storedata);
 			hash->data.wl.c.date = storedata->data.wl.c.date;
 			if (hash->data.lower > hash->data.upper) { // reset the hash-table...
 				data_new(&hash->data, storedata);
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 			}
 			ok = true;
 		} 
@@ -652,41 +394,9 @@ static bool hash_replace(Hash *hash, HashLock *lock, const Board *board, HashSto
 {
 	bool ok = false;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if (board_equal(&hash->board, board)) {
-=======
-	if (hash->board.player == board->player && hash->board.opponent == board->opponent) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-		spin_lock(lock);
-<<<<<<< HEAD
-		if (board_equal(&hash->board, board)) {
-=======
-		if (hash->board.player == board->player && hash->board.opponent == board->opponent) {
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-	if (board_equal(&hash->board, board)) {
-		spin_lock(lock);
-		if (board_equal(&hash->board, board)) {
->>>>>>> de58f52 (AVX2 board_equal; delayed hash lock code)
-=======
-	if (hboard_equal(board, &hash->board)) {
-		spin_lock(lock);
-		if (hboard_equal(board, &hash->board)) {
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-	if (board_equal(board, &hash->board)) {
-		spin_lock(lock);
-		if (board_equal(board, &hash->board)) {
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-=======
 	if (board_equal(&hash->board, board)) {
 		spin_lock(lock);
 		if (board_equal(&hash->board, board)) {
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 			data_new(&hash->data, storedata);
 			ok = true;
 		}
@@ -711,8 +421,6 @@ static bool hash_replace(Hash *hash, HashLock *lock, const Board *board, HashSto
 static bool hash_reset(Hash *hash, HashLock *lock, const Board *board, HashStoreData *storedata)
 {
 	bool ok = false;
-<<<<<<< HEAD
-<<<<<<< HEAD
 
 	if (board_equal(&hash->board, board)) {
 		spin_lock(lock);
@@ -732,42 +440,6 @@ static bool hash_reset(Hash *hash, HashLock *lock, const Board *board, HashStore
 				// } else {
 				//	hash->data.move[1] = storedata->data.move[0];
 				// }
-=======
-	HashData *const data = &hash->data;
-=======
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-
-	if (hash->board.player == board->player && hash->board.opponent == board->opponent) {
-		spin_lock(lock);
-		if (hash->board.player == board->player && hash->board.opponent == board->opponent) {
-			if (hash->data.wl.us.selectivity_depth == storedata->data.wl.us.selectivity_depth) {
-				if (hash->data.lower < storedata->data.lower) hash->data.lower = storedata->data.lower;
-				if (hash->data.upper > storedata->data.upper) hash->data.upper = storedata->data.upper;
-			} else {
-				hash->data.lower = storedata->data.lower;
-				hash->data.upper = storedata->data.upper;
-			}
-<<<<<<< HEAD
-			data->cost = 0;
-			data->date = storedata->data.date;
-			if (storedata->move != NOMOVE) {
-				if (data->move[0] != storedata->move) {
-					data->move[1] = data->move[0];
-					data->move[0] = storedata->move;
-				} else {
-					data->move[1] = storedata->move;
-				}
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-			hash->data.wl = storedata->data.wl;
-			if (storedata->data.move[0] != NOMOVE) {
-				// if (hash->data.move[0] != storedata->data.move[0]) {
-					hash->data.move[1] = hash->data.move[0];
-					hash->data.move[0] = storedata->data.move[0];
-				// } else {
-				//	hash->data.move[1] = storedata->data.move[0];
-				// }
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 			}
 			ok = true;
 		}
@@ -793,18 +465,8 @@ void hash_feed(HashTable *hash_table, const Board *board, const unsigned long lo
 	HashLock *lock; 
 	int i;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	storedata->data.wl.c.date = hash_table->date ? hash_table->date : 1;
-	storedata->data.wl.c.cost = 0;
-=======
-
-	storedata->data.date = hash_table->date ? hash_table->date : 1;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
 	storedata->data.wl.c.date = hash_table->date ? hash_table->date : 1;
 	storedata->data.wl.c.cost = 0;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 
 	worst = hash = hash_table->hash + (hash_code & hash_table->hash_mask);
 	lock = hash_table->lock + (hash_code & hash_table->lock_mask);
@@ -820,13 +482,6 @@ void hash_feed(HashTable *hash_table, const Board *board, const unsigned long lo
 
 	// new entry
 	HASH_COLLISIONS(storedata->hash_code = hash_code;)
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	storedata->data.cost = 0;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 	hash_set(worst, lock, board, storedata);
 }
 
@@ -867,15 +522,7 @@ void hash_store(HashTable *hash_table, const Board *board, const unsigned long l
 
 	worst = hash = hash_table->hash + (hash_code & hash_table->hash_mask);
 	lock = hash_table->lock + (hash_code & hash_table->lock_mask);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	storedata->data.wl.c.date = hash_table->date;
-=======
-	storedata->data.date = hash_table->date;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
 	storedata->data.wl.c.date = hash_table->date;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 	if (hash_update(hash, lock, board, storedata)) return;
 
 	for (i = 1; i < HASH_N_WAY; ++i) {
@@ -914,21 +561,8 @@ void hash_force(HashTable *hash_table, const Board *board, const unsigned long l
 
 	worst = hash = hash_table->hash + (hash_code & hash_table->hash_mask);
 	lock = hash_table->lock + (hash_code & hash_table->lock_mask);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	storedata->data.wl.c.date = hash_table->date;
-	if (hash_replace(hash, lock, board, storedata)) return;
-=======
-	if (hash_replace(hash, lock, board, hash_table->date, depth, selectivity, cost, alpha, beta, score, move)) return;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	storedata->data.date = hash_table->date;
-=======
 	storedata->data.wl.c.date = hash_table->date;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 	if (hash_replace(hash, lock, board, storedata)) return;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
 
 	for (i = 1; i < HASH_N_WAY; ++i) {
 		++hash;
@@ -938,21 +572,8 @@ void hash_force(HashTable *hash_table, const Board *board, const unsigned long l
 		}
 	}
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	HASH_COLLISIONS(storedata->hash_code = hash_code;)
 	hash_new(worst, lock, board, storedata);
-=======
-#if (HASH_COLLISIONS(1)+0) 
-	hash_new(worst, lock, hash_code, board, hash_table->date, depth, selectivity, cost, alpha, beta, score, move);
-#else 
-	hash_new(worst, lock, board, hash_table->date, depth, selectivity, cost, alpha, beta, score, move);
-#endif
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	HASH_COLLISIONS(storedata->hash_code = hash_code;)
-	hash_new(worst, lock, board, storedata);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
 }
 
 /**
@@ -978,15 +599,7 @@ bool hash_get(HashTable *hash_table, const Board *board, const unsigned long lon
 		HASH_COLLISIONS(if (hash->key == hash_code) {)
 		HASH_COLLISIONS(	lock = hash_table->lock + (hash_code & hash_table->lock_mask);)
 		HASH_COLLISIONS(	spin_lock(lock);)
-<<<<<<< HEAD
-<<<<<<< HEAD
-		HASH_COLLISIONS(	if (hash->key == hash_code && !vboard_equal(board, &hash->board)) {)
-=======
-		HASH_COLLISIONS(	if (hash->key == hash_code && !board_equal(&hash->board, board)) {)
->>>>>>> de58f52 (AVX2 board_equal; delayed hash lock code)
-=======
 		HASH_COLLISIONS(	if (hash->key == hash_code && !vboard_equal(board, &hash->board)) {)
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 		HASH_COLLISIONS(		++statistics.n_hash_collision;)
 		HASH_COLLISIONS(		printf("key = %llu\n", hash_code);)
 		HASH_COLLISIONS(		board_print(board, WHITE, stdout);)
@@ -1068,17 +681,8 @@ void hash_exclude_move(HashTable *hash_table, const Board *board, const unsigned
  */
 void hash_copy(const HashTable *src, HashTable *dest)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	unsigned int i, imax = src->hash_mask + HASH_N_WAY;
 	Hash *pSrc = src->hash, *pDest = dest->hash;
-=======
-	unsigned int i;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	unsigned int i, imax = src->hash_mask + HASH_N_WAY;
-	Hash *pSrc = src->hash, *pDest = dest->hash;
->>>>>>> 11a54a6 (Revise get_corner_stability and hash_cleanup)
 
 	assert(src->hash_mask == dest->hash_mask);
 	info("<hash copy>\n");
@@ -1101,13 +705,5 @@ void hash_print(const HashData *data, FILE *f)
 	fprintf(f, "moves = %s, ", move_to_string(data->move[0], WHITE, s_move));
 	fprintf(f, "%s ; ", move_to_string(data->move[1], WHITE, s_move));
 	fprintf(f, "score = [%+02d, %+02d] ; ", data->lower, data->upper);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	fprintf(f, "level = %2d:%2d:%2d@%3d%%", data->wl.c.date, data->wl.c.cost, data->wl.c.depth, selectivity_table[data->wl.c.selectivity].percent);
-=======
-	fprintf(f, "level = %2d:%2d:%2d@%3d%%", data->date, data->cost, data->depth, selectivity_table[data->selectivity].percent);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	fprintf(f, "level = %2d:%2d:%2d@%3d%%", data->wl.c.date, data->wl.c.cost, data->wl.c.depth, selectivity_table[data->wl.c.selectivity].percent);
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 }
diff --git a/src/hash.h b/src/hash.h
index 363bed6..d16c170 100644
--- a/src/hash.h
+++ b/src/hash.h
@@ -3,23 +3,7 @@
  *
  * Hash table's header.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2017
->>>>>>> b3f048d (copyright changes)
-=======
- * @date 1998 - 2020
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-=======
- * @date 1998 - 2021
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -99,88 +83,26 @@ typedef struct HashStoreData {
 	int alpha;
 	int beta;
 	int score;
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	unsigned char move;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 } HashStoreData;
 
 /* declaration */
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
-// use vectored board if vectorcall available
-#ifdef _M_X64
-	#define	HBOARD	__m128i
-	#define	HBOARD_P(b)	_mm_loadu_si128((__m128i *) (b))
-	#define	HBOARD_V(b)	((b).v2)
-#elif defined(__aarch64__) || defined(_M_ARM64)
-	#define	HBOARD	uint64x2_t
-	#define	HBOARD_P(b)	vld1q_u64((uint64_t *) (b))
-	#define	HBOARD_V(b)	((b).v2)
-#else
-	#define	HBOARD	const Board *
-	#define	HBOARD_P(b)	(b)
-	#define	HBOARD_V(b)	(&(b).board)
-#endif
-=======
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 
->>>>>>> e88638e (add vectorcall interface to hash functions)
-void hash_move_init(void);
-=======
->>>>>>> a9633d5 (Initial 4.5.2; some reformats)
-=======
 void hash_move_init(void);
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 void hash_init(HashTable*, const unsigned long long);
 void hash_cleanup(HashTable*);
 void hash_clear(HashTable*);
 void hash_free(HashTable*);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 void hash_feed(HashTable*, const Board *, const unsigned long long, HashStoreData *);
 void hash_store(HashTable*, const Board *, const unsigned long long, HashStoreData *);
 void hash_force(HashTable*, const Board *, const unsigned long long, HashStoreData *);
 bool hash_get(HashTable*, const Board *, const unsigned long long, HashData *);
 bool hash_get_from_board(HashTable*, const Board *, HashData *);
 void hash_exclude_move(HashTable*, const Board *, const unsigned long long, const int);
-<<<<<<< HEAD
-=======
-void hash_feed(HashTable*, const Board*, const unsigned long long, HashStoreData *);
-void hash_store(HashTable*, const Board*, const unsigned long long, HashStoreData *);
-void hash_force(HashTable*, const Board*, const unsigned long long, HashStoreData *);
-bool hash_get_from_board(HashTable*, const Board*, HashData *);
-void hash_exclude_move(HashTable*, const Board*, const unsigned long long, const int);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-void vectorcall hash_feed(HashTable*, HBOARD, const unsigned long long, HashStoreData *);
-void vectorcall hash_store(HashTable*, HBOARD, const unsigned long long, HashStoreData *);
-void vectorcall hash_force(HashTable*, HBOARD, const unsigned long long, HashStoreData *);
-bool vectorcall hash_get(HashTable*, HBOARD, const unsigned long long, HashData *);
-bool hash_get_from_board(HashTable*, HBOARD, HashData *);
-void vectorcall hash_exclude_move(HashTable*, HBOARD, const unsigned long long, const int);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 void hash_copy(const HashTable*, HashTable*);
 void hash_print(const HashData*, FILE*);
 extern unsigned int writeable_level(HashData *data);
 
 extern const HashData HASH_DATA_INIT;
-<<<<<<< HEAD
 
 inline void hash_prefetch(HashTable *hashtable, unsigned long long hashcode) {
 	Hash *p = hashtable->hash + (hashcode & hashtable->hash_mask);
@@ -195,36 +117,5 @@ inline void hash_prefetch(HashTable *hashtable, unsigned long long hashcode) {
 	__builtin_prefetch(p + HASH_N_WAY - 1);
   #endif
 }
-=======
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-
-<<<<<<< HEAD
-#ifdef hasSSE2
-	#define	hash_prefetch(hashtable, hashcode)	_mm_prefetch((char const*)((hashtable)->hash + ((hashcode) & (hashtable)->hash_mask)), _MM_HINT_T0)
-#elif defined(__ARM_ACLE)
-	#define	hash_prefetch(hashtable, hashcode)	__pld((hashtable)->hash + ((hashcode) & (hashtable)->hash_mask))
-#elif defined(__GNUC__)
-	#define	hash_prefetch(hashtable, hashcode)	__builtin_prefetch((hashtable)->hash + ((hashcode) & (hashtable)->hash_mask))
-#else
-	#define	hash_prefetch(hashtable, hashcode)
-#endif
-<<<<<<< HEAD
-=======
-=======
-inline void hash_prefetch(HashTable *hashtable, unsigned long long hashcode) {
-	Hash *p = hashtable->hash + (hashcode & hashtable->hash_mask);
-  #ifdef hasSSE2
-	_mm_prefetch((char const *) p, _MM_HINT_T0);
-	_mm_prefetch((char const *)(p + HASH_N_WAY - 1), _MM_HINT_T0);
-  #elif defined(__ARM_ACLE)
-	__pld(p);
-	__pld(p + HASH_N_WAY - 1);
-  #elif defined(__GNUC__)
-	__builtin_prefetch(p);
-	__builtin_prefetch(p + HASH_N_WAY - 1);
-  #endif
-}
->>>>>>> 30464b5 (add hash_prefetch to NWS_endgame)
 
 #endif
->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines)
diff --git a/src/main.c b/src/main.c
index 3359bbd..55e8a44 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3,31 +3,7 @@
  *
  * @brief Main file.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2024
-=======
- * @date 1998 - 2018
->>>>>>> 0451a79 (Fix AVX eval alignment in UI)
-=======
- * @date 1998 - 2020
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
- * @date 1998 - 2021
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
- * @date 1998 - 2022
->>>>>>> 3030e39 (Fix android build; revise copyright in title)
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
-=======
- * @date 1998 - 2024
->>>>>>> a09308f (Renew version string and copyright year)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -58,28 +34,7 @@ void version(void)
 #elif defined(__APPLE__)
 		" for Apple"
 #endif
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		"\ncopyright 1998 - 2018 Richard Delorme, 2014 - 24 Toshihiko Okuhara\n\n");
-=======
-
-	fprintf(stderr, "copyright 1998 - 2018 Richard Delorme\n\n");
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
-		"\ncopyright 1998 - 2018 Richard Delorme\n\n");
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-		"\ncopyright 1998 - 2018 Richard Delorme, 2014 - 22 Toshihiko Okuhara\n\n");
->>>>>>> 3030e39 (Fix android build; revise copyright in title)
-=======
-		"\ncopyright 1998 - 2018 Richard Delorme, 2014 - 23 Toshihiko Okuhara\n\n");
->>>>>>> f87d2a3 (flip_avx_shuf_max.c added; small improvements in other flip's)
-=======
 		"\ncopyright 1998 - 2018 Richard Delorme, 2014 - 24 Toshihiko Okuhara\n\n");
->>>>>>> a09308f (Renew version string and copyright year)
 }
 
 
@@ -160,13 +115,6 @@ int main(int argc, char **argv)
 	// initialize
 	bit_init();
 	edge_stability_init();
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	hash_move_init();
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
->>>>>>> f33d573 (Fix 'nboard pass not parsed' bug, crc32c for game hash too)
 	statistics_init();
 	eval_open(options.eval_file);
 	search_global_init();
diff --git a/src/midgame.c b/src/midgame.c
index a5a56d7..e944962 100644
--- a/src/midgame.c
+++ b/src/midgame.c
@@ -3,34 +3,9 @@
  *
  * Search near the end of the game.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2018
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
- * @date 1998 - 2020
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
- * @date 1998 - 2023
->>>>>>> 6de3ab1 (Omit eval_weight table for ply > 53)
- * @author Richard Delorme
- * @author Toshihiko Okuhara
-=======
- * @date 1998 - 2022
  * @author Richard Delorme
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 9794cc1 (Store solid-normalized hash in PVS_midgame)
-=======
  * @author Toshihiko Okuhara
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
- * @author Toshihiko Okuhara
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
  * @version 4.5
  */
 
@@ -51,10 +26,6 @@
 #define RCD 0.5
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 /**
  * @brief evaluate a midgame position with the evaluation function.
  *
@@ -68,10 +39,6 @@ static int accumlate_eval(int ply, Eval *eval)
 	const Eval_weight *w;
 	int sum;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 6de3ab1 (Omit eval_weight table for ply > 53)
 	if (ply >= EVAL_N_PLY)
 		ply = EVAL_N_PLY - 2 + (ply & 1);
 	ply -= 2;
@@ -80,23 +47,6 @@ static int accumlate_eval(int ply, Eval *eval)
 	w = &(*EVAL_WEIGHT)[ply];
 
 #if defined(__AVX2__) && !defined(__bdver4__) && !defined(__znver1__) && !defined(__znver2__)
-<<<<<<< HEAD
-=======
-/**
- * @brief evaluate a midgame position with the evaluation function.
- *
- * @param w	Eval_weight for this ply.
- * @param eval	Evaluation function.
- */
-static int accumlate_eval(const Eval_weight *w, Eval *eval)
-{
-	unsigned short *f = eval->feature.us;
-	int sum;
-
-	assert(w < &EVAL_WEIGHT[EVAL_N_PLY]);
-
-#if defined(__AVX2__) && !defined(AMD_BEFORE_ZEN3)
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
 	enum {
 		W_C9 = offsetof(Eval_weight, C9) / sizeof(short) - 1,	// -1 to load the data into hi-word
 		W_C10 = offsetof(Eval_weight, C10) / sizeof(short) - 1,
@@ -114,7 +64,6 @@ static int accumlate_eval(const Eval_weight *w, Eval *eval)
 	DD = _mm256_i32gather_epi32((int *) w, FF, 2);
 	SS = _mm256_add_epi32(SS, _mm256_srai_epi32(DD, 16));
 
-<<<<<<< HEAD
 	DD = _mm256_i32gather_epi32((int *)((short *) w->S8x4 - 1), _mm256_cvtepu16_epi32(eval->feature.v8[2]), 2);
 	SS = _mm256_add_epi32(SS, _mm256_srai_epi32(DD, 16));
 
@@ -126,19 +75,6 @@ static int accumlate_eval(const Eval_weight *w, Eval *eval)
 	__m128i S = _mm_add_epi32(_mm256_castsi256_si128(SS), _mm256_extracti128_si256(SS, 1));
 
 	__m128i D = _mm_i32gather_epi32((int *)((short *) w->S8x4 - 1), _mm_cvtepu16_epi32(eval->feature.v8[3]), 2);
-=======
-	DD = _mm256_i32gather_epi32((int *)(w->S8x4 - 1), _mm256_cvtepu16_epi32(eval->feature.v8[2]), 2);
-	SS = _mm256_add_epi32(SS, _mm256_srai_epi32(DD, 16));
-
-	DD = _mm256_i32gather_epi32((int *)(w->S7654 - 1), _mm256_cvtepu16_epi32(*(__m128i *) &f[30]), 2);
-	SS = _mm256_add_epi32(SS, _mm256_srai_epi32(DD, 16));
-
-	DD = _mm256_i32gather_epi32((int *)(w->S7654 - 1), _mm256_cvtepu16_epi32(*(__m128i *) &f[38]), 2);
-	SS = _mm256_add_epi32(SS, _mm256_srai_epi32(DD, 16));
-	__m128i S = _mm_add_epi32(_mm256_castsi256_si128(SS), _mm256_extracti128_si256(SS, 1));
-
-	__m128i D = _mm_i32gather_epi32((int *)(w->S8x4 - 1), _mm_cvtepu16_epi32(eval->feature.v8[3]), 2);
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
 	S = _mm_add_epi32(S, _mm_srai_epi32(D, 16));
 
 	S = _mm_hadd_epi32(S, S);
@@ -158,7 +94,6 @@ static int accumlate_eval(const Eval_weight *w, Eval *eval)
 	  + w->S7654[f[42]] + w->S7654[f[43]] + w->S7654[f[44]] + w->S7654[f[45]];
 #endif
 	return sum + w->S8x4[f[28]] + w->S8x4[f[29]] + w->S0;
-<<<<<<< HEAD
 }
 
 /**
@@ -169,180 +104,12 @@ static int accumlate_eval(const Eval_weight *w, Eval *eval)
  */
 int search_eval_0(Search *search)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	const short *w = EVAL_WEIGHT[search->eval->player][60 - search->n_empties];
-	unsigned short int *f = search->eval->feature.us;
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-	const short *w = EVAL_WEIGHT[search->eval.player][60 - search->n_empties];
-=======
-	const short *w = EVAL_WEIGHT[search->eval.player][60 - search->eval.n_empties];
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-	const short *w = (*EVAL_WEIGHT)[60 - search->eval.n_empties];
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-	unsigned short int *f = search->eval.feature.us;
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
 	int score;
 
 	SEARCH_STATS(++statistics.n_search_eval_0);
 	SEARCH_UPDATE_EVAL_NODES(search->n_nodes);
 
-<<<<<<< HEAD
 	score = accumlate_eval(60 - search->eval.n_empties,  &search->eval);
-=======
-	score = w[f[ 0] + 0] + w[f[ 1] + 0] + w[f[ 2] + 0] + w[f[ 3] + 0]
-=======
-static int accumlate_eval(const short *w, Eval *eval)
-{
-	unsigned short *f = eval->feature.us;
-	return w[f[ 0] + 0] + w[f[ 1] + 0] + w[f[ 2] + 0] + w[f[ 3] + 0]
->>>>>>> e3334bd (Groups out accumlate_eval subroutine)
-	  + w[f[ 4] + 19683] + w[f[ 5] + 19683] + w[f[ 6] + 19683] + w[f[ 7] + 19683]
-	  + w[f[ 8] + 78732] + w[f[ 9] + 78732] + w[f[10] + 78732] + w[f[11] + 78732]
-	  + w[f[12] + 137781] + w[f[13] + 137781] + w[f[14] + 137781] + w[f[15] + 137781]
-	  + w[f[16] + 196830] + w[f[17] + 196830] + w[f[18] + 196830] + w[f[19] + 196830]
-	  + w[f[20] + 203391] + w[f[21] + 203391] + w[f[22] + 203391] + w[f[23] + 203391]
-	  + w[f[24] + 209952] + w[f[25] + 209952] + w[f[26] + 209952] + w[f[27] + 209952]
-	  + w[f[28] + 216513] + w[f[29] + 216513]
-	  + w[f[30] + 223074] + w[f[31] + 223074] + w[f[32] + 223074] + w[f[33] + 223074]
-	  + w[f[34] + 225261] + w[f[35] + 225261] + w[f[36] + 225261] + w[f[37] + 225261]
-	  + w[f[38] + 225990] + w[f[39] + 225990] + w[f[40] + 225990] + w[f[41] + 225990]
-	  + w[f[42] + 226233] + w[f[43] + 226233] + w[f[44] + 226233] + w[f[45] + 226233]
-	  + w[f[46] + 226314];
-<<<<<<< HEAD
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-=======
-static int accumlate_eval(int ply, Eval *eval)
-=======
-/**
- * @brief evaluate a midgame position with the evaluation function.
- *
- * @param w	Eval_weight for this ply.
- * @param eval	Evaluation function.
- */
-static int accumlate_eval(const Eval_weight *w, Eval *eval)
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-{
-	unsigned short *f = eval->feature.us;
-	int sum;
-
-	assert(w < &EVAL_WEIGHT[EVAL_N_PLY]);
-=======
-	assert(ply < EVAL_N_PLY);
-<<<<<<< HEAD
-	if (ply < 2)
-		ply += 2;
-	w = &(*EVAL_WEIGHT)[ply - 2];
->>>>>>> 4303b09 (Returns all full lines in full[4])
-=======
-	ply -= 2;
-	if (ply < 0)
-		ply &= 1;
-	w = &(*EVAL_WEIGHT)[ply];
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
-
-#if defined(__AVX2__) && !defined(AMD_BEFORE_ZEN3)
-=======
->>>>>>> 24abc1e (Revise comments and readme)
-	enum {
-		W_C9 = offsetof(Eval_weight, C9) / sizeof(short) - 1,	// -1 to load the data into hi-word
-		W_C10 = offsetof(Eval_weight, C10) / sizeof(short) - 1,
-		W_S100 = offsetof(Eval_weight, S100) / sizeof(short) - 1,
-		W_S101 = offsetof(Eval_weight, S101) / sizeof(short) - 1
-	};
-
-	__m256i FF = _mm256_add_epi32(_mm256_cvtepu16_epi32(eval->feature.v8[0]),
-		_mm256_set_epi32(W_C10, W_C10, W_C10, W_C10, W_C9, W_C9, W_C9, W_C9));
-	__m256i DD = _mm256_i32gather_epi32((int *) w, FF, 2);
-	__m256i SS = _mm256_srai_epi32(DD, 16);	// sign extend
-
-	FF = _mm256_add_epi32(_mm256_cvtepu16_epi32(eval->feature.v8[1]),
-		_mm256_set_epi32(W_S101, W_S101, W_S101, W_S101, W_S100, W_S100, W_S100, W_S100));
-	DD = _mm256_i32gather_epi32((int *) w, FF, 2);
-	SS = _mm256_add_epi32(SS, _mm256_srai_epi32(DD, 16));
-
-	DD = _mm256_i32gather_epi32((int *)((short *) w->S8x4 - 1), _mm256_cvtepu16_epi32(eval->feature.v8[2]), 2);
-	SS = _mm256_add_epi32(SS, _mm256_srai_epi32(DD, 16));
-
-	DD = _mm256_i32gather_epi32((int *)((short *) w->S7654 - 1), _mm256_cvtepu16_epi32(*(__m128i *) &f[30]), 2);
-	SS = _mm256_add_epi32(SS, _mm256_srai_epi32(DD, 16));
-
-	DD = _mm256_i32gather_epi32((int *)((short *) w->S7654 - 1), _mm256_cvtepu16_epi32(*(__m128i *) &f[38]), 2);
-	SS = _mm256_add_epi32(SS, _mm256_srai_epi32(DD, 16));
-	__m128i S = _mm_add_epi32(_mm256_castsi256_si128(SS), _mm256_extracti128_si256(SS, 1));
-
-	__m128i D = _mm_i32gather_epi32((int *)((short *) w->S8x4 - 1), _mm_cvtepu16_epi32(eval->feature.v8[3]), 2);
-	S = _mm_add_epi32(S, _mm_srai_epi32(D, 16));
-
-	S = _mm_hadd_epi32(S, S);
-	sum = _mm_cvtsi128_si32(S) + _mm_extract_epi32(S, 1);
-
-#else
-	sum = w->C9[f[ 0]] + w->C9[f[ 1]] + w->C9[f[ 2]] + w->C9[f[ 3]]
-	  + w->C10[f[ 4]] + w->C10[f[ 5]] + w->C10[f[ 6]] + w->C10[f[ 7]]
-<<<<<<< HEAD
-	  + w->S10[0][f[ 8]] + w->S10[0][f[ 9]] + w->S10[0][f[10]] + w->S10[0][f[11]]
-	  + w->S10[1][f[12]] + w->S10[1][f[13]] + w->S10[1][f[14]] + w->S10[1][f[15]]
-	  + w->S8[0][f[16]] + w->S8[0][f[17]] + w->S8[0][f[18]] + w->S8[0][f[19]]
-	  + w->S8[1][f[20]] + w->S8[1][f[21]] + w->S8[1][f[22]] + w->S8[1][f[23]]
-	  + w->S8[2][f[24]] + w->S8[2][f[25]] + w->S8[2][f[26]] + w->S8[2][f[27]]
-	  + w->S8[3][f[28]] + w->S8[3][f[29]]
-	  + w->S7[f[30]] + w->S7[f[31]] + w->S7[f[32]] + w->S7[f[33]]
-	  + w->S6[f[34]] + w->S6[f[35]] + w->S6[f[36]] + w->S6[f[37]]
-	  + w->S5[f[38]] + w->S5[f[39]] + w->S5[f[40]] + w->S5[f[41]]
-	  + w->S4[f[42]] + w->S4[f[43]] + w->S4[f[44]] + w->S4[f[45]]
-	  + w->S0;
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
-=======
-	  + w->S100[f[ 8]] + w->S100[f[ 9]] + w->S100[f[10]] + w->S100[f[11]]
-	  + w->S101[f[12]] + w->S101[f[13]] + w->S101[f[14]] + w->S101[f[15]]
-	  + w->S8x4[f[16]] + w->S8x4[f[17]] + w->S8x4[f[18]] + w->S8x4[f[19]]
-	  + w->S8x4[f[20]] + w->S8x4[f[21]] + w->S8x4[f[22]] + w->S8x4[f[23]]
-	  + w->S8x4[f[24]] + w->S8x4[f[25]] + w->S8x4[f[26]] + w->S8x4[f[27]]
-	  + w->S7654[f[30]] + w->S7654[f[31]] + w->S7654[f[32]] + w->S7654[f[33]]
-	  + w->S7654[f[34]] + w->S7654[f[35]] + w->S7654[f[36]] + w->S7654[f[37]]
-	  + w->S7654[f[38]] + w->S7654[f[39]] + w->S7654[f[40]] + w->S7654[f[41]]
-	  + w->S7654[f[42]] + w->S7654[f[43]] + w->S7654[f[44]] + w->S7654[f[45]];
-#endif
-	return sum + w->S8x4[f[28]] + w->S8x4[f[29]] + w->S0;
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
-}
-
-/**
- * @brief evaluate a midgame position with the evaluation function.
- *
- * @param search Position to evaluate.
- * @return An evaluated score.
- */
-int search_eval_0(Search *search)
-{
-	int score;
-
-	SEARCH_STATS(++statistics.n_search_eval_0);
-	SEARCH_UPDATE_EVAL_NODES(search->n_nodes);
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	score = accumlate_eval((*EVAL_WEIGHT)[60 - search->eval.n_empties],  &search->eval);
->>>>>>> e3334bd (Groups out accumlate_eval subroutine)
-=======
-	score = accumlate_eval(60 - search->eval.n_empties,  &search->eval);
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
-=======
-	score = accumlate_eval(&(*EVAL_WEIGHT)[60 - search->eval.n_empties],  &search->eval);
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
-	score = accumlate_eval(&(*EVAL_WEIGHT)[60 - search->eval.n_empties],  &search->eval);
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
 
 	if (score > 0) score += 64;	else score -= 64;
 	score /= 128;
@@ -365,269 +132,22 @@ int search_eval_0(Search *search)
  * @param moves Next turn legal moves.
  * @return An evaluated min score.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 int search_eval_1(Search *search, int alpha, int beta, unsigned long long moves)
-=======
-int search_eval_1(Search *search, const int alpha, int beta, unsigned long long moves)
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-int search_eval_1(Search *search, const int alpha, int beta, bool pass1)
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	int x, score, bestscore, alphathres;
 	unsigned long long flipped;
 	Eval Ev;
 	V2DI board0;
-=======
-	const short *w = EVAL_WEIGHT[search->eval->player ^ 1][61 - search->n_empties];
-=======
-	const short *w = EVAL_WEIGHT[search->eval.player ^ 1][61 - search->n_empties];
-<<<<<<< HEAD
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-	Move move[1];
-=======
-=======
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-	Move move;
-<<<<<<< HEAD
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	SquareList *empty;
-	Eval Ev;
-	int score, bestscore;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	const Board *board = search->board;
-	unsigned long long moves = get_moves(board->player, board->opponent);
-<<<<<<< HEAD
-	unsigned short int *f;
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-=======
-	unsigned long long moves = get_moves(search->board.player, search->board.opponent);
-<<<<<<< HEAD
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	unsigned short *f;
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
-=======
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-	Eval Ev;
-	int x, score, bestscore;
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-=======
-	Eval Ev;
-	int x, score, bestscore, betathres;
-	unsigned long long flipped;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
-	const short *w;
-<<<<<<< HEAD
-	const unsigned short *f;
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-=======
->>>>>>> e3334bd (Groups out accumlate_eval subroutine)
-=======
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
-	const Eval_weight *w;
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
-	const Eval_weight *w;
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
-=======
-	unsigned long long moves = get_moves(search->board.player, search->board.opponent);
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-	unsigned long long moves = board_get_moves(&search->board);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
-=======
-	int x, score, bestscore, betathres;
-	unsigned long long flipped, moves;
-=======
-int search_eval_1(Search *search, int alpha, int beta, unsigned long long moves)
-{
-	int x, score, bestscore, alphathres;
-	unsigned long long flipped;
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
-	Eval Ev;
-	V2DI board0;
->>>>>>> fe6dce7 (consistent vboard usage for eval_1 and eval_2)
 
 	SEARCH_STATS(++statistics.n_search_eval_1);
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
 
 	if (moves) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		bestscore = SCORE_INF * 128;	// min stage
-		if (alpha < SCORE_MIN + 1) alphathres = ((SCORE_MIN + 1) * 128) + 64;
-		else alphathres = (alpha * 128) + 63 + (int) (alpha < 0);	// highest score rounded to alpha
-=======
-=======
-		w = EVAL_WEIGHT[search->eval.player ^ 1][61 - search->n_empties];
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-=======
-		w = EVAL_WEIGHT[search->eval.player ^ 1][61 - search->eval.n_empties];
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-		w = (*EVAL_WEIGHT)[61 - search->eval.n_empties];
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-=======
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
-		w = &(*EVAL_WEIGHT)[60 - search->eval.n_empties + 1];
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-=======
-		w = &(*EVAL_WEIGHT)[60 - search->eval.n_empties + 1];
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
-		bestscore = -SCORE_INF;
-		if (beta > SCORE_MAX - 1) beta = SCORE_MAX - 1;
-=======
-		bestscore = -SCORE_INF * 128;
-		if (beta > SCORE_MAX - 1) betathres = ((SCORE_MAX - 1) * 128) - 64;
-		else betathres = (beta * 128) - ((beta > 0) ? 64 : 63);	// lowest score rounded to beta
-
-<<<<<<< HEAD
->>>>>>> 8d39e74 (Loop out rounding score)
-		foreach_empty (x, search->empties) {
-			if (moves & x_to_bit(x)) {
-				flipped = board_flip(&search->board, x);
-				if (flipped == search->board.opponent)
-					return SCORE_MAX;	// wipeout
-
-				eval_update_leaf(x, flipped, &Ev, &search->eval);
-				SEARCH_UPDATE_EVAL_NODES(search->n_nodes);
-<<<<<<< HEAD
-				score = -w[f[ 0] + 0] - w[f[ 1] + 0] - w[f[ 2] + 0] - w[f[ 3] + 0]
-				  - w[f[ 4] + 19683] - w[f[ 5] + 19683] - w[f[ 6] + 19683] - w[f[ 7] + 19683]
-				  - w[f[ 8] + 78732] - w[f[ 9] + 78732] - w[f[10] + 78732] - w[f[11] + 78732]
-				  - w[f[12] + 137781] - w[f[13] + 137781] - w[f[14] + 137781] - w[f[15] + 137781]
-				  - w[f[16] + 196830] - w[f[17] + 196830] - w[f[18] + 196830] - w[f[19] + 196830]
-				  - w[f[20] + 203391] - w[f[21] + 203391] - w[f[22] + 203391] - w[f[23] + 203391]
-				  - w[f[24] + 209952] - w[f[25] + 209952] - w[f[26] + 209952] - w[f[27] + 209952]
-				  - w[f[28] + 216513] - w[f[29] + 216513]
-				  - w[f[30] + 223074] - w[f[31] + 223074] - w[f[32] + 223074] - w[f[33] + 223074]
-				  - w[f[34] + 225261] - w[f[35] + 225261] - w[f[36] + 225261] - w[f[37] + 225261]
-				  - w[f[38] + 225990] - w[f[39] + 225990] - w[f[40] + 225990] - w[f[41] + 225990]
-				  - w[f[42] + 226233] - w[f[43] + 226233] - w[f[44] + 226233] - w[f[45] + 226233]
-				  - w[f[46] + 226314];
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-				eval_restore(search->eval, move);
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-=======
-				// eval_restore(search->eval, move);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-				// eval_restore(&search->eval, &move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-=======
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-				score = -accumlate_eval(w, &Ev);
->>>>>>> e3334bd (Groups out accumlate_eval subroutine)
-=======
-				score = -accumlate_eval((*EVAL_WEIGHT)[61 - search->eval.n_empties], &Ev);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
-				score = -accumlate_eval(60 - search->eval.n_empties + 1, &Ev);
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
-
-<<<<<<< HEAD
-		board0.board = search->board;
-		x = NOMOVE;
-		do {
-			do {
-				x = search->empties[x].next;
-			} while (!(moves & x_to_bit(x)));
-=======
-				score = -accumlate_eval(w, &Ev);
-=======
-				score = -accumlate_eval(w, &Ev);
-
-<<<<<<< HEAD
-				if (score >= 0) score = (score + 64) >> 7;
-				else score = -((-score + 64) >> 7);
->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval)
-
-				if (score >= 0) score = (score + 64) >> 7;
-				else score = -((-score + 64) >> 7);
->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval)
-
-			moves &= ~x_to_bit(x);
-			flipped = vboard_flip(board0, x);
-			if (flipped == search->board.opponent)
-				return SCORE_MIN;	// wipeout
-
-			eval_update_leaf(x, flipped, &Ev, &search->eval);
-			SEARCH_UPDATE_EVAL_NODES(search->n_nodes);
-
-			score = accumlate_eval(60 - search->eval.n_empties + 1, &Ev);
-
-			if (score < bestscore)
-				bestscore = score;
-		} while (moves && (bestscore > alphathres));
-
-		if (bestscore >= 0) bestscore += 64; else bestscore -= 64;
-		bestscore /= 128;
-
-		if (bestscore < SCORE_MIN + 1) bestscore = SCORE_MIN + 1;
-		if (bestscore > SCORE_MAX - 1) bestscore = SCORE_MAX - 1;
-
-<<<<<<< HEAD
-	} else {
-<<<<<<< HEAD
-		moves = get_moves(search->board.opponent, search->board.player);
-		if (moves) {
-			search_update_pass_midgame(search, &Ev);
-			bestscore = -search_eval_1(search, -beta, -alpha, moves);
-			search_restore_pass_midgame(search, &Ev);
-=======
-		if (can_move(search->board.opponent, search->board.player)) {
-=======
-=======
->>>>>>> 8d39e74 (Loop out rounding score)
-				if (score > bestscore) {
-=======
-				if (score > bestscore)
->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines)
-					bestscore = score;
-				if (bestscore >= betathres) break;
-			}
-		}
-=======
-=======
 		bestscore = SCORE_INF * 128;	// min stage
 		if (alpha < SCORE_MIN + 1) alphathres = ((SCORE_MIN + 1) * 128) + 64;
 		else alphathres = (alpha * 128) + 63 + (int) (alpha < 0);	// highest score rounded to alpha
 
 		board0.board = search->board;
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 		x = NOMOVE;
 		do {
 			do {
@@ -646,13 +166,7 @@ int search_eval_1(Search *search, int alpha, int beta, unsigned long long moves)
 
 			if (score < bestscore)
 				bestscore = score;
-<<<<<<< HEAD
-			if (bestscore >= betathres) break;
-		} while (moves);
->>>>>>> 6a63841 (exit search_shallow/search_eval loop when all bits processed)
-=======
 		} while (moves && (bestscore > alphathres));
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 
 		if (bestscore >= 0) bestscore += 64; else bestscore -= 64;
 		bestscore /= 128;
@@ -661,40 +175,13 @@ int search_eval_1(Search *search, int alpha, int beta, unsigned long long moves)
 		if (bestscore > SCORE_MAX - 1) bestscore = SCORE_MAX - 1;
 
 	} else {
-<<<<<<< HEAD
-<<<<<<< HEAD
-		moves = get_moves(search->board.opponent, search->board.player);
-		if (moves) {
-<<<<<<< HEAD
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-			search_update_pass_midgame(search);
-			bestscore = -search_eval_1(search, -beta, -alpha, moves);
-			search_restore_pass_midgame(search);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			search_update_pass_midgame(search, &Ev);
-			bestscore = -search_eval_1(search, -beta, -alpha, moves);
-			search_restore_pass_midgame(search, &Ev);
->>>>>>> e970433 (Restore eval by copy in search_restore_pass_midgame)
-		} else { // game over
-			bestscore = -search_solve(search);
-=======
-		if (pass1) { // game over
-			bestscore = search_solve(search);
-		} else {
-=======
 		moves = get_moves(search->board.opponent, search->board.player);
 		if (moves) {
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 			search_update_pass_midgame(search, &Ev);
 			bestscore = -search_eval_1(search, -beta, -alpha, moves);
 			search_restore_pass_midgame(search, &Ev);
-<<<<<<< HEAD
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
 		} else { // game over
 			bestscore = -search_solve(search);
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 		}
 	}
 
@@ -713,88 +200,12 @@ int search_eval_1(Search *search, int alpha, int beta, unsigned long long moves)
  * @param moves Next turn legal moves.
  * @return An evaluated best score.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-int search_eval_2(Search *search, int alpha, int beta, unsigned long long moves)
-=======
-int search_eval_2(Search *search, int alpha, const int beta, unsigned long long moves)
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-int search_eval_2(Search *search, int alpha, const int beta, bool pass1)
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
 int search_eval_2(Search *search, int alpha, int beta, unsigned long long moves)
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	int x, bestscore, score;
 	unsigned long long flipped;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	Eval eval0;
 	V2DI board0;
-=======
-	register int bestscore, score;
-=======
-	int bestscore, score;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-	SquareList *empty;
-=======
-	int x, bestscore, score;
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-	Move move;
-=======
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
-	Eval Ev0;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	const Board *board = search->board;
-	const unsigned long long moves = get_moves(board->player, board->opponent);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-	const unsigned long long moves = get_moves(search->board.player, search->board.opponent);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	Board board0;
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-	Search_Backup backup;
-<<<<<<< HEAD
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
-	unsigned long long moves = get_moves(search->board.player, search->board.opponent);
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
-	Eval eval0;
-<<<<<<< HEAD
-	vBoard board0 = load_vboard(search->board);
-	unsigned long long moves = vboard_get_moves(board0, search->board);
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-=======
-	V2DI board0;
-	unsigned long long moves;
-
-	board0.board = search->board;
-	moves = vboard_get_moves(board0);
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-	int x, prev, bestscore, score;
-=======
-	int x, bestscore, score;
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
-	unsigned long long flipped, moves;
-=======
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
-	Eval eval0;
-	V2DI board0;
->>>>>>> fe6dce7 (consistent vboard usage for eval_1 and eval_2)
 
 	SEARCH_STATS(++statistics.n_search_eval_2);
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
@@ -805,120 +216,9 @@ int search_eval_2(Search *search, int alpha, int beta, unsigned long long moves)
 
 	if (moves) {
 		bestscore = -SCORE_INF;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 		eval0.feature = search->eval.feature;
 		eval0.n_empties = search->eval.n_empties--;
 		board0.board = search->board;
-<<<<<<< HEAD
-		x = NOMOVE;
-		do {
-			do {
-				x = search->empties[x].next;
-			} while (!(moves & x_to_bit(x)));
-
-<<<<<<< HEAD
-			moves &= ~x_to_bit(x);
-			// search->empties[prev].next = search->empties[x].next;	// let search_eval_1 skip the last occupied
-			flipped = vboard_next(board0, x, &search->board);
-			eval_update_leaf(x, flipped, &search->eval, &eval0);
-			score = search_eval_1(search, alpha, beta, board_get_moves(&search->board));
-			// search->empties[prev].next = x;	// restore
-
-			if (score > bestscore) {
-				bestscore = score;
-				if (bestscore >= beta) break;
-				else if (bestscore > alpha) alpha = bestscore;
-=======
-		Ev0 = search->eval;
-=======
-		Ev0.feature = search->eval.feature;
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-=======
-		Ev0.player = search->eval.player;
-		eval_swap(&search->eval);
-=======
-		Ev0.n_empties = search->eval.n_empties--;
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-		board0 = search->board;
-=======
-		backup.eval.feature = search->eval.feature;
-		backup.eval.n_empties = search->eval.n_empties--;
-		backup.board = search->board;
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
-		eval0.feature = search->eval.feature;
-		eval0.n_empties = search->eval.n_empties--;
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-
-<<<<<<< HEAD
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-		foreach_empty(empty, search->empties) {
-			if (moves & empty->b) {
-				move.x = empty->x;
-				move.flipped = board_next(&board0, move.x, &search->board);
-				// empty_remove(search->x_to_empties[move.x]);
-=======
-		foreach_empty(x, search->empties) {
-			if (moves & x_to_bit(x)) {
-<<<<<<< HEAD
-				flipped = board_next(&backup.board, x, &search->board);
-				// empty_remove(search->empties, x);
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-				eval_update_leaf(&search->eval, &Ev0, &move);
-=======
-				eval_update_leaf(x, flipped, &search->eval, &Ev0);
->>>>>>> 9b4cd06 (Optimize search_shallow in endgame.c; revise eval_update parameters)
-=======
-				eval_update_leaf(x, flipped, &search->eval, &backup.eval);
-<<<<<<< HEAD
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-				score = -search_eval_1(search, -beta, -alpha, get_moves(search->board.player, search->board.opponent));
-				// empty_restore(search->empties, x);
-=======
-=======
-				flipped = vboard_next(board0, x, &search->board);
-				search->empties[prev].next = search->empties[x].next;	// remove
-				eval_update_leaf(x, flipped, &search->eval, &eval0);
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
-				score = -search_eval_1(search, -beta, -alpha, false);
-				search->empties[prev].next = x;	// restore
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-
-				if (score > bestscore) {
-					bestscore = score;
-					if (bestscore >= beta) break;
-					else if (bestscore > alpha) alpha = bestscore;
-				}
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-			}
-<<<<<<< HEAD
-		} while (moves);
-		search->eval.feature = eval0.feature;
-		search->eval.n_empties = eval0.n_empties;
-		search->board = board0.board;
-
-	} else {
-<<<<<<< HEAD
-		moves = get_moves(search->board.opponent, search->board.player);
-		if (moves) {
-			search_update_pass_midgame(search, &eval0);
-			bestscore = -search_eval_2(search, -beta, -alpha, moves);
-			search_restore_pass_midgame(search, &eval0);
-		} else { // game over
-=======
-		if (can_move(search->board.opponent, search->board.player)) {
-=======
-		}
-=======
-=======
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 		x = NOMOVE;
 		do {
 			do {
@@ -938,35 +238,13 @@ int search_eval_2(Search *search, int alpha, int beta, unsigned long long moves)
 				else if (bestscore > alpha) alpha = bestscore;
 			}
 		} while (moves);
->>>>>>> 6a63841 (exit search_shallow/search_eval loop when all bits processed)
 		search->eval.feature = eval0.feature;
 		search->eval.n_empties = eval0.n_empties;
 		search->board = board0.board;
 
 	} else {
-<<<<<<< HEAD
-<<<<<<< HEAD
 		moves = get_moves(search->board.opponent, search->board.player);
 		if (moves) {
-<<<<<<< HEAD
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-			search_update_pass_midgame(search);
-=======
-			search_update_pass_midgame(search, &backup.eval);
->>>>>>> e970433 (Restore eval by copy in search_restore_pass_midgame)
-			bestscore = -search_eval_2(search, -beta, -alpha, moves);
-			search_restore_pass_midgame(search, &backup.eval);
-		} else {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		if (pass1) { // game over
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-			bestscore = search_solve(search);
-		} else {
-=======
-		moves = get_moves(search->board.opponent, search->board.player);
-		if (moves) {
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 			search_update_pass_midgame(search, &eval0);
 			bestscore = -search_eval_2(search, -beta, -alpha, moves);
 			search_restore_pass_midgame(search, &eval0);
@@ -980,10 +258,6 @@ int search_eval_2(Search *search, int alpha, int beta, unsigned long long moves)
 	return bestscore;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-=======
 /**
  * @brief Probcut
  *
@@ -996,7 +270,6 @@ int search_eval_2(Search *search, int alpha, int beta, unsigned long long moves)
  * @param value Returned value.
  * @return true if probable cutoff has been found, false otherwise.
  */
->>>>>>> af8242f (Imply NO_SELECTIVITY in shallow searches)
 static inline void search_update_probcut(Search *search, const NodeType node_type) 
 {
 	search->node_type[search->height] = node_type;
@@ -1012,37 +285,6 @@ static inline void search_restore_probcut(Search *search, const NodeType node_ty
 	LIMIT_RECURSIVE_PROBCUT(--search->probcut_level;)
 }
 
-<<<<<<< HEAD
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-/**
- * @brief Probcut
- *
- * Check if a position is worth to analyze further.
- *
- * @param search Position to test.
- * @param alpha Alpha lower bound.
- * @param depth Search depth.
- * @param parent Parent node.
- * @param value Returned value.
- * @return true if probable cutoff has been found, false otherwise.
- */
-static inline void search_update_probcut(Search *search, const NodeType node_type) 
-{
-	search->node_type[search->height] = node_type;
-	if (!USE_RECURSIVE_PROBCUT) search->selectivity = NO_SELECTIVITY;
-	LIMIT_RECURSIVE_PROBCUT(++search->probcut_level;)
-}
-
-
-static inline void search_restore_probcut(Search *search, const NodeType node_type, const int selectivity) 
-{
-	search->node_type[search->height] = node_type;
-	if (!USE_RECURSIVE_PROBCUT) search->selectivity = selectivity;
-	LIMIT_RECURSIVE_PROBCUT(--search->probcut_level;)
-}
-
-=======
->>>>>>> af8242f (Imply NO_SELECTIVITY in shallow searches)
 static bool search_probcut(Search *search, const int alpha, const int depth, Node *parent, int *value)
 {
 	// assertion 
@@ -1128,78 +370,20 @@ static int NWS_shallow(Search *search, const int alpha, int depth, HashTable *ha
 {
 	int score, bestscore;
 	unsigned long long hash_code;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	// const int beta = alpha + 1;
-	HashStoreData hash_data;
-=======
-	const int beta = alpha + 1;
-	HashData hash_data;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	// const int beta = alpha + 1;
-<<<<<<< HEAD
-	HashData hash_data;
-	HashStoreData hash_store_data;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
 	HashStoreData hash_data;
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
 	MoveList movelist;
 	Move *move;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 	Eval eval0;
 	V2DI board0;
 	long long nodes_org;
-=======
-	Eval Ev0;
-<<<<<<< HEAD
-	int bestscore, bestmove;
-	long long cost = -search->n_nodes;
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
 
 	if (depth == 2) return search_eval_2(search, alpha, alpha + 1, board_get_moves(&search->board));
-<<<<<<< HEAD
-=======
-=======
-	Search_Backup backup;
-<<<<<<< HEAD
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-	int bestscore;
-	long long nodes_org = search->n_nodes;
-=======
-	long long nodes_org;
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if (depth == 2) return search_eval_2(search, alpha, alpha + 1);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-	if (depth == 2) return search_eval_2(search, alpha, alpha + 1, get_moves(search->board.player, search->board.opponent));
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-	if (depth == 2) return search_eval_2(search, alpha, alpha + 1, false);
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 
 	SEARCH_STATS(++statistics.n_NWS_midgame);
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	assert(search->eval.n_empties == bit_count(~(search->board.player | search->board.opponent)));
-=======
-	assert(search->n_empties == bit_count(~(search->board.player | search->board.opponent)));
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	assert(search->eval.n_empties == bit_count(~(search->board.player | search->board.opponent)));
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 	assert(SCORE_MIN <= alpha && alpha <= SCORE_MAX);
 	assert(depth > 2);
 	assert(hash_table != NULL);
@@ -1210,8 +394,6 @@ static int NWS_shallow(Search *search, const int alpha, int depth, HashTable *ha
 	// stability cutoff
 	if (search_SC_NWS(search, alpha, &score)) return score;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	search_get_movelist(search, &movelist);
 	board0.board = search->board;
 	eval0 = search->eval;
@@ -1224,167 +406,20 @@ static int NWS_shallow(Search *search, const int alpha, int depth, HashTable *ha
 		// sort the list of moves
 		nodes_org = search->n_nodes;
 		movelist_evaluate(&movelist, search, &hash_data.data, alpha, depth);
-=======
-	// transposition cutoff
-	hash_code = board_get_hash_code(&search->board);
-	if (hash_get(hash_table, &search->board, hash_code, &hash_data) && search_TC_NWS(&hash_data, depth, search->selectivity, alpha, &score)) return score;
-=======
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-	search_get_movelist(search, &movelist);
-	board0.board = search->board;
-	eval0 = search->eval;
-
-	if (movelist.n_moves > 1) {
-		// transposition cutoff
-		if (hash_get(hash_table, &search->board, hash_code, &hash_data.data) && search_TC_NWS(&hash_data.data, depth, NO_SELECTIVITY, alpha, &score))
-			return score;
-
-		// sort the list of moves
-<<<<<<< HEAD
-<<<<<<< HEAD
-		movelist_evaluate(&movelist, search, &hash_data, alpha, depth);
-<<<<<<< HEAD
-		movelist_sort(&movelist) ;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		movelist_sort(&movelist);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
 
 		// loop over all moves
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		bestscore = -SCORE_INF;
-		foreach_best_move(move, movelist) {
-			search_update_midgame(search, move);
-			score = -NWS_shallow(search, ~alpha, depth - 1, hash_table);
-			search_restore_midgame(search, move->x, &eval0);
-			search->board = board0.board;
-=======
-		bestscore = -SCORE_INF; bestmove = NOMOVE;
-=======
-		bestscore = -SCORE_INF; hash_store_data.move = NOMOVE;
-<<<<<<< HEAD
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-		Ev0 = search->eval;
-=======
-=======
-		bestscore = -SCORE_INF; hash_store_data.data.move[0] = NOMOVE;
-<<<<<<< HEAD
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-		Ev0.feature = search->eval.feature;
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-		foreach_move(move, movelist) {
-			search_update_midgame(search, move);
-			score = -NWS_shallow(search, -(alpha + 1), depth - 1, hash_table);
-			search_restore_midgame(search, move, &Ev0);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-=======
-		if (movelist.n_moves > 1) {
-			movelist_evaluate(&movelist, search, &hash_data, alpha, depth);
-			movelist_sort(&movelist);
-		}
-
-		// loop over all moves
-		bestscore = -SCORE_INF;
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-		backup.board = search->board;
-		backup.eval = search->eval;
-=======
-		nodes_org = search->n_nodes;
-		movelist_evaluate(&movelist, search, &hash_data.data, alpha, depth);
-
-		// loop over all moves
-		bestscore = -SCORE_INF;
-<<<<<<< HEAD
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-		move = movelist.move[0].next;
-		do {
-=======
-		foreach_best_move(move, movelist) {
->>>>>>> 19da175 (differed movelist sort in PVS/NWS_shallow)
-			search_update_midgame(search, move);
-			score = -NWS_shallow(search, ~alpha, depth - 1, hash_table);
-<<<<<<< HEAD
-			search_restore_midgame(search, move->x, &backup);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
-			search_restore_midgame(search, move->x, &eval0);
-			search->board = board0.board;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-			if (score > bestscore) {
-				bestscore = score;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-				hash_data.data.move[0] = move->x;
-=======
-				hash_store_data.move = move->x;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-				hash_store_data.data.move[0] = move->x;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-=======
-				hash_data.data.move[0] = move->x;
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-				if (score > alpha) break;
-			}
-<<<<<<< HEAD
-<<<<<<< HEAD
-		}
-
-		// save the best result in hash tables
-		hash_data.data.wl.c.depth = depth;
-		hash_data.data.wl.c.selectivity = NO_SELECTIVITY;	// (4.5.1)
-		hash_data.data.wl.c.cost = last_bit(search->n_nodes - nodes_org);
-		// hash_data.data.move[0] = bestmove;
-		hash_data.alpha = alpha;
-		hash_data.beta = alpha + 1;
-		hash_data.score = bestscore;
-		hash_store(hash_table, &search->board, hash_code, &hash_data);
-
-	} else if (movelist.n_moves == 1) {
-		move = movelist_first(&movelist);
-		search_update_midgame(search, move);
-		bestscore = -NWS_shallow(search, ~alpha, depth - 1, hash_table);
-		search_restore_midgame(search, move->x, &eval0);
-		search->board = board0.board;
-<<<<<<< HEAD
-
-	} else { // no moves
-		if (can_move(search->board.opponent, search->board.player)) { // pass ?
-			search_update_pass_midgame(search, &eval0);
-			bestscore = -NWS_shallow(search, ~alpha, depth, hash_table);
-			search_restore_pass_midgame(search, &eval0);
-		} else { // game-over !
-			bestscore = search_solve(search);
-		}
-=======
-		} while ((move = move->next));
-<<<<<<< HEAD
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-	}
-
-<<<<<<< HEAD
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-=======
-	// save the best result in hash tables
-	hash_store_data.data.wl.c.depth = depth;
-	hash_store_data.data.wl.c.selectivity = search->selectivity;
-	hash_store_data.data.wl.c.cost = last_bit(search->n_nodes - nodes_org);
-	// hash_store_data.data.move[0] = bestmove;
-	hash_store_data.alpha = alpha;
-	hash_store_data.beta = alpha + 1;
-	hash_store_data.score = bestscore;
-	hash_store(hash_table, &search->board, hash_code, &hash_store_data);
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-=======
+		bestscore = -SCORE_INF;
+		foreach_best_move(move, movelist) {
+			search_update_midgame(search, move);
+			score = -NWS_shallow(search, ~alpha, depth - 1, hash_table);
+			search_restore_midgame(search, move->x, &eval0);
+			search->board = board0.board;
+			if (score > bestscore) {
+				bestscore = score;
+				hash_data.data.move[0] = move->x;
+				if (score > alpha) break;
+			}
 		}
->>>>>>> 19da175 (differed movelist sort in PVS/NWS_shallow)
 
 		// save the best result in hash tables
 		hash_data.data.wl.c.depth = depth;
@@ -1400,9 +435,8 @@ static int NWS_shallow(Search *search, const int alpha, int depth, HashTable *ha
 		move = movelist_first(&movelist);
 		search_update_midgame(search, move);
 		bestscore = -NWS_shallow(search, ~alpha, depth - 1, hash_table);
-		search_restore_midgame(search, move->x, &backup);
-=======
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
+		search_restore_midgame(search, move->x, &eval0);
+		search->board = board0.board;
 
 	} else { // no moves
 		if (can_move(search->board.opponent, search->board.player)) { // pass ?
@@ -1415,7 +449,6 @@ static int NWS_shallow(Search *search, const int alpha, int depth, HashTable *ha
 	}
 
 	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 	return bestscore;
 }
 
@@ -1436,73 +469,20 @@ static int NWS_shallow(Search *search, const int alpha, int depth, HashTable *ha
  */
 int PVS_shallow(Search *search, int alpha, int beta, int depth)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	int score, bestscore, lower;
-	// unsigned long long hash_code;
-	HashStoreData hash_data;
-=======
-	int score;
-	HashTable *const hash_table = &search->shallow_table;
-	unsigned long long hash_code;
-	HashData hash_data;
-<<<<<<< HEAD
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-=======
 	int score, bestscore, lower;
 	// unsigned long long hash_code;
-<<<<<<< HEAD
-	// HashData hash_data;
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-	HashStoreData hash_store_data;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
 	HashStoreData hash_data;
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
 	MoveList movelist;
 	Move *move;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 	Eval eval0;
 	Board board0;
 	long long nodes_org;
-=======
-	Eval Ev0;
-=======
-	Search_Backup backup;
-<<<<<<< HEAD
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-	int bestscore;
-	long long nodes_org = search->n_nodes;
-	int lower;
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-	long long nodes_org;
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if (depth == 2) return search_eval_2(search, alpha, beta, board_get_moves(&search->board));
-=======
-	if (depth == 2) return search_eval_2(search, alpha, beta, get_moves(search->board.player, search->board.opponent));
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-	if (depth == 2) return search_eval_2(search, alpha, beta, false);
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
 	if (depth == 2) return search_eval_2(search, alpha, beta, board_get_moves(&search->board));
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 
 	SEARCH_STATS(++statistics.n_PVS_shallow);
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	assert(search->eval.n_empties == bit_count(~(search->board.player | search->board.opponent)));
 	assert(SCORE_MIN <= alpha && alpha <= SCORE_MAX);
 
@@ -1513,50 +493,6 @@ int PVS_shallow(Search *search, int alpha, int beta, int depth)
 		if (score <= alpha) {
 			CUTOFF_STATS(++statistics.n_stability_low_cutoff;)
 			return score;
-<<<<<<< HEAD
-=======
-	assert(search->n_empties == bit_count(~(search->board.player | search->board.opponent)));
-=======
-	assert(search->eval.n_empties == bit_count(~(search->board.player | search->board.opponent)));
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-	assert(SCORE_MIN <= alpha && alpha <= SCORE_MAX);
-
-	// stability cutoff
-	if (search_SC_PVS(search, &alpha, &beta, &score)) return score;
-=======
-		}
-		else if (score < beta) beta = score;
-	}
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
-
-	search_get_movelist(search, &movelist);
-	board0 = search->board;
-	eval0 = search->eval;
-
-	if (movelist.n_moves > 1) {
-		// transposition cutoff (unused, normally first searched position)
-		// hash_code = board_get_hash_code(&search->board);
-		// if (hash_get(&search->shallow_table, &search->board, hash_code, &hash_data.data) && search_TC_PVS(&hash_data.data, depth, NO_SELECTIVITY, &alpha, &beta, &score)) return score;
-
-<<<<<<< HEAD
-	if (movelist_is_empty(&movelist)) { // no moves ?
-		if (can_move(search->board.opponent, search->board.player)) { // pass ?
-			search_update_pass_midgame(search, &backup.eval);
-			bestscore = -PVS_shallow(search, -beta, -alpha, depth);
-			hash_store_data.data.move[0] = PASS;
-			search_restore_pass_midgame(search, &backup.eval);
-		} else { // game-over !
-			bestscore = search_solve(search);
-<<<<<<< HEAD
-<<<<<<< HEAD
-			bestmove = NOMOVE;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			hash_store_data.move = NOMOVE;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-			hash_store_data.data.move[0] = NOMOVE;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 		}
 		else if (score < beta) beta = score;
 	}
@@ -1571,69 +507,11 @@ int PVS_shallow(Search *search, int alpha, int beta, int depth)
 		// if (hash_get(&search->shallow_table, &search->board, hash_code, &hash_data.data) && search_TC_PVS(&hash_data.data, depth, NO_SELECTIVITY, &alpha, &beta, &score)) return score;
 
 		// sort the list of moves
-<<<<<<< HEAD
-<<<<<<< HEAD
-		nodes_org = search->n_nodes;
-		movelist_evaluate(&movelist, search, &HASH_DATA_INIT, alpha, depth);
-=======
-		movelist_evaluate(&movelist, search, &hash_data, alpha, depth);
-		movelist_sort(&movelist) ;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-
-		// loop over all moves
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		move = movelist_best(&movelist);
-		search_update_midgame(search, move);
-		bestscore = -PVS_shallow(search, -beta, -alpha, depth - 1);
-		hash_data.data.move[0] = move->x;
-		search_restore_midgame(search, move->x, &eval0);
-		search->board = board0;
-		lower = (bestscore > alpha) ? bestscore : alpha;
-
-		while ((bestscore < beta) && (move = move_next_best(move))) {
-			search_update_midgame(search, move);
-			score = -NWS_shallow(search, ~lower, depth - 1, &search->shallow_table);
-			if (lower < score && score < beta)
-				lower = score = -PVS_shallow(search, -beta, -lower, depth - 1);
-			search_restore_midgame(search, move->x, &eval0);
-			search->board = board0;
-=======
-		bestscore = -SCORE_INF; bestmove = NOMOVE;
-=======
-		bestscore = -SCORE_INF; hash_store_data.move = NOMOVE;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-		bestscore = -SCORE_INF; hash_store_data.data.move[0] = NOMOVE;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-=======
-		if (movelist.n_moves > 1) {
-			movelist_evaluate(&movelist, search, &hash_data, alpha, depth);
-			movelist_sort(&movelist);
-		}
-
-		// loop over all moves
-<<<<<<< HEAD
-		bestscore = -SCORE_INF;
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-		lower = alpha;
-=======
->>>>>>> 44fd278 (Rearrange PVS_shallow loop)
-		backup.board = search->board;
-		backup.eval = search->eval;
-=======
-		// sort the list of moves
 		nodes_org = search->n_nodes;
 		movelist_evaluate(&movelist, search, &HASH_DATA_INIT, alpha, depth);
 
 		// loop over all moves
-<<<<<<< HEAD
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-		move = movelist.move[0].next;
-=======
 		move = movelist_best(&movelist);
->>>>>>> 19da175 (differed movelist sort in PVS/NWS_shallow)
 		search_update_midgame(search, move);
 		bestscore = -PVS_shallow(search, -beta, -alpha, depth - 1);
 		hash_data.data.move[0] = move->x;
@@ -1643,108 +521,16 @@ int PVS_shallow(Search *search, int alpha, int beta, int depth)
 
 		while ((bestscore < beta) && (move = move_next_best(move))) {
 			search_update_midgame(search, move);
-<<<<<<< HEAD
-<<<<<<< HEAD
-				if (bestscore == -SCORE_INF) {
-					score = -PVS_shallow(search, -beta, -lower, depth - 1);
-				} else {
-					score = -NWS_shallow(search, -lower - 1, depth - 1, hash_table);
-					if (alpha < score && score < beta) {
-						score = -PVS_shallow(search, -beta, -lower, depth - 1);
-					}
-				}
-<<<<<<< HEAD
-			search_restore_midgame(search, move, &Ev0);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-=======
-			score = -NWS_shallow(search, -lower - 1, depth - 1, hash_table);
-=======
 			score = -NWS_shallow(search, ~lower, depth - 1, &search->shallow_table);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 			if (lower < score && score < beta)
 				lower = score = -PVS_shallow(search, -beta, -lower, depth - 1);
-<<<<<<< HEAD
->>>>>>> 44fd278 (Rearrange PVS_shallow loop)
-			search_restore_midgame(search, move->x, &backup);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
 			search_restore_midgame(search, move->x, &eval0);
 			search->board = board0;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 			if (score > bestscore) {
 				bestscore = score;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-				hash_data.data.move[0] = move->x;
-=======
-				hash_store_data.move = move->x;
-=======
-				hash_store_data.data.move[0] = move->x;
-<<<<<<< HEAD
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-				if (score >= beta) break;
-				else if (score > lower) lower = score;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-			}
-<<<<<<< HEAD
-		}
-
-		// save the best result in shallow hash
-		hash_data.data.wl.c.depth = depth;
-		hash_data.data.wl.c.selectivity = NO_SELECTIVITY;	// (4.5.1)
-		hash_data.data.wl.c.cost = last_bit(search->n_nodes - nodes_org);
-		// hash_data.data.move[0] = bestmove;
-		hash_data.alpha = alpha;
-		hash_data.beta = beta;
-		hash_data.score = bestscore;
-		hash_store(&search->shallow_table, &search->board, board_get_hash_code(&search->board), &hash_data);
-
-	} else if (movelist.n_moves == 1) {
-		move = movelist.move[0].next;
-		search_update_midgame(search, move);
-		bestscore = -PVS_shallow(search, -beta, -alpha, depth - 1);
-		search_restore_midgame(search, move->x, &eval0);
-		search->board = board0;
-
-	} else { // no moves
-		if (can_move(search->board.opponent, search->board.player)) { // pass ?
-			search_update_pass_midgame(search, &eval0);
-			bestscore = -PVS_shallow(search, -beta, -alpha, depth);
-			search_restore_pass_midgame(search, &eval0);
-		} else { // game-over !
-			bestscore = search_solve(search);
-		}
-=======
-		} while ((move = move->next));
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-=======
-=======
 				hash_data.data.move[0] = move->x;
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
 			}
 		}
-<<<<<<< HEAD
->>>>>>> 44fd278 (Rearrange PVS_shallow loop)
-	}
-
-<<<<<<< HEAD
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-=======
-	// save the best result in hash tables
-	hash_store_data.data.wl.c.depth = depth;
-	hash_store_data.data.wl.c.selectivity = search->selectivity;
-	hash_store_data.data.wl.c.cost = last_bit(search->n_nodes - nodes_org);
-	// hash_store_data.data.move[0] = bestmove;
-	hash_store_data.alpha = alpha;
-	hash_store_data.beta = beta;
-	hash_store_data.score = bestscore;
-	hash_store(hash_table, &search->board, hash_code, &hash_store_data);
-	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
-
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 
 		// save the best result in shallow hash
 		hash_data.data.wl.c.depth = depth;
@@ -1774,7 +560,6 @@ int PVS_shallow(Search *search, int alpha, int beta, int depth)
 	}
 
 	assert(SCORE_MIN <= bestscore && bestscore <= SCORE_MAX);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 	return bestscore;
 }
 
@@ -1796,104 +581,26 @@ int PVS_shallow(Search *search, int alpha, int beta, int depth)
 int NWS_midgame(Search *search, const int alpha, int depth, Node *parent)
 {
 	int score;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	unsigned long long hash_code;
-	// const int beta = alpha + 1;
-	HashStoreData hash_data;
-	MoveList movelist;
-	Move *move;
-<<<<<<< HEAD
-	Node node;
-	Eval eval0;
-	V2DI board0;
-	long long nodes_org;
-=======
-	Node node[1];
-=======
-	HashTable *const hash_table = &search->hash_table;
-	HashTable *const pv_table = &search->pv_table;
-=======
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 	unsigned long long hash_code;
 	// const int beta = alpha + 1;
 	HashStoreData hash_data;
 	MoveList movelist;
 	Move *move;
 	Node node;
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	Eval Ev0;
-<<<<<<< HEAD
-	long long cost = -search->n_nodes - search->child_nodes;
-	int cost_bits;
-	int hash_selectivity;
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-=======
-	Search_Backup backup;
-<<<<<<< HEAD
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-	long long nodes_org = search->n_nodes + search->child_nodes;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-=======
 	Eval eval0;
 	V2DI board0;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 	long long nodes_org;
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	assert(search->eval.n_empties == bit_count(~(search->board.player | search->board.opponent)));
-	assert((2 <= depth && depth < search->eval.n_empties) || depth == search->eval.n_empties);
-=======
-	assert(search->n_empties == bit_count(~(search->board.player | search->board.opponent)));
-	assert((2 <= depth && depth < search->n_empties) || depth == search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	assert(search->eval.n_empties == bit_count(~(search->board.player | search->board.opponent)));
 	assert((2 <= depth && depth < search->eval.n_empties) || depth == search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 	assert(SCORE_MIN <= alpha && alpha <= SCORE_MAX);
 	assert(parent != NULL);
 
 	search_check_timeout(search);
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 44fd278 (Rearrange PVS_shallow loop)
 	if (search->stop)
 		return alpha;
 
 	if (search->eval.n_empties == 0)
-<<<<<<< HEAD
-		return search_solve_0(search);
-	else if (depth < search->eval.n_empties) {
-		if (depth <= 3)
-			return NWS_shallow(search, alpha, depth, &search->hash_table);
-	} else {
-		if (depth < DEPTH_MIDGAME_TO_ENDGAME)
-			return NWS_endgame(search, alpha);
-	}
-=======
-	if (search->stop) return alpha;
-<<<<<<< HEAD
-	else if (search->eval.n_empties == 0) return search_solve_0(search);
-	else if (depth <= 3 && depth < search->eval.n_empties) return NWS_shallow(search, alpha, depth, hash_table);
-	else if (search->eval.n_empties <= depth && depth < DEPTH_MIDGAME_TO_ENDGAME) return NWS_endgame(search, alpha);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-	else if (search->eval.n_empties == 0)
-		return search_solve_0(search);
-	else if (depth <= 3 && depth < search->eval.n_empties)
-		return NWS_shallow(search, alpha, depth, hash_table);
-	else if (search->eval.n_empties <= depth && depth < DEPTH_MIDGAME_TO_ENDGAME)
-		return NWS_endgame(search, alpha);
->>>>>>> 1e01a49 (Change EVAL_FEATURE to struct for readability; decrease EVAL_N_PLY)
-=======
 		return search_solve_0(search);
 	else if (depth < search->eval.n_empties) {
 		if (depth <= 3)
@@ -1902,7 +609,6 @@ int NWS_midgame(Search *search, const int alpha, int depth, Node *parent)
 		if (depth < DEPTH_MIDGAME_TO_ENDGAME)
 			return NWS_endgame(search, alpha);
 	}
->>>>>>> 44fd278 (Rearrange PVS_shallow loop)
 
 	SEARCH_STATS(++statistics.n_NWS_midgame);
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
@@ -1914,86 +620,19 @@ int NWS_midgame(Search *search, const int alpha, int depth, Node *parent)
 	hash_prefetch(&search->hash_table, hash_code);
 	hash_prefetch(&search->pv_table, hash_code);
 
-<<<<<<< HEAD
-	// stability cutoff
-	if (search_SC_NWS(search, alpha, &score)) return score;
-
-<<<<<<< HEAD
-	hash_code = board_get_hash_code(&search->board);
-	hash_prefetch(&search->hash_table, hash_code);
-	hash_prefetch(&search->pv_table, hash_code);
-
-=======
->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX)
 	nodes_org = search->n_nodes + search->child_nodes;
 	search_get_movelist(search, &movelist);
 
 	// transposition cutoff
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if (hash_get(&search->hash_table, &search->board, hash_code, &hash_data.data) || hash_get(&search->pv_table, &search->board, hash_code, &hash_data.data))
-=======
-	board0.board = search->board;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if (vhash_get(&search->hash_table, board0, hash_code, &hash_data.data) || vhash_get(&search->pv_table, board0, hash_code, &hash_data.data))
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-	if (hash_get(&search->hash_table, HBOARD_V(board0), hash_code, &hash_data.data) || hash_get(&search->pv_table, HBOARD_V(board0), hash_code, &hash_data.data))
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-	if (hash_get(&search->hash_table, &board0.board, hash_code, &hash_data.data) || hash_get(&search->pv_table, &board0.board, hash_code, &hash_data.data))
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-=======
 	if (hash_get(&search->hash_table, &search->board, hash_code, &hash_data.data) || hash_get(&search->pv_table, &search->board, hash_code, &hash_data.data))
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 		if (search_TC_NWS(&hash_data.data, depth, search->selectivity, alpha, &score)) return score;
 
 	if (movelist_is_empty(&movelist)) { // no moves ?
 		node_init(&node, search, alpha, alpha + 1, depth, movelist.n_moves, parent);
 		if (can_move(search->board.opponent, search->board.player)) { // pass ?
 			search_update_pass_midgame(search, &eval0);
-<<<<<<< HEAD
-			node.bestscore = -NWS_midgame(search, -node.beta, depth, &node);
-			search_restore_pass_midgame(search, &eval0);
-=======
-	hash_code = board_get_hash_code(&search->board);
-	if ((hash_get(hash_table, &search->board, hash_code, &hash_data) || hash_get(pv_table, &search->board, hash_code, &hash_data)) && search_TC_NWS(&hash_data, depth, search->selectivity, alpha, &score)) return score;
-
-=======
-	nodes_org = search->n_nodes + search->child_nodes;
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-	search_get_movelist(search, &movelist);
-
-	// transposition cutoff
-	if ((hash_get(&search->hash_table, &search->board, hash_code, &hash_data) || hash_get(&search->pv_table, &search->board, hash_code, &hash_data)) && search_TC_NWS(&hash_data, depth, search->selectivity, alpha, &score)) return score;
-=======
-	if (hash_get(&search->hash_table, &search->board, hash_code, &hash_data) || hash_get(&search->pv_table, &search->board, hash_code, &hash_data))
-		if (search_TC_NWS(&hash_data, depth, search->selectivity, alpha, &score)) return score;
->>>>>>> 264e827 (calc solid stone only when stability cutoff tried)
-=======
-	if (hash_get(&search->hash_table, &search->board, hash_code, &hash_data.data) || hash_get(&search->pv_table, &search->board, hash_code, &hash_data.data))
-		if (search_TC_NWS(&hash_data.data, depth, search->selectivity, alpha, &score)) return score;
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-
-	if (movelist_is_empty(&movelist)) { // no moves ?
-		node_init(&node, search, alpha, alpha + 1, depth, movelist.n_moves, parent);
-		if (can_move(search->board.opponent, search->board.player)) { // pass ?
-			search_update_pass_midgame(search, &backup.eval);
-			node.bestscore = -NWS_midgame(search, -node.beta, depth, &node);
-<<<<<<< HEAD
-			search_restore_pass_midgame(search);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			search_restore_pass_midgame(search, &backup.eval);
->>>>>>> e970433 (Restore eval by copy in search_restore_pass_midgame)
-=======
 			node.bestscore = -NWS_midgame(search, -node.beta, depth, &node);
 			search_restore_pass_midgame(search, &eval0);
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 		} else { // game-over !
 			node.bestscore = search_solve(search);
 		}
@@ -2004,69 +643,18 @@ int NWS_midgame(Search *search, const int alpha, int depth, Node *parent)
 
 		// sort the list of moves
 		if (movelist.n_moves > 1) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
 			if (hash_data.data.move[0] == NOMOVE) hash_get(&search->hash_table, &search->board, hash_code, &hash_data.data);
-=======
-			if (hash_data.data.move[0] == NOMOVE) vhash_get(&search->hash_table, board0, hash_code, &hash_data.data);
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-			if (hash_data.data.move[0] == NOMOVE) hash_get(&search->hash_table, HBOARD_V(board0), hash_code, &hash_data.data);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-			if (hash_data.data.move[0] == NOMOVE) hash_get(&search->hash_table, &board0.board, hash_code, &hash_data.data);
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-=======
-			if (hash_data.data.move[0] == NOMOVE) hash_get(&search->hash_table, &search->board, hash_code, &hash_data.data);
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 			movelist_evaluate(&movelist, search, &hash_data.data, alpha, depth + options.inc_sort_depth[search->node_type[search->height]]);
 			movelist_sort(&movelist);
-=======
-			if (hash_data.move[0] == NOMOVE) hash_get(hash_table, &search->board, hash_code, &hash_data);
-=======
-			if (hash_data.move[0] == NOMOVE) hash_get(&search->hash_table, &search->board, hash_code, &hash_data);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-			movelist_evaluate(&movelist, search, &hash_data, alpha, depth + options.inc_sort_depth[search->node_type[search->height]]);
-<<<<<<< HEAD
-			movelist_sort(&movelist) ;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			movelist_sort(&movelist);
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
 		}
 
 		// ETC
 		if (search_ETC_NWS(search, &movelist, hash_code, depth, search->selectivity, alpha, &score)) return score;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 		node_init(&node, search, alpha, alpha + 1, depth, movelist.n_moves, parent);
-=======
-		node_init(&node, search, alpha, beta, depth, movelist.n_moves, parent);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		node_init(&node, search, alpha, alpha + 1, depth, movelist.n_moves, parent);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 
 		// loop over all moves
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		board0.board = search->board;
-=======
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
 		board0.board = search->board;
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 		eval0 = search->eval;
 		for (move = node_first_move(&node, &movelist); move; move = node_next_move(&node)) {
 			if (!node_split(&node, move)) {
@@ -2075,34 +663,6 @@ int NWS_midgame(Search *search, const int alpha, int depth, Node *parent)
 				search_restore_midgame(search, move->x, &eval0);
 				search->board = board0.board;
 				node_update(&node, move);
-=======
-		Ev0 = search->eval;
-=======
-		Ev0.feature = search->eval.feature;
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-=======
-		backup.board = search->board;
-		backup.eval = search->eval;
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-		for (move = node_first_move(&node, &movelist); move; move = node_next_move(&node)) {
-			if (!node_split(&node, move)) {
-				search_update_midgame(search, move);
-<<<<<<< HEAD
-				move->score = -NWS_midgame(search, -beta, depth - 1, &node);
-<<<<<<< HEAD
-				search_restore_midgame(search, move, &Ev0);
-<<<<<<< HEAD
-				node_update(node, move);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-=======
-=======
-				move->score = -NWS_midgame(search, ~alpha, depth - 1, &node);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-				search_restore_midgame(search, move->x, &backup);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-				node_update(&node, move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
 			}
 		}
 		node_wait_slaves(&node);
@@ -2110,25 +670,6 @@ int NWS_midgame(Search *search, const int alpha, int depth, Node *parent)
 
 	// save the best result in hash tables
 	if (!search->stop) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		if (depth <= ((search->eval.n_empties <= depth) ? DEPTH_MIDGAME_TO_ENDGAME : 4))
-			hash_data.data.wl.c.selectivity = NO_SELECTIVITY; // hack
-		else hash_data.data.wl.c.selectivity = search->selectivity;
-		hash_data.data.wl.c.depth = depth;
-		hash_data.data.wl.c.cost = last_bit(search->n_nodes + search->child_nodes - nodes_org);
-		hash_data.data.move[0] = node.bestmove;
-		hash_data.alpha = alpha;
-		hash_data.beta = alpha + 1;
-		hash_data.score = node.bestscore;
-
-		if (search->height <= PV_HASH_HEIGHT) hash_store(&search->pv_table, &search->board, hash_code, &hash_data);
-		hash_store(&search->hash_table, &search->board, hash_code, &hash_data);
-=======
 		if (depth <= ((search->eval.n_empties <= depth) ? DEPTH_MIDGAME_TO_ENDGAME : 4))
 			hash_data.data.wl.c.selectivity = NO_SELECTIVITY; // hack
 		else hash_data.data.wl.c.selectivity = search->selectivity;
@@ -2139,75 +680,15 @@ int NWS_midgame(Search *search, const int alpha, int depth, Node *parent)
 		hash_data.beta = alpha + 1;
 		hash_data.score = node.bestscore;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		if (search->height <= PV_HASH_HEIGHT) hash_store(&search->pv_table, &search->board, hash_code, &hash_store_data);
-		hash_store(&search->hash_table, &search->board, hash_code, &hash_store_data);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-=======
-		if (search->height <= PV_HASH_HEIGHT) hash_store(&search->pv_table, &search->board, hash_code, &hash_data);
-		hash_store(&search->hash_table, &search->board, hash_code, &hash_data);
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-=======
-		if (search->height <= PV_HASH_HEIGHT) hash_store(&search->pv_table, HBOARD_P(&search->board), hash_code, &hash_data);
-		hash_store(&search->hash_table, HBOARD_P(&search->board), hash_code, &hash_data);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
 		if (search->height <= PV_HASH_HEIGHT) hash_store(&search->pv_table, &search->board, hash_code, &hash_data);
 		hash_store(&search->hash_table, &search->board, hash_code, &hash_data);
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-
-		SQUARE_STATS(foreach_move(move, &movelist))
-		SQUARE_STATS(++statistics.n_played_square[search->eval.n_empties][SQUARE_TYPE[move->x]];)
-		SQUARE_STATS(if (node.bestscore > alpha) ++statistics.n_good_square[search->eval.n_empties][SQUARE_TYPE[node->bestmove]];)
-
-	 	assert(SCORE_MIN <= node.bestscore && node.bestscore <= SCORE_MAX);
 
-=======
-		cost += search->n_nodes + search->child_nodes;
-		cost_bits = last_bit(cost);
-		if (search->n_empties < depth && depth <= DEPTH_MIDGAME_TO_ENDGAME) hash_selectivity = NO_SELECTIVITY; // hack
-		else hash_selectivity = search->selectivity;
-		if (search->height <= PV_HASH_HEIGHT) hash_store(pv_table, &search->board, hash_code, depth, hash_selectivity, cost_bits, alpha, beta, node.bestscore, node.bestmove);
-		hash_store(hash_table, &search->board, hash_code, depth, hash_selectivity, cost_bits, alpha, beta, node.bestscore, node.bestmove);
-=======
-		if (search->n_empties < depth && depth <= DEPTH_MIDGAME_TO_ENDGAME) hash_store_data.data.selectivity = NO_SELECTIVITY; // hack
-		else hash_store_data.data.selectivity = search->selectivity;
-		hash_store_data.data.depth = depth;
-		hash_store_data.data.cost = last_bit(search->n_nodes + search->child_nodes - nodes_org);
-=======
-		if (search->n_empties < depth && depth <= DEPTH_MIDGAME_TO_ENDGAME) hash_store_data.data.wl.c.selectivity = NO_SELECTIVITY; // hack
-=======
-		if (search->eval.n_empties < depth && depth <= DEPTH_MIDGAME_TO_ENDGAME) hash_store_data.data.wl.c.selectivity = NO_SELECTIVITY; // hack
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-		else hash_store_data.data.wl.c.selectivity = search->selectivity;
-=======
-		/* if (search->eval.n_empties < depth && depth <= DEPTH_MIDGAME_TO_ENDGAME) hash_store_data.data.wl.c.selectivity = NO_SELECTIVITY; // hack
-		else */ hash_store_data.data.wl.c.selectivity = search->selectivity;
->>>>>>> c0fb778 (small optimizations in endgame)
-		hash_store_data.data.wl.c.depth = depth;
-		hash_store_data.data.wl.c.cost = last_bit(search->n_nodes + search->child_nodes - nodes_org);
-		hash_store_data.data.move[0] = node.bestmove;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-		hash_store_data.alpha = alpha;
-		hash_store_data.beta = beta;
-		hash_store_data.score = node.bestscore;
-
-		if (search->height <= PV_HASH_HEIGHT) hash_store(pv_table, &search->board, hash_code, &hash_store_data);
-		hash_store(hash_table, &search->board, hash_code, &hash_store_data);
-
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
 		SQUARE_STATS(foreach_move(move, &movelist))
 		SQUARE_STATS(++statistics.n_played_square[search->eval.n_empties][SQUARE_TYPE[move->x]];)
 		SQUARE_STATS(if (node.bestscore > alpha) ++statistics.n_good_square[search->eval.n_empties][SQUARE_TYPE[node->bestmove]];)
 
 	 	assert(SCORE_MIN <= node.bestscore && node.bestscore <= SCORE_MAX);
-<<<<<<< HEAD
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 
->>>>>>> c0fb778 (small optimizations in endgame)
 	} else {
 		node.bestscore = alpha;
 	}
@@ -2236,143 +717,35 @@ int NWS_midgame(Search *search, const int alpha, int depth, Node *parent)
 int PVS_midgame(Search *search, const int alpha, const int beta, int depth, Node *parent)
 {
 	// declaration
-<<<<<<< HEAD
-<<<<<<< HEAD
-	unsigned long long hash_code, solid_opp;
-	HashStoreData hash_data;
-	MoveList movelist;
-	Move *move;
-<<<<<<< HEAD
-	Node node;
-	Eval eval0;
-	Board board0, hashboard;
-	long long nodes_org;
-	int reduced_depth, depth_pv_extension, saved_selectivity, ofssolid;
-=======
-	Node node[1];
-=======
-	HashTable *const hash_table = &search->hash_table;
-	HashTable *const pv_table = &search->pv_table;
-=======
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 	unsigned long long hash_code, solid_opp;
 	HashStoreData hash_data;
 	MoveList movelist;
 	Move *move;
 	Node node;
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	Eval Ev0;
-=======
-	Search_Backup backup;
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
 	Eval eval0;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	V2DI board0;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-	V2DI board0, hashboard;
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
 	Board board0, hashboard;
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 	long long nodes_org;
-<<<<<<< HEAD
-	int reduced_depth, depth_pv_extension, saved_selectivity;
-<<<<<<< HEAD
-	int hash_selectivity;
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
 	int reduced_depth, depth_pv_extension, saved_selectivity, ofssolid;
-<<<<<<< HEAD
-	Board hashboard;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	V4DI full;
->>>>>>> 9794cc1 (Store solid-normalized hash in PVS_midgame)
-=======
-	unsigned long long full[5];
->>>>>>> 4303b09 (Returns all full lines in full[4])
-=======
->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX)
-=======
->>>>>>> e88638e (add vectorcall interface to hash functions)
 
 	SEARCH_STATS(++statistics.n_PVS_midgame);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	assert(search->eval.n_empties == bit_count(~(search->board.player | search->board.opponent)));
-	assert(depth <= search->eval.n_empties);
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	assert(search->n_empties == bit_count(~(search->board.player | search->board.opponent)));
-	assert(depth <= search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	assert(search->eval.n_empties == bit_count(~(search->board.player | search->board.opponent)));
 	assert(depth <= search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-	assert((-SCORE_MAX <= alpha && alpha <= SCORE_MAX) || printf("alpha = %d\n", alpha));
-	assert((-SCORE_MAX <= beta && beta <= SCORE_MAX) || printf("beta = %d\n", beta));
-=======
-	assert((-SCORE_MAX <= alpha && alpha <= SCORE_MAX) || !printf("alpha = %d\n", alpha));
-	assert((-SCORE_MAX <= beta && beta <= SCORE_MAX) || !printf("beta = %d\n", beta));
->>>>>>> c0fb778 (small optimizations in endgame)
-=======
 	assert((-SCORE_MAX <= alpha && alpha <= SCORE_MAX) || printf("alpha = %d\n", alpha));
 	assert((-SCORE_MAX <= beta && beta <= SCORE_MAX) || printf("beta = %d\n", beta));
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 	assert(alpha <= beta);
 
 	// end of search ?
 	search_check_timeout(search);
 	if (search->stop) return alpha;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	else if (search->eval.n_empties == 0)
 		return search_solve_0(search);
 	else if (USE_PV_EXTENSION && search->eval.n_empties <= search->depth_pv_extension)
 		depth = search->eval.n_empties;
-<<<<<<< HEAD
 	else if (depth == 2 && search->eval.n_empties > 2)
 		return search_eval_2(search, alpha, beta, board_get_moves(&search->board));
-<<<<<<< HEAD
-=======
-	else if (search->n_empties == 0)
-=======
-	else if (search->eval.n_empties == 0)
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-		return search_solve_0(search);
-	else if (USE_PV_EXTENSION && depth < search->eval.n_empties && search->eval.n_empties <= search->depth_pv_extension)
-		return PVS_midgame(search, alpha, beta, search->eval.n_empties, parent);
-=======
->>>>>>> 4303b09 (Returns all full lines in full[4])
-	else if (depth == 2 && search->eval.n_empties > 2)
-<<<<<<< HEAD
-		return search_eval_2(search, alpha, beta, get_moves(search->board.player, search->board.opponent));
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-		return search_eval_2(search, alpha, beta, false);
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	nodes_org = search_count_nodes(search);
-=======
-	cost = -search_count_nodes(search);
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
+
 	nodes_org = search_count_nodes(search);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
 
 	search_get_movelist(search, &movelist);
@@ -2383,30 +756,10 @@ int PVS_midgame(Search *search, const int alpha, const int beta, int depth, Node
 	// special cases
 	if (movelist_is_empty(&movelist)) {
 		if (can_move(search->board.opponent, search->board.player)) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 			search_update_pass_midgame(search, &eval0);
 			search->node_type[search->height] = PV_NODE;
 			node.bestscore = -PVS_midgame(search, -beta, -alpha, depth, &node);
 			search_restore_pass_midgame(search, &eval0);
-=======
-			search_update_pass_midgame(search); search->node_type[search->height] = PV_NODE;
-			node.bestscore = -PVS_midgame(search, -beta, -alpha, depth, &node);
-			search_restore_pass_midgame(search);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			search_update_pass_midgame(search, &backup.eval);
-			search->node_type[search->height] = PV_NODE;
-			node.bestscore = -PVS_midgame(search, -beta, -alpha, depth, &node);
-			search_restore_pass_midgame(search, &backup.eval);
->>>>>>> e970433 (Restore eval by copy in search_restore_pass_midgame)
-=======
-			search_update_pass_midgame(search, &eval0);
-			search->node_type[search->height] = PV_NODE;
-			node.bestscore = -PVS_midgame(search, -beta, -alpha, depth, &node);
-			search_restore_pass_midgame(search, &eval0);
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 			node.bestmove = PASS;
 		} else {
 			node.alpha = -(node.beta = +SCORE_INF);
@@ -2417,125 +770,29 @@ int PVS_midgame(Search *search, const int alpha, const int beta, int depth, Node
 	} else { // normal PVS
 		if (movelist.n_moves > 1) {
 			//IID
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-			if (!hash_get(&search->pv_table, &search->board, hash_code, &hash_data.data))
-				hash_get(&search->hash_table, &search->board, hash_code, &hash_data.data);
-=======
-			if (!vhash_get(&search->pv_table, board0, hash_code, &hash_data.data))
-				vhash_get(&search->hash_table, board0, hash_code, &hash_data.data);
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-			if (!hash_get(&search->pv_table, HBOARD_V(board0), hash_code, &hash_data.data))
-				hash_get(&search->hash_table, HBOARD_V(board0), hash_code, &hash_data.data);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-			if (!hash_get(&search->pv_table, &board0, hash_code, &hash_data.data))
-				hash_get(&search->hash_table, &board0, hash_code, &hash_data.data);
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-=======
-			if (!hash_get(&search->pv_table, &search->board, hash_code, &hash_data.data))
-				hash_get(&search->hash_table, &search->board, hash_code, &hash_data.data);
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
-
-			if (USE_IID && hash_data.data.move[0] == NOMOVE) {	// (unused)
-				if (depth == search->eval.n_empties) reduced_depth = depth - ITERATIVE_MIN_EMPTIES;
-=======
-			if (!hash_get(pv_table, &search->board, hash_code, &hash_data)) hash_get(hash_table, &search->board, hash_code, &hash_data);
-=======
-			if (!hash_get(pv_table, &search->board, hash_code, &hash_data))
-				hash_get(hash_table, &search->board, hash_code, &hash_data);
->>>>>>> 44fd278 (Rearrange PVS_shallow loop)
-			if (USE_IID && hash_data.move[0] == NOMOVE) {
-<<<<<<< HEAD
-				if (depth == search->n_empties) reduced_depth = depth - ITERATIVE_MIN_EMPTIES;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-=======
-			if (!hash_get(&search->pv_table, &search->board, hash_code, &hash_data))
-				hash_get(&search->hash_table, &search->board, hash_code, &hash_data);
-
-			if (USE_IID && hash_data.move[0] == NOMOVE) {	// (unused)
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-=======
 			if (!hash_get(&search->pv_table, &search->board, hash_code, &hash_data.data))
 				hash_get(&search->hash_table, &search->board, hash_code, &hash_data.data);
 
 			if (USE_IID && hash_data.data.move[0] == NOMOVE) {	// (unused)
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
 				if (depth == search->eval.n_empties) reduced_depth = depth - ITERATIVE_MIN_EMPTIES;
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 				else reduced_depth = depth - 2;
 				if (reduced_depth >= 3) {
 					saved_selectivity = search->selectivity; search->selectivity = 0;
 					depth_pv_extension = search->depth_pv_extension;
 					search->depth_pv_extension = 0;
 					PVS_midgame(search, SCORE_MIN, SCORE_MAX, reduced_depth, parent);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-					hash_get(&search->pv_table, &search->board, hash_code, &hash_data.data);
-=======
-					hash_get(pv_table, &search->board, hash_code, &hash_data);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-					hash_get(&search->pv_table, &search->board, hash_code, &hash_data);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-=======
-					hash_get(&search->pv_table, &search->board, hash_code, &hash_data.data);
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-=======
-					vhash_get(&search->pv_table, board0, hash_code, &hash_data.data);
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-					hash_get(&search->pv_table, HBOARD_V(board0), hash_code, &hash_data.data);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-					hash_get(&search->pv_table, &board0, hash_code, &hash_data.data);
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-=======
 					hash_get(&search->pv_table, &search->board, hash_code, &hash_data.data);
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 					search->depth_pv_extension = depth_pv_extension;
 					search->selectivity = saved_selectivity;
 				}
 			}
 
 			// Evaluate moves for sorting. For a better ordering, the depth is artificially increased
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
 			movelist_evaluate(&movelist, search, &hash_data.data, node.alpha, depth + options.inc_sort_depth[PV_NODE]);
 			movelist_sort(&movelist);
-=======
-			movelist_evaluate(&movelist, search, &hash_data, node.alpha, depth + options.inc_sort_depth[PV_NODE]);
-<<<<<<< HEAD
-			movelist_sort(&movelist) ;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			movelist_sort(&movelist);
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
 		}
 
 		// first move
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 		board0 = search->board;
 		eval0 = search->eval;
 		if ((move = node_first_move(&node, &movelist))) { // why if there ?
@@ -2544,92 +801,20 @@ int PVS_midgame(Search *search, const int alpha, const int beta, int depth, Node
 			search_restore_midgame(search, move->x, &eval0);
 			search->board = board0;
 			node_update(&node, move);
-=======
-		Ev0 = search->eval;
-=======
-		Ev0.feature = search->eval.feature;
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-		if ((move = node_first_move(&node, &movelist))) { // why if there ?
-			search_update_midgame(search, move); search->node_type[search->height] = PV_NODE;
-			move->score = -PVS_midgame(search, -beta, -alpha, depth - 1, &node);
-			search_restore_midgame(search, move, &Ev0);
-<<<<<<< HEAD
-			node_update(node, move);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-=======
-		backup.board = search->board;
-		backup.eval = search->eval;
-		if ((move = node_first_move(&node, &movelist))) { // why if there ?
-			search_update_midgame(search, move); search->node_type[search->height] = PV_NODE;
-			move->score = -PVS_midgame(search, -beta, -alpha, depth - 1, &node);
-			search_restore_midgame(search, move->x, &backup);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
-		eval0 = search->eval;
-		if ((move = node_first_move(&node, &movelist))) { // why if there ?
-			search_update_midgame(search, move); search->node_type[search->height] = PV_NODE;
-			move->score = -PVS_midgame(search, -beta, -alpha, depth - 1, &node);
-			search_restore_midgame(search, move->x, &eval0);
-<<<<<<< HEAD
-			search->board = board0.board;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-			search->board = board0;
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-			node_update(&node, move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
 
 			// other moves : try to refute the first/best one
 			while ((move = node_next_move(&node))) {
 				if (!node_split(&node, move)) {
 					const int alpha = node.alpha;
 					search_update_midgame(search, move);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
 					move->score = -NWS_midgame(search, -alpha - 1, depth - 1, &node);
 					if (!search->stop && alpha < move->score && move->score < beta) {
 						search->node_type[search->height] = PV_NODE;
 						move->score = -PVS_midgame(search, -beta, -alpha, depth - 1, &node);
 					}
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-					search_restore_midgame(search, move->x, &eval0);
-					search->board = board0;
-=======
-					search_restore_midgame(search, move->x, &backup);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
 					search_restore_midgame(search, move->x, &eval0);
-<<<<<<< HEAD
-					search->board = board0.board;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
 					search->board = board0;
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-					node_update(&node, move);
-=======
-						move->score = -NWS_midgame(search, -alpha - 1, depth - 1, node);
-=======
-						move->score = -NWS_midgame(search, -alpha - 1, depth - 1, &node);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-						if (!search->stop && alpha < move->score && move->score < beta) {
-							search->node_type[search->height] = PV_NODE;
-							move->score = -PVS_midgame(search, -beta, -alpha, depth - 1, &node);
-						}
-=======
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-					search_restore_midgame(search, move, &Ev0);
-<<<<<<< HEAD
-					node_update(node, move);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
 					node_update(&node, move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
 				}
 			}
 			node_wait_slaves(&node);
@@ -2638,13 +823,6 @@ int PVS_midgame(Search *search, const int alpha, const int beta, int depth, Node
 
 	// save the best result in hash tables
 	if (!search->stop) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 		if (depth <= ((search->eval.n_empties <= depth) ? DEPTH_MIDGAME_TO_ENDGAME : 4))
 			hash_data.data.wl.c.selectivity = NO_SELECTIVITY;
 		else	hash_data.data.wl.c.selectivity = search->selectivity;
@@ -2654,79 +832,9 @@ int PVS_midgame(Search *search, const int alpha, const int beta, int depth, Node
 		hash_data.alpha = alpha;
 		hash_data.beta = beta;
 		hash_data.score = node.bestscore;
-<<<<<<< HEAD
-
-		hash_store(&search->hash_table, &search->board, hash_code, &hash_data);
-		hash_store(&search->pv_table, &search->board, hash_code, &hash_data);
-
-		// store solid-normalized for endgame TC
-		if (search->eval.n_empties <= depth && depth <= MASK_SOLID_DEPTH && depth > DEPTH_TO_SHALLOW_SEARCH) {
-			solid_opp = get_all_full_lines(search->board.player | search->board.opponent) & search->board.opponent;
-			if (solid_opp) {
-				hashboard.player = search->board.player ^ solid_opp;	// normalize solid to player
-				hashboard.opponent = search->board.opponent ^ solid_opp;
-				ofssolid = bit_count(solid_opp) * 2;	// hash score is ofssolid grater than real
-				hash_data.alpha += ofssolid;
-				hash_data.beta += ofssolid;
-				hash_data.score += ofssolid;
-				hash_store(&search->hash_table, &hashboard, board_get_hash_code(&hashboard), &hash_data);
-			}
-		}
-
-		SQUARE_STATS(foreach_move(move, movelist))
-		SQUARE_STATS(++statistics.n_played_square[search->eval.n_empties][SQUARE_TYPE[move->x]];)
-		SQUARE_STATS(if (node.bestscore > alpha) ++statistics.n_good_square[search->eval.n_empties][SQUARE_TYPE[node.bestmove]];)
-=======
-		cost += search_count_nodes(search);
-		cost_bits = last_bit(cost);
-		if (search->n_empties < depth && depth <= DEPTH_MIDGAME_TO_ENDGAME) hash_selectivity = NO_SELECTIVITY;
-		else hash_selectivity = search->selectivity;
-		hash_store(hash_table, &search->board, hash_code, depth, hash_selectivity, cost_bits, alpha, beta, node.bestscore, node.bestmove);
-		hash_store(pv_table, &search->board, hash_code, depth, hash_selectivity, cost_bits, alpha, beta, node.bestscore, node.bestmove);
-=======
-		if (search->n_empties < depth && depth <= DEPTH_MIDGAME_TO_ENDGAME) hash_store_data.data.selectivity = NO_SELECTIVITY;
-		else hash_store_data.data.selectivity = search->selectivity;
-		hash_store_data.data.depth = depth;
-		hash_store_data.data.cost = last_bit(search_count_nodes(search) - nodes_org);
-=======
-		if (search->n_empties < depth && depth <= DEPTH_MIDGAME_TO_ENDGAME) hash_store_data.data.wl.c.selectivity = NO_SELECTIVITY;
-=======
-		if (search->eval.n_empties < depth && depth <= DEPTH_MIDGAME_TO_ENDGAME) hash_store_data.data.wl.c.selectivity = NO_SELECTIVITY;
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-		else hash_store_data.data.wl.c.selectivity = search->selectivity;
-=======
-		/* if (search->eval.n_empties < depth && depth <= DEPTH_MIDGAME_TO_ENDGAME) hash_store_data.data.wl.c.selectivity = NO_SELECTIVITY;
-		else */ hash_store_data.data.wl.c.selectivity = search->selectivity;
->>>>>>> c0fb778 (small optimizations in endgame)
-=======
-		if (search->eval.n_empties <= depth && depth <= DEPTH_MIDGAME_TO_ENDGAME)
-=======
-		if (depth <= ((search->eval.n_empties <= depth) ? DEPTH_MIDGAME_TO_ENDGAME : 4))
->>>>>>> af8242f (Imply NO_SELECTIVITY in shallow searches)
-			hash_store_data.data.wl.c.selectivity = NO_SELECTIVITY;
-		else	hash_store_data.data.wl.c.selectivity = search->selectivity;
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-		hash_store_data.data.wl.c.depth = depth;
-		hash_store_data.data.wl.c.cost = last_bit(search_count_nodes(search) - nodes_org);
-		hash_store_data.data.move[0] = node.bestmove;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-		hash_store_data.alpha = alpha;
-		hash_store_data.beta = beta;
-		hash_store_data.score = node.bestscore;
-
-<<<<<<< HEAD
-		hash_store(hash_table, &search->board, hash_code, &hash_store_data);
-		hash_store(pv_table, &search->board, hash_code, &hash_store_data);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-		hash_store(&search->hash_table, &search->board, hash_code, &hash_store_data);
-		hash_store(&search->pv_table, &search->board, hash_code, &hash_store_data);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-=======
 
 		hash_store(&search->hash_table, &search->board, hash_code, &hash_data);
 		hash_store(&search->pv_table, &search->board, hash_code, &hash_data);
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
 
 		// store solid-normalized for endgame TC
 		if (search->eval.n_empties <= depth && depth <= MASK_SOLID_DEPTH && depth > DEPTH_TO_SHALLOW_SEARCH) {
@@ -2743,14 +851,8 @@ int PVS_midgame(Search *search, const int alpha, const int beta, int depth, Node
 		}
 
 		SQUARE_STATS(foreach_move(move, movelist))
-<<<<<<< HEAD
-		SQUARE_STATS(++statistics.n_played_square[search->n_empties][SQUARE_TYPE[move->x]];)
-		SQUARE_STATS(if (node.bestscore > alpha) ++statistics.n_good_square[search->n_empties][SQUARE_TYPE[node.bestmove]];)
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		SQUARE_STATS(++statistics.n_played_square[search->eval.n_empties][SQUARE_TYPE[move->x]];)
 		SQUARE_STATS(if (node.bestscore > alpha) ++statistics.n_good_square[search->eval.n_empties][SQUARE_TYPE[node.bestmove]];)
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 
 	 	assert(SCORE_MIN <= node.bestscore && node.bestscore <= SCORE_MAX);
 
diff --git a/src/move.c b/src/move.c
index f490a53..19b7c49 100644
--- a/src/move.c
+++ b/src/move.c
@@ -3,19 +3,7 @@
  *
  * @brief Move & list of moves management.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2020
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
- * @date 1998 - 2022
->>>>>>> 5de3726 (inline board_update and omit restore)
-=======
- * @date 1998 - 2023
->>>>>>> 61f4b30 (dirty fix for ICC linux optimization bug)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -132,132 +120,6 @@ void move_print(const int x, const int player, FILE *f)
 }
 
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-/**
- * @brief Check if a move wins 64-0.
- *
- * Check if a move flipped all the opponent discs.
- *
- * @param move Move.
- * @param board Board.
- * @return true if a move wins 64-0, false otherwise.
- */
-bool move_wipeout(const Move *move, const Board *board)
-{
-	return move->flipped == board->opponent;
-}
-
-#ifdef TUNE_EDAX
-static int w_hash = 1 << 15;
-static int w_eval = 1 << 15;
-static int w_mobility = 1 << 15;
-static int w_corner_stability = 1 << 11;
-static int w_edge_stability = 1 << 11;
-static int w_potential_mobility = 1 << 5;
-static int w_low_parity = 1 << 3;
-static int w_mid_parity = 1 << 2;
-static int w_high_parity = 1 << 1;
-#endif
-
-
-/**
- * @brief Evaluate a list of move.
- *
- * Evaluate the moves to sort them. Evaluation is based on, in order of importance:
- *   - wipeout move    : 1 << 30
- *   - first hash move : 1 << 29
- *   - second hash move: 1 << 28
- *   - shallow search  : 1 << 22 to 1 << 14
- *   - opponent mobility:                 1 << 15            32768...1048576
- *   - player stability near the corner:  1 << 11             2048...24576
- *   - opponent potential mobility:       1 << 5                32...1024
- *   - square value                       1 << 1:               2 ...18
- *   - parity:                            1 << 0:               0 ... 1
- *
- * @param move   Move to evaluate.
- * @param search Position to evaluate.
- * @param hash_data   Position (maybe) stored in the hashtable.
- * @param sort_alpha   Alpha bound to evaluate moves.
- * @param sort_depth   depth for the shallow search
- */
-static void move_evaluate(Move *move, Search *search, const HashData *hash_data, const int sort_alpha, const int sort_depth)
-{
-#ifndef TUNE_EDAX
-	const int w_hash = 1 << 15;
-	const int w_eval = 1 << 15;
-	const int w_mobility = 1 << 15;
-	const int w_corner_stability = 1 << 11;
-	const int w_edge_stability = 1 << 11;
-	const int w_potential_mobility = 1 << 5;
-	const int w_low_parity = 1 << 3;
-	const int w_mid_parity = 1 << 2;
-	const int w_high_parity = 1 << 1;
-#endif	
-	int	score, empties, parity_weight;
-	HashData dummy[1];
-	unsigned long long P, O;
-	Search_Backup backup;
-
-	if (move_wipeout(move, &search->board)) move->score = (1 << 30);
-	else if (move->x == hash_data->move[0]) move->score = (1 << 29);
-	else if (move->x == hash_data->move[1]) move->score = (1 << 28);
-	else {
-		score = SQUARE_VALUE[move->x]; // square type
-		empties = search->eval.n_empties;
-		if (empties < 30) {	// https://eukaryote.hateblo.jp/entry/2020/05/16/082757
-			parity_weight = (empties < 12) ? w_low_parity : ((empties < 21) ? w_mid_parity : w_high_parity);
-			score += (search->eval.parity & QUADRANT_ID[move->x]) ? parity_weight : 0;
-		}
-
-		if (sort_depth < 0) {
-			// board_update(&search->board, move);
-			O = search->board.player ^ (move->flipped | X_TO_BIT[move->x]);
-			P = search->board.opponent ^ move->flipped;
-			SEARCH_UPDATE_ALL_NODES(search->n_nodes);
-			score += (36 - get_potential_mobility(P, O)) * w_potential_mobility; // potential mobility
-			score += get_corner_stability(O) * w_corner_stability; // corner stability
-			score += (36 - get_weighted_mobility(P, O)) * w_mobility; // real mobility
-			// board_restore(&search->board, move);
-
-		} else {
-			int selectivity = search->selectivity;
-			search->selectivity = NO_SELECTIVITY;
-			backup.board = search->board;
-			backup.eval = search->eval;
-			search_update_midgame(search, move);
-
-			SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
-			score += (36 - get_potential_mobility(search->board.player, search->board.opponent)) * w_potential_mobility; // potential mobility
-			score += get_edge_stability(search->board.opponent, search->board.player) * w_edge_stability; // edge stability
-			score += (36 - get_weighted_mobility(search->board.player, search->board.opponent)) *  w_mobility; // real mobility
-			switch(sort_depth) {
-			case 0:
-				score += ((SCORE_MAX - search_eval_0(search)) >> 2) * w_eval; // 1 level score bonus
-				break;
-			case 1:
-				score += ((SCORE_MAX - search_eval_1(search, SCORE_MIN, -sort_alpha, get_moves(search->board.player, search->board.opponent))) >> 1) * w_eval;  // 2 level score bonus
-				break;
-			case 2:
-				score += ((SCORE_MAX - search_eval_2(search, SCORE_MIN, -sort_alpha, get_moves(search->board.player, search->board.opponent))) >> 1) * w_eval;  // 3 level score bonus
-				break;
-			default:
-				if (hash_get(&search->hash_table, &search->board, board_get_hash_code(&search->board), dummy)) score += w_hash; // bonus if the position leads to a position stored in the hash-table
-				score += ((SCORE_MAX - PVS_shallow(search, SCORE_MIN, -sort_alpha, sort_depth))) * w_eval; // > 3 level bonus
-				break;
-			}
-
-			search_restore_midgame(search, move->x, &backup);
-			search->selectivity = selectivity;
-		}
-		move->score = score;
-	}
-}
-
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
 #ifdef TUNE_EDAX
 #include "solver.h"
 
@@ -310,8 +172,6 @@ void tune_move_evaluate(Search *search, const char *filename, const char *w_name
 int movelist_get_moves(MoveList *movelist, const Board *board)
 {
 	Move *previous = movelist->move;
-<<<<<<< HEAD
-<<<<<<< HEAD
 	Move *move;
 	unsigned long long moves = board_get_moves(board);
 	int x, n;
@@ -328,36 +188,6 @@ int movelist_get_moves(MoveList *movelist, const Board *board)
 	previous->next = NULL;
 	movelist->n_moves = n;
 	return n;
-=======
-	Move *move = movelist->move + 1;
-	unsigned long long moves = get_moves(board->player, board->opponent);
-	int x;
-=======
-	Move *move;
-	unsigned long long moves = board_get_moves(board);
-	int x, n;
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
-
-	n = 0;
-	foreach_bit (x, moves) {
-		move = previous + 1;
-		previous->next = move;
-		board_get_move_flip(board, x, move);
-		// move->score = -SCORE_INF;	// -INT_MAX?
-		previous = move;
-		++n;
-	}
-	previous->next = NULL;
-<<<<<<< HEAD
-
-	assert(movelist->n_moves == bit_count(moves));
-
-	return movelist->n_moves;
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-=======
-	movelist->n_moves = n;
-	return n;
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 }
 
 /**
@@ -444,10 +274,6 @@ static int w_potential_mobility = 1 << 5;
 static int w_low_parity = 1 << 3;
 static int w_mid_parity = 1 << 2;
 static int w_high_parity = 1 << 1;
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 31ff745 (Split movelist_evaluate_fast from movelist_evaluate)
 #else
 enum {
 	w_hash = 1 << 15,
@@ -474,25 +300,10 @@ void movelist_evaluate_fast(MoveList *movelist, Search *search, const HashData *
 {
 	Move	*move;
 	int	score, parity_weight;
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 6a997c5 (new get_moves_and_potential for AVX2)
 
 	// if (search->eval.n_empties < 21)	// mostly true
 		parity_weight = (search->eval.n_empties < 12) ? w_low_parity : w_mid_parity;
 	// else	parity_weight = (search->eval.n_empties < 30) ? w_high_parity : 0;
-=======
-	unsigned long long P, O;
-
-	// if (search->eval.n_empties < 21)	// mostly true
-		parity_weight = (search->eval.n_empties < 12) ? w_low_parity : w_mid_parity;
-<<<<<<< HEAD
-	else	parity_weight = (search->eval.n_empties < 30) ? w_high_parity : 0;
->>>>>>> 31ff745 (Split movelist_evaluate_fast from movelist_evaluate)
-=======
-	// else	parity_weight = (search->eval.n_empties < 30) ? w_high_parity : 0;
->>>>>>> 8a7e354 (Exclude hash init time from count games; other minor size opts)
 
 	move = movelist->move[0].next;
 	do {
@@ -501,8 +312,6 @@ void movelist_evaluate_fast(MoveList *movelist, Search *search, const HashData *
 		else if (move->x == hash_data->move[0]) score = (1 << 29);
 		else if (move->x == hash_data->move[1]) score = (1 << 28);
 		else {
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifdef __AVX2__
 			__m128i PO = _mm_xor_si128(*(__m128i *) &search->board,
 				_mm_or_si128(_mm_set1_epi64x(move->flipped), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x])));
@@ -511,102 +320,30 @@ void movelist_evaluate_fast(MoveList *movelist, Search *search, const HashData *
 			score += (36 - bit_weighted_count(_mm_extract_epi64(MM, 1))) * w_potential_mobility; // potential mobility
 			score += (36 - bit_weighted_count(_mm_cvtsi128_si64(MM))) * w_mobility; // real mobility
 
-=======
-#ifdef __AVX2__
-			__m128i PO = _mm_xor_si128(*(__m128i *) &search->board,
-				_mm_or_si128(_mm_broadcastq_epi64(*(__m128i *) &move->flipped), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x])));
-			score  = get_corner_stability(_mm_cvtsi128_si64(PO)) * w_corner_stability; // corner stability
-<<<<<<< HEAD
-<<<<<<< HEAD
-			__m128i MM = get_moves_and_potential(_mm256_permute4x64_epi64(_mm256_castsi128_si256(PO), 0x55), _mm256_broadcastq_epi64(PO));
-			score += (36 - bit_weighted_count(_mm_extract_epi64(MM, 1))) * w_potential_mobility; // potential mobility
-			score += (36 - bit_weighted_count(_mm_cvtsi128_si64(MM))) * w_mobility; // real mobility
->>>>>>> 6a997c5 (new get_moves_and_potential for AVX2)
-=======
-			MM.v2 = get_moves_and_potential(_mm256_broadcastq_epi64(_mm_unpackhi_epi64(PO, PO)), _mm256_broadcastq_epi64(PO));
-			score += (36 - bit_weighted_count(MM.ull[1])) * w_potential_mobility; // potential mobility
-			score += (36 - bit_weighted_count(MM.ull[0])) * w_mobility; // real mobility
-<<<<<<< HEAD
->>>>>>> 47c2589 (Fix w32-modern build and gcc build)
-=======
-=======
-			__m128i MM = get_moves_and_potential(_mm256_broadcastq_epi64(_mm_unpackhi_epi64(PO, PO)), _mm256_broadcastq_epi64(PO));
-			score += (36 - bit_weighted_count(_mm_extract_epi64(MM, 1))) * w_potential_mobility; // potential mobility
-			score += (36 - bit_weighted_count(_mm_cvtsi128_si64(MM))) * w_mobility; // real mobility
->>>>>>> bcc211a (Add _mm_extract_epi64 to x86 sim)
-
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
 #else
 			unsigned long long O = search->board.player ^ (move->flipped | x_to_bit(move->x));
 			unsigned long long P = search->board.opponent ^ move->flipped;
 			score  = get_corner_stability(O) * w_corner_stability; // corner stability
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
   #if defined(hasSSE2) && !defined(POPCOUNT)
 			__m128i MM = bit_weighted_count_sse(get_moves(P, O), get_potential_moves(P, O));
 			score += (36 - _mm_extract_epi16(MM, 4)) * w_potential_mobility; // potential mobility
 			score += (36 - _mm_cvtsi128_si32(MM)) * w_mobility; // real mobility
-<<<<<<< HEAD
-<<<<<<< HEAD
   #elif defined(__ARM_NEON)
-=======
-  #elif defined(hasNeon)
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
-=======
-  #elif defined(__ARM_NEON)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 			uint64x2_t MM = bit_weighted_count_neon(get_moves(P, O), get_potential_moves(P, O));
 			score += (36 - vgetq_lane_u32(vreinterpretq_u32_u64(MM), 2)) * w_potential_mobility; // potential mobility
 			score += (36 - vgetq_lane_u32(vreinterpretq_u32_u64(MM), 0)) * w_mobility; // real mobility
   #else
-<<<<<<< HEAD
-<<<<<<< HEAD
-			score += (36 - bit_weighted_count(get_potential_moves(P, O))) * w_potential_mobility; // potential mobility
-=======
-			score += (36 - get_potential_mobility(P, O)) * w_potential_mobility; // potential mobility
->>>>>>> e3cea41 (New vectored bit_weighted_count_sse)
-=======
 			score += (36 - bit_weighted_count(get_potential_moves(P, O))) * w_potential_mobility; // potential mobility
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
 			score += (36 - bit_weighted_count(get_moves(P, O))) * w_mobility; // real mobility
   #endif
 #endif
 			score += SQUARE_VALUE[move->x]; // square type
 			score += (search->eval.parity & QUADRANT_ID[move->x]) ? parity_weight : 0; // parity
 			SEARCH_UPDATE_ALL_NODES(search->n_nodes);
-=======
-			score = SQUARE_VALUE[move->x]; // square type
-			score += (search->eval.parity & QUADRANT_ID[move->x]) ? parity_weight : 0; // parity
-
-			// board_update(&search->board, move);
-			O = search->board.player ^ (move->flipped | x_to_bit(move->x));
-			P = search->board.opponent ^ move->flipped;
-			SEARCH_UPDATE_ALL_NODES(search->n_nodes);
-=======
->>>>>>> 6a997c5 (new get_moves_and_potential for AVX2)
-			score += (36 - get_potential_mobility(P, O)) * w_potential_mobility; // potential mobility
-			score += (36 - get_weighted_mobility(P, O)) * w_mobility; // real mobility
-<<<<<<< HEAD
-			// board_restore(&search->board, move);
->>>>>>> 31ff745 (Split movelist_evaluate_fast from movelist_evaluate)
-=======
-#endif
-			score += SQUARE_VALUE[move->x]; // square type
-			score += (search->eval.parity & QUADRANT_ID[move->x]) ? parity_weight : 0; // parity
-			SEARCH_UPDATE_ALL_NODES(search->n_nodes);
->>>>>>> 6a997c5 (new get_moves_and_potential for AVX2)
 		}
 		move->score = score;
 	} while ((move = move->next));
 }
-<<<<<<< HEAD
-=======
-#endif
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-=======
->>>>>>> 31ff745 (Split movelist_evaluate_fast from movelist_evaluate)
 
 /**
  * @brief Evaluate a list of move in order to sort it.
@@ -630,8 +367,6 @@ void movelist_evaluate_fast(MoveList *movelist, Search *search, const HashData *
  */
 void movelist_evaluate(MoveList *movelist, Search *search, const HashData *hash_data, const int alpha, const int depth)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	static const char min_depth_table[64] = {
 		19, 18, 18, 18, 17, 17, 17, 16,	// (Never for empties < 14)
 		16, 16, 15, 15, 15, 14, 14, 14,
@@ -648,93 +383,17 @@ void movelist_evaluate(MoveList *movelist, Search *search, const HashData *hash_
 	HashData dummy;
 	Eval eval0;
 	Board board0;
-=======
-#ifndef TUNE_EDAX
-	enum {
-		w_hash = 1 << 15,
-		w_eval = 1 << 15,
-		w_mobility = 1 << 15,
-		w_corner_stability = 1 << 11,
-		w_edge_stability = 1 << 11,
-		w_potential_mobility = 1 << 5,
-		w_low_parity = 1 << 3,
-		w_mid_parity = 1 << 2,
-		w_high_parity = 1 << 1
-	};
-#endif
-=======
->>>>>>> 31ff745 (Split movelist_evaluate_fast from movelist_evaluate)
-	static const char min_depth_table[64] = {
-		19, 18, 18, 18, 17, 17, 17, 16,	// (Never for empties < 14)
-		16, 16, 15, 15, 15, 14, 14, 14,
-		13, 13, 13, 12, 12, 12, 11, 11,
-		11, 10, 10, 10,  9,  9,  9,  9,
-		 9,  9,  9,  9,  9,  9,  9,  9,
-		 9,  9,  9,  9,  9,  9,  9,  9,
-		 9,  9,  9,  9,  9,  9,  9,  9,
-		 9,  9,  9,  9,  9,  9,  9,  9
-	};
-	Move *move;
-	int	sort_depth, min_depth, sort_alpha, score, empties, parity_weight;
-	unsigned long long moves;
-	HashData dummy;
-	Eval eval0;
-	Board board0;
-
-	empties = search->eval.n_empties;
-<<<<<<< HEAD
-<<<<<<< HEAD
-#ifdef TUNE_EDAX
-	parity_weight = (empties < 12) ? w_low_parity : ((empties < 21) ? w_mid_parity : ((empties < 30) ? w_high_parity : 0));
-<<<<<<< HEAD
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
 
 	empties = search->eval.n_empties;
-=======
->>>>>>> 31ff745 (Split movelist_evaluate_fast from movelist_evaluate)
 	// min_depth = 9;
 	// if (empties <= 27) min_depth += (30 - empties) / 3;
 	min_depth = min_depth_table[empties];
 
-<<<<<<< HEAD
 	if (depth >= min_depth) {
 		if (empties < 21)
 			parity_weight = (empties < 12) ? w_low_parity : w_mid_parity;
 		else	parity_weight = (empties < 30) ? w_high_parity : 0;
-<<<<<<< HEAD
-=======
-	min_depth = 9;
-	if (empties <= 27) min_depth += (30 - empties) / 3;
-=======
-#else
-	parity_weight = parity_weight_table[empties];
-#endif
-=======
-	if (empties < 21)
-		parity_weight = (empties < 12) ? w_low_parity : w_mid_parity;
-	else	parity_weight = (empties < 30) ? w_high_parity : 0;
->>>>>>> 867c81c (Omit restore board/parity in search_shallow; tweak NWS_STABILITY)
-	// min_depth = 9;
-	// if (empties <= 27) min_depth += (30 - empties) / 3;
-	min_depth = min_depth_table[empties];
 
->>>>>>> 11a54a6 (Revise get_corner_stability and hash_cleanup)
-	if (depth >= min_depth) {
-		sort_depth = (depth - 15) / 3;
-<<<<<<< HEAD
-		if (hash_data && hash_data->upper < alpha) sort_depth -= 2; 
-<<<<<<< HEAD
-		if (search->eval.n_empties >= 27) ++sort_depth;
-		if (sort_depth < 0) sort_depth = 0; else if (sort_depth > 6) sort_depth = 6;
-	} else {
-		sort_depth = -1;
-	}
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-
-<<<<<<< HEAD
-=======
-
->>>>>>> 31ff745 (Split movelist_evaluate_fast from movelist_evaluate)
 		sort_depth = (depth - 15) / 3;
 		if (hash_data->upper < alpha) sort_depth -= 2;
 		if (empties >= 27) ++sort_depth;
@@ -743,43 +402,14 @@ void movelist_evaluate(MoveList *movelist, Search *search, const HashData *hash_
 
 		board0 = search->board;
 		eval0 = search->eval;
-<<<<<<< HEAD
-=======
-=======
-		if (hash_data->upper < alpha) sort_depth -= 2;
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
-		if (empties >= 27) ++sort_depth;
-		if (sort_depth < 0) sort_depth = 0;
-		else if (sort_depth > 6) sort_depth = 6;
-
-		backup.board = search->board;
-		backup.eval = search->eval;
-<<<<<<< HEAD
-		org_selectivity = search->selectivity;
-		search->selectivity = NO_SELECTIVITY;
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-=======
->>>>>>> af8242f (Imply NO_SELECTIVITY in shallow searches)
-=======
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 		sort_alpha = MAX(SCORE_MIN, alpha - SORT_ALPHA_DELTA);
 
 		move = movelist->move[0].next;
 		do {
 			// move_evaluate(move, search, hash_data, sort_alpha, sort_depth);
 			if (move_wipeout(move, &search->board)) score = (1 << 30);
-<<<<<<< HEAD
-<<<<<<< HEAD
 			else if (move->x == hash_data->move[0] && hash_data->wl.c.depth > sort_depth - 3) score = (1 << 29);	// https://github.com/eukaryo/edax-reversi-AVX-v446mod2
 			else if (move->x == hash_data->move[1] && hash_data->wl.c.depth > sort_depth - 3) score = (1 << 28);
-=======
-			else if (move->x == hash_data->move[0]) score = (1 << 29);
-			else if (move->x == hash_data->move[1]) score = (1 << 28);
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-=======
-			else if (move->x == hash_data->move[0] && hash_data->wl.c.depth > sort_depth - 3) score = (1 << 29);	// https://github.com/eukaryo/edax-reversi-AVX-v446mod2
-			else if (move->x == hash_data->move[1] && hash_data->wl.c.depth > sort_depth - 3) score = (1 << 28);
->>>>>>> dd4bc2c (Update move.c)
 			else {
 				score = SQUARE_VALUE[move->x]; // square type
 				score += (search->eval.parity & QUADRANT_ID[move->x]) ? parity_weight : 0; // parity
@@ -787,18 +417,8 @@ void movelist_evaluate(MoveList *movelist, Search *search, const HashData *hash_
 				search_update_midgame(search, move);
 
 				SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
-<<<<<<< HEAD
-<<<<<<< HEAD
 #ifdef __AVX2__
-<<<<<<< HEAD
-<<<<<<< HEAD
-				__m128i MM =  get_moves_and_potential(_mm256_set1_epi64x(search->board.player), _mm256_set1_epi64x(search->board.opponent));
-=======
-				__m128i MM =  get_moves_and_potential(_mm256_broadcastq_epi64(*(__m128i *) &search->board.player), _mm256_broadcastq_epi64(*(__m128i *) &search->board.opponent));
->>>>>>> bcc211a (Add _mm_extract_epi64 to x86 sim)
-=======
 				__m128i MM =  get_moves_and_potential(_mm256_set1_epi64x(search->board.player), _mm256_set1_epi64x(search->board.opponent));
->>>>>>> 88b2057 (Replace broadcast from memory with set1)
 				score += (36 - bit_weighted_count(_mm_extract_epi64(MM, 1))) * w_potential_mobility; // potential mobility
 				score += (36 - bit_weighted_count(moves = _mm_cvtsi128_si64(MM))) * w_mobility; // real mobility
 #else
@@ -844,99 +464,6 @@ void movelist_evaluate(MoveList *movelist, Search *search, const HashData *hash_
 
 	} else	// sort_depth = -1
 		movelist_evaluate_fast(movelist, search, hash_data);
-=======
-	sort_alpha = MAX(SCORE_MIN, alpha - SORT_ALPHA_DELTA);
-	foreach_move (move, *movelist) {
-		move_evaluate(move, search, hash_data, sort_alpha, sort_depth);
-=======
-=======
-#ifdef __AVX2__
-				V2DI MM;
-				MM.v2 =  get_moves_and_potential(_mm256_broadcastq_epi64(*(__m128i *) &search->board.player), _mm256_broadcastq_epi64(*(__m128i *) &search->board.opponent));
-				score += (36 - bit_weighted_count(MM.ull[1])) * w_potential_mobility; // potential mobility
-				score += (36 - bit_weighted_count(moves = MM.ull[0])) * w_mobility; // real mobility
-#else
-				moves = board_get_moves(&search->board);
-  #if defined(hasSSE2) && !defined(POPCOUNT)
-				__m128i MM = bit_weighted_count_sse(moves, get_potential_moves(search->board.player, search->board.opponent));
-				score += (36 - _mm_extract_epi16(MM, 4)) * w_potential_mobility; // potential mobility
-				score += (36 - _mm_cvtsi128_si32(MM)) * w_mobility; // real mobility
-  #elif defined(__ARM_NEON)
-				uint64x2_t MM = bit_weighted_count_neon(moves, get_potential_moves(search->board.player, search->board.opponent));
-				score += (36 - vgetq_lane_u32(vreinterpretq_u32_u64(MM), 2)) * w_potential_mobility; // potential mobility
-				score += (36 - vgetq_lane_u32(vreinterpretq_u32_u64(MM), 0)) * w_mobility; // real mobility
-<<<<<<< HEAD
-#else
-<<<<<<< HEAD
->>>>>>> 6a997c5 (new get_moves_and_potential for AVX2)
-				score += (36 - get_potential_mobility(search->board.player, search->board.opponent)) * w_potential_mobility; // potential mobility
-=======
-				score += (36 - bit_weighted_count(get_potential_moves(search->board.player, search->board.opponent))) * w_potential_mobility; // potential mobility
->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations)
-				score += (36 - bit_weighted_count(board_get_moves(&search->board))) * w_mobility; // real mobility
-=======
-  #else
-				score += (36 - bit_weighted_count(get_potential_moves(search->board.player, search->board.opponent))) * w_potential_mobility; // potential mobility
-				score += (36 - bit_weighted_count(moves)) * w_mobility; // real mobility
-  #endif
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
-#endif
-				score += get_edge_stability(search->board.opponent, search->board.player) * w_edge_stability; // edge stability
-				switch (sort_depth) {
-				case 0:
-					score += (SCORE_MAX - search_eval_0(search)) * (w_eval >> 2);	// 1 level score bonus
-					break;
-				case 1:
-					score += (SCORE_MAX + search_eval_1(search, sort_alpha, SCORE_MAX, moves)) * (w_eval >> 1);	// 2 level score bonus
-					break;
-				case 2:
-					score += (SCORE_MAX - search_eval_2(search, SCORE_MIN, -sort_alpha, moves)) * (w_eval >> 1);	// 3 level score bonus
-					break;
-				default:	// 3 to 6
-					if (hash_get_from_board(&search->hash_table, &search->board, &dummy)) score += w_hash;	// bonus if the position leads to a position stored in the hash-table
-					// org_selectivity = search->selectivity;
-					// search->selectivity = NO_SELECTIVITY;	// No probcut in PVS_shallow
-					score += ((SCORE_MAX - PVS_shallow(search, SCORE_MIN, -sort_alpha, sort_depth))) * w_eval;	// > 3 level bonus
-					// search->selectivity = org_selectivity;
-					break;
-				}
-
-				search_restore_midgame(search, move->x, &eval0);
-				search->board = board0;
-			}
-			move->score = score;
-		} while ((move = move->next));
-
-<<<<<<< HEAD
-	} else {	// sort_depth = -1
-		move = movelist->move[0].next;
-		do {
-			// move_evaluate(move, search, hash_data, sort_alpha, -1);
-			if (move_wipeout(move, &search->board)) score = (1 << 30);
-			else if (move->x == hash_data->move[0]) score = (1 << 29);
-			else if (move->x == hash_data->move[1]) score = (1 << 28);
-			else {
-				score = SQUARE_VALUE[move->x]; // square type
-				score += (search->eval.parity & QUADRANT_ID[move->x]) ? parity_weight : 0; // parity
-
-				// board_update(&search->board, move);
-				O = search->board.player ^ (move->flipped | x_to_bit(move->x));
-				P = search->board.opponent ^ move->flipped;
-				SEARCH_UPDATE_ALL_NODES(search->n_nodes);
-				score += (36 - get_potential_mobility(P, O)) * w_potential_mobility; // potential mobility
-				score += get_corner_stability(O) * w_corner_stability; // corner stability
-				score += (36 - get_weighted_mobility(P, O)) * w_mobility; // real mobility
-				// board_restore(&search->board, move);
-			}
-			move->score = score;
-		} while ((move = move->next));
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-	}
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	} else	// sort_depth = -1
-		movelist_evaluate_fast(movelist, search, hash_data);
->>>>>>> 31ff745 (Split movelist_evaluate_fast from movelist_evaluate)
 }
 
 /**
@@ -971,48 +498,7 @@ Move* movelist_sort_bestmove(MoveList *movelist, const int move)
  */
 void movelist_sort_cost(MoveList *movelist, const HashData *hash_data)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	Move *iter, *prev, *m, *hashmove0, *hashmove1;
-
-<<<<<<< HEAD
-<<<<<<< HEAD
-	hashmove0 = hashmove1 = NULL;
-	for (prev = iter = &movelist->move[0]; (m = prev->next); prev = m) {
-		if (m->x == hash_data->move[0])
-			hashmove0 = prev;
-		if (m->x == hash_data->move[1])
-			hashmove1 = prev;
-=======
-	foreach_move(iter, movelist) {
-=======
-	foreach_move(iter, *movelist) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-		if (iter->x == hash_data->move[0]) iter->cost = INT_MAX;
-		else if (iter->x == hash_data->move[1]) iter->cost = INT_MAX - 1;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-	}
-	if (hashmove0) {
-		m = hashmove0->next;
-		hashmove0->next = m->next;
-		m->next = iter->next;
-		if (hashmove1 == iter)
-			hashmove1 = m;
-		iter = iter->next = m;
-	}
-	if (hashmove1) {
-		m = hashmove1->next;
-		hashmove1->next = m->next;
-		m->next = iter->next;
-		iter = iter->next = m;
-	}
-=======
-	Move *iter, *m;
-	Move *hashmove[2];
-	int	i;
-=======
 	Move *iter, *prev, *m, *hashmove0, *hashmove1;
->>>>>>> cbf92ce (Fix occasional freezes)
 
 	hashmove0 = hashmove1 = NULL;
 	for (prev = iter = &movelist->move[0]; (m = prev->next); prev = m) {
@@ -1035,19 +521,6 @@ void movelist_sort_cost(MoveList *movelist, const HashData *hash_data)
 		m->next = iter->next;
 		iter = iter->next = m;
 	}
-<<<<<<< HEAD
-	iter = &movelist->move[0];
-	for (i = 0; i <= 1; ++i)
-		if (hashmove[i]) {
-			m = hashmove[i]->next;
-			hashmove[i]->next = m->next;
-			m->next = iter->next;
-			iter->next = m;
-			iter = iter->next;
-		}
->>>>>>> ad8c72e (refactor movelist_sort and other sorts)
-=======
->>>>>>> cbf92ce (Fix occasional freezes)
 	while ((iter = move_next_most_expensive(iter)))
 		;
 }
@@ -1058,37 +531,6 @@ void movelist_sort_cost(MoveList *movelist, const HashData *hash_data)
  */
 void movelist_sort(MoveList *movelist)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	// foreach_best_move(move, *movelist) ;
-
-	Move *previous_best = &movelist->move[0];
-	while (previous_best->next->next) {	// until last 2
-		Move *best = previous_best;
-		Move *move = previous_best->next;
-		do {
-			if (move->next->score > best->next->score)
-				best = move;
-			move = move->next;
-		} while (move->next);
-		// if (previous_best != best) {
-		move = best->next;
-		best->next = move->next;
-		move->next = previous_best->next;
-		previous_best->next = move;
-		// }
-		previous_best = previous_best->next;
-	}
-=======
-	Move *move;
-
-<<<<<<< HEAD
-	foreach_best_move(move, movelist) ;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	foreach_best_move(move, *movelist) ;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	// foreach_best_move(move, *movelist) ;
 
 	Move *previous_best = &movelist->move[0];
@@ -1108,7 +550,6 @@ void movelist_sort(MoveList *movelist)
 		// }
 		previous_best = previous_best->next;
 	}
->>>>>>> ad8c72e (refactor movelist_sort and other sorts)
 }
 
 /**
diff --git a/src/move.h b/src/move.h
index 43b4891..31722b6 100644
--- a/src/move.h
+++ b/src/move.h
@@ -3,15 +3,7 @@
  *
  * @brief Move & list of moves management - header file.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2022
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -35,13 +27,6 @@ typedef struct Move {
 
 /** (simple) list of a legal moves */
 typedef struct MoveList {
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	Move move[MAX_MOVE + 1];   /**< array of legal moves */
->>>>>>> 4b74548 (Fix MAX_MOVE)
-=======
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
 	int n_moves;
 	Move move[MAX_MOVE + 1];   /**< array of legal moves */
 } MoveList;
@@ -76,13 +61,6 @@ void movelist_print(const MoveList*, const int, FILE*);
 Move* movelist_sort_bestmove(MoveList*, const int);
 void movelist_evaluate_fast(MoveList*, struct Search*, const struct HashData*);
 void movelist_evaluate(MoveList*, struct Search*, const struct HashData*, const int, const int);
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-// void movelist_evaluate_fast(MoveList*, struct Search*);
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
-=======
->>>>>>> 31ff745 (Split movelist_evaluate_fast from movelist_evaluate)
 
 // bool move_wipeout(const Move*, const struct Board*);	// Check if a move wins 64-0.
 #define	move_wipeout(move,board)	((move)->flipped == (board)->opponent)
@@ -104,29 +82,12 @@ bool movelist_is_single(const MoveList*);
 
 /** macro to iterate over the movelist */
 #define foreach_move(iter, movelist) \
-<<<<<<< HEAD
-<<<<<<< HEAD
-	for ((iter) = (movelist).move[0].next; (iter); (iter) = (iter)->next)
-
-/** macro to iterate over the movelist from best to worst move */
-#define foreach_best_move(iter, movelist) \
-	(iter) = &(movelist).move[0];\
-	while (((iter) = move_next_best(iter)))
-=======
-	for ((iter) = (movelist).move->next; (iter); (iter) = (iter)->next)
-
-/** macro to iterate over the movelist from best to worst move */
-#define foreach_best_move(iter, movelist) \
-	for ((iter) = movelist_best(&movelist); (iter); (iter) = move_next_best(iter))
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	for ((iter) = (movelist).move[0].next; (iter); (iter) = (iter)->next)
 
 /** macro to iterate over the movelist from best to worst move */
 #define foreach_best_move(iter, movelist) \
 	(iter) = &(movelist).move[0];\
 	while (((iter) = move_next_best(iter)))
->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1)
 
 void line_init(Line*, const int);
 void line_push(Line*, const int);
diff --git a/src/nboard.c b/src/nboard.c
index 4418846..f3345a0 100644
--- a/src/nboard.c
+++ b/src/nboard.c
@@ -136,30 +136,12 @@ void ui_loop_nboard(UI *ui)
 			nboard_send("set myname Edax%d", options.level);
 
 		} else if (strcmp(cmd, "game") == 0) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> f33d573 (Fix 'nboard pass not parsed' bug, crc32c for game hash too)
 			Game game;
 			int lastmove = parse_ggf(&game, param);
 			if (lastmove >= 0) {
 				game_get_board(&game, 60, &play->initial_board);
-<<<<<<< HEAD
-<<<<<<< HEAD
 				if (lastmove == PASS)	// https://github.com/okuhara/edax-reversi-AVX/issues/1
 					board_pass(&play->initial_board);
-=======
-			Game game[1];
-			if (parse_ggf(game, param) != param) {
-				game_get_board(game, 60, &play->initial_board);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-				if (lastmove == PASS)
-=======
-				if (lastmove == PASS)	// https://github.com/okuhara/edax-reversi-AVX/issues/1
->>>>>>> 24abc1e (Revise comments and readme)
-					board_pass(&play->initial_board);
->>>>>>> f33d573 (Fix 'nboard pass not parsed' bug, crc32c for game hash too)
 				play_new(play);
 			} else {
 				nboard_fail("Cannot parse game \"%s\"", param);
diff --git a/src/obftest.c b/src/obftest.c
index 5aa56f3..7915a6f 100644
--- a/src/obftest.c
+++ b/src/obftest.c
@@ -159,18 +159,8 @@ static void obf_search(Search *search, OBF *obf, int n)
 
 	search_cleanup(search);
 	search_set_board(search, &obf->board, obf->player);
-<<<<<<< HEAD
-<<<<<<< HEAD
 	search_set_level(search, options.level, search->eval.n_empties);
 	if (options.depth >= 0) search->options.depth = MIN(options.depth, search->eval.n_empties);
-=======
-	search_set_level(search, options.level, search->n_empties);
-	if (options.depth >= 0) search->options.depth = MIN(options.depth, search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	search_set_level(search, options.level, search->eval.n_empties);
-	if (options.depth >= 0) search->options.depth = MIN(options.depth, search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 	if (options.selectivity >= 0) search->options.selectivity = options.selectivity;
 
 	if (options.play_type == EDAX_TIME_PER_MOVE) search_set_move_time(search, options.time);
@@ -232,15 +222,7 @@ static void obf_build(Search *search, OBF *obf, int n)
 
 	search_cleanup(search);
 	search_set_board(search, &obf->board, obf->player);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	search_set_level(search, options.level, search->eval.n_empties);
-=======
-	search_set_level(search, options.level, search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	search_set_level(search, options.level, search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 	if (options.depth >= 0) {
 		search->options.depth = MAX(options.depth, search->eval.n_empties);
 		search->options.selectivity = 0;
diff --git a/src/opening.c b/src/opening.c
index ee31140..3d8297e 100644
--- a/src/opening.c
+++ b/src/opening.c
@@ -3,15 +3,7 @@
  *
  * Opening Name aliasing.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2020
-=======
- * @date 1998 - 2018
->>>>>>> 4cba71a (Use utf-8 for french/degree/micro chars; consistent capitalize in opening names for string-pooling)
-=======
- * @date 1998 - 2020
->>>>>>> 0a166fd (Remove 1 element array coding style)
  * @author Richard Delorme
  * @version 4.4
  */
diff --git a/src/options.c b/src/options.c
index 5aa8320..a382d2a 100644
--- a/src/options.c
+++ b/src/options.c
@@ -3,15 +3,7 @@
  *
  * Options reader.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2022
->>>>>>> 44fd278 (Rearrange PVS_shallow loop)
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -28,15 +20,7 @@
 
 /** global options with default value */
 Options options = {
-<<<<<<< HEAD
-<<<<<<< HEAD
-	22, // hash table size (2^22 * 24 * 1.125 = 113MB)
-=======
-	21, // hash table size (2^21 * 24 * 2.0625 = 104MB)
->>>>>>> 44fd278 (Rearrange PVS_shallow loop)
-=======
 	22, // hash table size (2^22 * 24 * 1.125 = 113MB)
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 
 	{0,-2,-3}, // inc_sort_depth
 
diff --git a/src/perft.c b/src/perft.c
index 89c589f..d7f7c36 100644
--- a/src/perft.c
+++ b/src/perft.c
@@ -3,19 +3,7 @@
  *
  * @brief Move generator test.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2021
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
-=======
- * @date 1998 - 2022
->>>>>>> 8a7e354 (Exclude hash init time from count games; other minor size opts)
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -886,36 +874,6 @@ unsigned long long shape_unique(unsigned long long shape)
 }
 
 /**
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
- * @brief Compute a hash code.
- *
- * @param shape Board shape.
- * @return The hash code of the bitboard.
- */
-unsigned long long shape_get_hash_code(const unsigned long long shape)
-{
-	unsigned long long h;
-	const unsigned char *p = (const unsigned char*)&shape;
-
-	h  = hash_rank[0][p[0]];
-	h ^= hash_rank[1][p[1]];
-	h ^= hash_rank[2][p[2]];
-	h ^= hash_rank[3][p[3]];
-	h ^= hash_rank[4][p[4]];
-	h ^= hash_rank[5][p[5]];
-	h ^= hash_rank[6][p[6]];
-	h ^= hash_rank[7][p[7]];
-	// h ^= hash_rank[8][p[8]];	// gcc9: outside array bounds
-
-	return h;
-}
-
-/**
->>>>>>> 3848d16 (Satisfy msys2 and gcc 9 warnings)
-=======
->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash)
  * Array of shape.
  */
 typedef struct {
@@ -1132,10 +1090,6 @@ void count_shapes(const Board *board, const int depth, const int size)
  * @param line line to reach the target position
  */
 bool seek_position(const Board *target, const Board *board, Line *line) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 	const unsigned long long mask = target->opponent | target->player;
 	unsigned long long moves;
 	int x;
@@ -1144,23 +1098,6 @@ bool seek_position(const Board *target, const Board *board, Line *line) {
 	if (board_equal(board, target)) return true;
 
 	moves = board_get_moves(board);
-<<<<<<< HEAD
-=======
- 	const unsigned long long mask = target->opponent | target->player;
- 	unsigned long long moves;
- 	int x;
- 	Board next;
- 	
- 	if (board_equal(board, target)) return true;
- 		
-<<<<<<< HEAD
- 	moves = get_moves(board->player, board->opponent);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
- 	moves = board_get_moves(board);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
-=======
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 	if (moves) {
 		moves &= mask;
 		foreach_bit (x, moves) {
diff --git a/src/play.c b/src/play.c
index a8414be..e08e2e0 100644
--- a/src/play.c
+++ b/src/play.c
@@ -3,15 +3,7 @@
  *
  * Edax play control.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2020
-=======
- * @date 1998 - 2018
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
- * @date 1998 - 2020
->>>>>>> 0a166fd (Remove 1 element array coding style)
  * @author Richard Delorme
  * @version 4.4
  */
@@ -306,15 +298,7 @@ void play_go(Play *play, const bool update)
 				
 		play->result = *search->result;
 		play->state = IS_WAITING;
-<<<<<<< HEAD
-<<<<<<< HEAD
-		if (!board_get_move_flip(&play->board, search->result->move, &move) && move.x != PASS) {
-=======
-		if (!board_get_move(&play->board, search->result->move, &move) && move.x != PASS) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		if (!board_get_move_flip(&play->board, search->result->move, &move) && move.x != PASS) {
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 			fatal_error("bad move found: %s\n", move_to_string(move.x, play->player, s_move));
 		}
 		if (options.verbosity) {
@@ -335,15 +319,7 @@ void play_go(Play *play, const bool update)
 			if (search->options.separator) puts(search->options.separator);
 		}
 		search_set_board(search, &play->board, play->player);
-<<<<<<< HEAD
-<<<<<<< HEAD
-		search_set_level(search, options.level, search->eval.n_empties);
-=======
-		search_set_level(search, options.level, search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		search_set_level(search, options.level, search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 		if (options.play_type == EDAX_TIME_PER_MOVE) search_set_move_time(search, options.time);
 		else search_set_game_time(search, play->time[play->player].left);
 
@@ -359,15 +335,7 @@ void play_go(Play *play, const bool update)
 		search_run(search);
 		play->result = *search->result;
 		play->state = IS_WAITING;
-<<<<<<< HEAD
-<<<<<<< HEAD
 		if (!board_get_move_flip(&play->board, search->result->move, &move) && move.x != PASS) {
-=======
-		if (!board_get_move(&play->board, search->result->move, &move) && move.x != PASS) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		if (!board_get_move_flip(&play->board, search->result->move, &move) && move.x != PASS) {
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 			fatal_error("bad move found: %s\n", move_to_string(move.x, play->player, s_move));
 		}
 		if (options.verbosity) {
@@ -416,15 +384,7 @@ void play_hint(Play *play, int n)
 		if (search->options.separator) puts(search->options.separator);
 	}
 	search_set_board(search, &play->board, play->player);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	search_set_level(search, options.level, search->eval.n_empties);
-=======
-	search_set_level(search, options.level, search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	search_set_level(search, options.level, search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 	if (n > search->movelist.n_moves) n = search->movelist.n_moves;
 	info("<hint %d moves>\n", n);
 
@@ -502,28 +462,12 @@ void* play_ponder_run(void *v)
 
 		// guess opponent move and start the search
 		if (play->state == IS_PONDERING && move.x != NOMOVE) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-			board_get_move_flip(&board, move.x, &move);
-=======
-			board_get_move(&board, move.x, &move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 			board_get_move_flip(&board, move.x, &move);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 
 			board_update(&board, &move);
 				play->ponder.board = board;
 				search_set_board(search, &board, player ^ 1);
-<<<<<<< HEAD
-<<<<<<< HEAD
 				search_set_level(search, options.level, search->eval.n_empties);
-=======
-				search_set_level(search, options.level, search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-				search_set_level(search, options.level, search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 				search_run(search);
 				if (options.info && play->state == IS_PONDERING) {
 					printf("[ponder after %s id.%d: ", move_to_string(move.x, player, m), search->id);
@@ -534,15 +478,7 @@ void* play_ponder_run(void *v)
 		} else {
 			play->ponder.board = board;
 			search_set_board(search, &board, player);
-<<<<<<< HEAD
-<<<<<<< HEAD
-			search_set_ponder_level(search, options.level, search->eval.n_empties);
-=======
-			search_set_ponder_level(search, options.level, search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 			search_set_ponder_level(search, options.level, search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 			log_print(xboard_log, "edax (ponder)> start search\n");
 			search_run(search);
 			log_print(xboard_log, "edax (ponder)> search ended\n");
@@ -712,15 +648,7 @@ bool play_move(Play *play, int x)
 	Move move;
 
 	move = MOVE_INIT;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	board_get_move_flip(&play->board, x, &move);
-=======
-	board_get_move(&play->board, x, &move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	board_get_move_flip(&play->board, x, &move);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 	if (board_check_move(&play->board, &move)) {
 		play_update(play, &move);
 		return true;
@@ -798,15 +726,7 @@ static int play_alternative(Play *play, Move *played, Move *alternative, int *de
 		}
 	}
 	if (search->movelist.n_moves >= 1 || played->x == NOMOVE) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 		search_set_level(search, options.level, search->eval.n_empties);
-=======
-		search_set_level(search, options.level, search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		search_set_level(search, options.level, search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 		search->options.verbosity = 0;
 		search_run(search);
 		search->options.verbosity = options.verbosity;
@@ -1074,8 +994,6 @@ void play_print(Play *play, FILE *f)
 	bool gameover;
 
 	if (p == BLACK) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 		bk = play->board.player;
 		wh = play->board.opponent;
 	} else {
@@ -1091,42 +1009,12 @@ void play_print(Play *play, FILE *f)
 	}
 
 	moves = board_get_moves(&play->board);
-<<<<<<< HEAD
 	discs[BLACK] = bit_count(bk);
 	discs[WHITE] = bit_count(wh);
 	mobility[BLACK] = get_mobility(bk, wh);
 	mobility[WHITE] = get_mobility(wh, bk);
 	gameover = (mobility[BLACK] + mobility[WHITE] == 0);
 
-=======
-		bk = board->player;
-		wh = board->opponent;
-=======
-		bk = play->board.player;
-		wh = play->board.opponent;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	} else {
-		bk = play->board.opponent;
-		wh = play->board.player;
-	}
-	if ((p ^ (play->i_game & 1)) == BLACK) {
-		bk0 = play->initial_board.player;
-		wh0 = play->initial_board.opponent;
-	} else {
-		bk0 = play->initial_board.opponent;
-		wh0 = play->initial_board.player;
-	}
-
-	moves = get_moves(play->board.player, play->board.opponent);
-=======
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
-	discs[BLACK] = bit_count(bk);
-	discs[WHITE] = bit_count(wh);
-	mobility[BLACK] = get_mobility(bk, wh);
-	mobility[WHITE] = get_mobility(wh, bk);
-	gameover = (mobility[BLACK] + mobility[WHITE] == 0);
-
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
 	memset(history, 0, 64);
 	for (i = j = 0; i < play->i_game; i++) {
 		x = play->game[i].x;
@@ -1141,15 +1029,7 @@ void play_print(Play *play, FILE *f)
 			square = 2 - (wh & 1) - 2 * (bk & 1);
 			if ((square == EMPTY) && (moves & 1))
 				square = EMPTY + 1;
-<<<<<<< HEAD
-<<<<<<< HEAD
-			fputc(color[square + 1], f);
-=======
-			fputc(color[square], f);
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
 			fputc(color[square + 1], f);
->>>>>>> bc93772 (Avoid modern compliler warnings)
 			fputc(' ', f);
 			bk >>= 1;
 			wh >>= 1;
@@ -1165,29 +1045,6 @@ void play_print(Play *play, FILE *f)
 			fprintf(f, "   %2d discs  %2d moves   ", discs[BLACK], mobility[BLACK]);
 			break;
 		case 3:
-<<<<<<< HEAD
-<<<<<<< HEAD
-			if (gameover) fprintf(f, "       Game over        ");
-			else fprintf(f, "  ply %2d (%2d empties)   ", play->i_game + 1, board_count_empties(&play->board));
-			break;
-		case 4:
-			if (gameover) {
-				if (discs[BLACK] > discs[WHITE]) fprintf(f, "       %s won        ", player[BLACK]);
-				else if (discs[BLACK] < discs[WHITE]) fprintf(f, "       %s won        ", player[WHITE]);
-				else fprintf(f, "          draw          ");
-			} else fprintf(f, "    %s's turn (%c)    ", player[p], color[p + 1]);
-=======
-			if (gameover) fprintf(f,"       Game over        ");
-			else fprintf(f,"  ply %2d (%2d empties)   ", play->i_game + 1, board_count_empties(&play->board));
-			break;
-		case 4:
-			if (gameover) {
-				if (discs[BLACK] > discs[WHITE]) fprintf(f,"       %s won        ", player[BLACK]);
-				else if (discs[BLACK] < discs[WHITE]) fprintf(f,"       %s won        ", player[WHITE]);
-				else fprintf(f,"          draw          ");
-			} else fprintf(f,"    %s's turn (%c)    ",player[p], color[p]);
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
 			if (gameover) fprintf(f, "       Game over        ");
 			else fprintf(f, "  ply %2d (%2d empties)   ", play->i_game + 1, board_count_empties(&play->board));
 			break;
@@ -1197,7 +1054,6 @@ void play_print(Play *play, FILE *f)
 				else if (discs[BLACK] < discs[WHITE]) fprintf(f, "       %s won        ", player[WHITE]);
 				else fprintf(f, "          draw          ");
 			} else fprintf(f, "    %s's turn (%c)    ", player[p], color[p + 1]);
->>>>>>> bc93772 (Avoid modern compliler warnings)
 			break;
 		case 6:
 			fprintf(f, "   %2d discs  %2d moves   ", discs[WHITE], mobility[WHITE]);
@@ -1320,15 +1176,7 @@ bool play_force_go(Play *play, Move *move)
 				board_symetry(play->force.real + play->force.i_move, s, &sym);
 				if (board_equal(&play->board, &sym)) {
 					x = symetry(play->force.move[play->force.i_move].x, s);
-<<<<<<< HEAD
-<<<<<<< HEAD
 					board_get_move_flip(&play->board, x, move);
-=======
-					board_get_move(&play->board, x, move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-					board_get_move_flip(&play->board, x, move);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 					return true;
 				}
 			}
@@ -1355,15 +1203,7 @@ void play_symetry(Play *play, const int sym)
 	board = play->initial_board;
 	for (i = 0; i  < play->n_game; ++i) {
 		x = symetry(play->game[i].x, sym);
-<<<<<<< HEAD
-<<<<<<< HEAD
-		board_get_move_flip(&board, x, &move);
-=======
-		board_get_move(&board, x, &move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		board_get_move_flip(&board, x, &move);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 		board_update(&board, &move);
 		play->game[i] = move;
 	}
diff --git a/src/qemu.sh b/src/qemu.sh
index 918667d..3c7b7e4 100644
--- a/src/qemu.sh
+++ b/src/qemu.sh
@@ -1,5 +1,3 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
 if [ "$1" = arm32 ]
 then
 make build OS=linux ARCH=arm-neon COMP=gcc CC=arm-linux-gnueabi-gcc
@@ -15,34 +13,5 @@ make build OS=linux ARCH=arm COMP=gcc CC=aarch64-linux-gnu-gcc
 cd ../bin
 qemu-aarch64 -L /usr/aarch64-linux-gnu ./lEdax-arm -n 1 -l 60 -solve ../problem/fforum-20-39.obf
 fi
-=======
-=======
-if [ "$1" = arm32 ]
-then
-make build OS=linux ARCH=arm-neon COMP=gcc CC=arm-linux-gnueabi-gcc
-cd ../bin
-qemu-arm -L /usr/arm-linux-gnueabi ./lEdax-arm-neon -n 1 -l 60 -solve ../problem/fforum-20-39.obf
-elif [ "$1" = sve ]
-then
-make build OS=linux ARCH=arm-sve COMP=gcc CC=aarch64-linux-gnu-gcc
-cd ../bin
-qemu-aarch64 -L /usr/aarch64-linux-gnu -cpu max,sve128=on ./lEdax-arm-sve -n 1 -l 60 -solve ../problem/fforum-20-39.obf
-else
-<<<<<<< HEAD
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
-make build OS=linux ARCH=ARM COMP=gcc CC=aarch64-linux-gnu-gcc
-cd ../bin
-qemu-aarch64 -L /usr/aarch64-linux-gnu ./lEdax-ARM -n 1 -l 60 -solve ../problem/fforum-20-39.obf
-<<<<<<< HEAD
-# qemu-arm -L /usr/arm-linux-gnueabi ./lEdax-ARMv7 -n 1 -l 60 -solve ../problem/fforum-20-39.obf
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-=======
-=======
-make build OS=linux ARCH=arm COMP=gcc CC=aarch64-linux-gnu-gcc
-cd ../bin
-qemu-aarch64 -L /usr/aarch64-linux-gnu ./lEdax-arm -n 1 -l 60 -solve ../problem/fforum-20-39.obf
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-fi
->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c)
 cd ../src
 # C:\Android\android-ndk-r21\toolchains\llvm\prebuilt\windows-x86_64\bin\clang.exe --target=aarch64-linux-gnu -O2 -S flip_neon_bitscan.c
diff --git a/src/root.c b/src/root.c
index b6812cd..65a8685 100644
--- a/src/root.c
+++ b/src/root.c
@@ -3,19 +3,7 @@
  *
  * Search near the end of the game.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2020
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
- * @date 1998 - 2022
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -61,59 +49,21 @@ void pv_debug(Search *search, const Move *bestmove, FILE *f)
 
 	x = bestmove->x;
 	fprintf(f, "pv = %s ", move_to_string(x, player, s));
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if (hash_get_from_board(&search->pv_table, &board, &hash_data)) {
-=======
-	if (hash_get_from_board(&search->pv_table, HBOARD_P(&board), &hash_data)) {
->>>>>>> 0b8fa13 (More HBOARD hash functions)
-=======
 	if (hash_get_from_board(&search->pv_table, &board, &hash_data)) {
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 		fprintf(f, ":%02d@%d%%[%+03d,%+03d]; ", hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent, hash_data.lower, hash_data.upper);
 	}
 	while (x != NOMOVE) {
 		board_get_move_flip(&board, x, &move);
-<<<<<<< HEAD
-=======
-	hash_code = board_get_hash_code(&board);
-	if (hash_get(&search->pv_table, &board, hash_code, &hash_data)) {
-=======
-	if (hash_get_from_board(&search->pv_table, &board, &hash_data)) {
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-		fprintf(f, ":%02d@%d%%[%+03d,%+03d]; ", hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent, hash_data.lower, hash_data.upper);
-	}
-	while (x != NOMOVE) {
-		board_get_move(&board, x, &move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 		board_update(&board, &move);
 		player ^= 1;
 
 		hash_code = board_get_hash_code(&board);
 		if (hash_get(&search->pv_table, &board, hash_code, &hash_data)) {
 			x = hash_data.move[0];
-<<<<<<< HEAD
-<<<<<<< HEAD
 			fprintf(f, "%s:%02d@%d%%[%+03d,%+03d]; ", move_to_string(x, player, s), hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent, hash_data.lower, hash_data.upper);
 		} else if (hash_get(&search->hash_table, &board, hash_code, &hash_data)) {
 			x = hash_data.move[0];
 			fprintf(f, "{%s}:%2d@%d%%[%+03d,%+03d]; ", move_to_string(x, player, s), hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent, hash_data.lower, hash_data.upper);
-=======
-			fprintf(f, "%s:%02d@%d%%[%+03d,%+03d]; ", move_to_string(x, player, s), hash_data.depth, selectivity_table[hash_data.selectivity].percent, hash_data.lower, hash_data.upper);
-		} else if (hash_get(&search->hash_table, &board, hash_code, &hash_data)) {
-			x = hash_data.move[0];
-			fprintf(f, "{%s}:%2d@%d%%[%+03d,%+03d]; ", move_to_string(x, player, s), hash_data.depth, selectivity_table[hash_data.selectivity].percent, hash_data.lower, hash_data.upper);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			fprintf(f, "%s:%02d@%d%%[%+03d,%+03d]; ", move_to_string(x, player, s), hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent, hash_data.lower, hash_data.upper);
-		} else if (hash_get(&search->hash_table, &board, hash_code, &hash_data)) {
-			x = hash_data.move[0];
-			fprintf(f, "{%s}:%2d@%d%%[%+03d,%+03d]; ", move_to_string(x, player, s), hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent, hash_data.lower, hash_data.upper);
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 		} else x = NOMOVE;
 	}
 	fputc('\n', f);
@@ -142,10 +92,6 @@ bool is_pv_ok(Search *search, int bestmove, int search_depth)
 	x = bestmove;
 	while (search_depth > 0 && x != NOMOVE) {
 		if (x != PASS) --search_depth;
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 		board_get_move_flip(&board, x, &move);
 		board_update(&board, &move);
 
@@ -155,21 +101,6 @@ bool is_pv_ok(Search *search, int bestmove, int search_depth)
 			x = hash_data.move[0];
 		} else break;
 		if (hash_data.wl.c.depth < search_depth || hash_data.wl.c.selectivity < search->selectivity || hash_data.lower != hash_data.upper) return false;
-=======
-		board_get_move(&board, x, &move);
-		board_update(&board, &move);
-
-		hash_code = board_get_hash_code(&board);
-		if (hash_get(&search->pv_table, &board, hash_code, &hash_data)
-		 || hash_get(&search->hash_table, &board, hash_code, &hash_data)) {
-			x = hash_data.move[0];
-		} else break;
-<<<<<<< HEAD
-		if (hash_data.depth < search_depth || hash_data.selectivity < search->selectivity || hash_data.lower != hash_data.upper) return false;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		if (hash_data.wl.c.depth < search_depth || hash_data.wl.c.selectivity < search->selectivity || hash_data.lower != hash_data.upper) return false;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 		if (x == NOMOVE && !board_is_game_over(&board)) return false;
 	}
 	return true;
@@ -194,28 +125,8 @@ static int guess_move(Search *search, Board *board)
 
 	search->board = *board; search_setup(search);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	PVS_shallow(search, SCORE_MIN, SCORE_MAX, MIN(search->eval.n_empties, 6));
-<<<<<<< HEAD
-<<<<<<< HEAD
 	hash_get_from_board(&search->shallow_table, board, &hash_data);
-<<<<<<< HEAD
-=======
-	PVS_shallow(search, SCORE_MIN, SCORE_MAX, MIN(search->n_empties, 6));
-=======
-	PVS_shallow(search, SCORE_MIN, SCORE_MAX, MIN(search->eval.n_empties, 6));
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-	hash_get(&search->shallow_table, board, board_get_hash_code(board), &hash_data);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-=======
-	hash_get_from_board(&search->shallow_table, HBOARD_P(board), &hash_data);
->>>>>>> 0b8fa13 (More HBOARD hash functions)
-=======
-	hash_get_from_board(&search->shallow_table, board, &hash_data);
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 
 	search->board = saved; search_setup(search);
 
@@ -248,15 +159,7 @@ void record_best_move(Search *search, const Move *bestmove, const int alpha, con
 	int expected_depth, expected_selectivity, tmp;
 	Bound expected_bound;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	board = search->board;
-=======
-	board = *init_board;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	board = search->board;
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
 
 	spin_lock(result);
 
@@ -285,15 +188,7 @@ void record_best_move(Search *search, const Move *bestmove, const int alpha, con
 	fail_low = (bestmove->score <= alpha);
 
 	while (x != NOMOVE) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 		board_get_move_flip(&board, x, &move);
-=======
-		board_get_move(&board, x, &move);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		board_get_move_flip(&board, x, &move);
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 		if (board_check_move(&board, &move)) {
 			board_update(&board, &move);
 			--expected_depth; 
@@ -302,24 +197,8 @@ void record_best_move(Search *search, const Move *bestmove, const int alpha, con
 			line_push(&result->pv, move.x);
 
 			hash_code = board_get_hash_code(&board);
-<<<<<<< HEAD
-<<<<<<< HEAD
-			if ((hash_get(&search->pv_table, &board, hash_code, &hash_data) || hash_get(&search->hash_table, &board, hash_code, &hash_data)) 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-			if ((hash_get(&search->pv_table, HBOARD_P(&board), hash_code, &hash_data) || hash_get(&search->hash_table, HBOARD_P(&board), hash_code, &hash_data)) 
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
 			if ((hash_get(&search->pv_table, &board, hash_code, &hash_data) || hash_get(&search->hash_table, &board, hash_code, &hash_data)) 
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-			 && (hash_data.wl.c.depth >= expected_depth && hash_data.wl.c.selectivity >= expected_selectivity)
-=======
-			 && (hash_data.depth >= expected_depth && hash_data.selectivity >= expected_selectivity)
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 			 && (hash_data.wl.c.depth >= expected_depth && hash_data.wl.c.selectivity >= expected_selectivity)
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 			 && (hash_data.upper <= expected_bound.upper && hash_data.lower >= expected_bound.lower)) {
 				x = hash_data.move[0];
 			} else x = NOMOVE;
@@ -392,38 +271,15 @@ static int search_route_PVS(Search *search, int alpha, int beta, const int depth
 	assert(alpha < beta);
 	assert(SCORE_MIN <= alpha);
 	assert(beta <= SCORE_MAX);
-<<<<<<< HEAD
-<<<<<<< HEAD
 	assert(depth >= 0 && depth <= search->eval.n_empties);
-=======
-	assert(depth >= 0 && depth <= search->n_empties);
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-	assert(depth >= 0 && depth <= search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 
 	if (depth == search->eval.n_empties) {
 		if (depth == 0) score = search_solve_0(search);
 		else score = PVS_midgame(search, alpha, beta, depth, node);
 	} else {
 		if (depth == 0) score = search_eval_0(search);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		else if (depth == 1) score = -search_eval_1(search, -beta, -alpha, board_get_moves(&search->board));
-		else if (depth == 2) score = search_eval_2(search, alpha, beta, board_get_moves(&search->board));
-=======
-		else if (depth == 1) score = search_eval_1(search, alpha, beta, get_moves(search->board.player, search->board.opponent));
-		else if (depth == 2) score = search_eval_2(search, alpha, beta, get_moves(search->board.player, search->board.opponent));
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-		else if (depth == 1) score = search_eval_1(search, alpha, beta, false);
-		else if (depth == 2) score = search_eval_2(search, alpha, beta, false);
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
 		else if (depth == 1) score = -search_eval_1(search, -beta, -alpha, board_get_moves(&search->board));
 		else if (depth == 2) score = search_eval_2(search, alpha, beta, board_get_moves(&search->board));
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 		else score = PVS_midgame(search, alpha, beta, depth, node);
 	}
 
@@ -470,59 +326,13 @@ int search_get_pv_cost(Search *search)
  */
 int PVS_root(Search *search, const int alpha, const int beta, const int depth)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	HashData hash_data[1];
-	MoveList *movelist = search->movelist;
-	Board *board = search->board;
-=======
-=======
-	unsigned long long hash_code;
-<<<<<<< HEAD
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-	HashData hash_data;
-	HashStoreData hash_store_data;
-=======
-	HashStoreData hash_data;
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-	MoveList *const movelist = &search->movelist;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	Move *move;
-	Node node;
-<<<<<<< HEAD
-	Eval Ev0;
-<<<<<<< HEAD
-	long long cost = -search_count_nodes(search);
-<<<<<<< HEAD
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-	int cost_bits;
->>>>>>> 0a166fd (Remove 1 element array coding style)
 	unsigned long long hash_code;
 	HashStoreData hash_data;
 	MoveList *const movelist = &search->movelist;
 	Move *move;
 	Node node;
 	Eval eval0;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	Board board0;
-<<<<<<< HEAD
-=======
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-	Search_Backup backup;
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-	V2DI board0;
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
 	Board board0;
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 	long long nodes_org = search_count_nodes(search);
 	assert(alpha < beta);
 	assert(SCORE_MIN <= alpha && alpha <= SCORE_MAX);
@@ -537,15 +347,6 @@ int PVS_root(Search *search, const int alpha, const int beta, const int depth)
 	SEARCH_STATS(++statistics.n_PVS_root);
 	SEARCH_UPDATE_INTERNAL_NODES(search->n_nodes);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	// transposition cutoff
-	hash_code = board_get_hash_code(&search->board);
-
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
 	node_init(&node, search, alpha, beta, depth, movelist->n_moves, NULL);
 	node.pv_node = true;
 	search->node_type[0] = PV_NODE;
@@ -556,27 +357,9 @@ int PVS_root(Search *search, const int alpha, const int beta, const int depth)
 		move = movelist->move->next = movelist->move + 1;
 		move->flipped = 0;
 		if (can_move(search->board.opponent, search->board.player)) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 			search_update_pass_midgame(search, &eval0);
 			node.bestscore = move->score = -search_route_PVS(search, -node.beta, -node.alpha, depth, &node);
 			search_restore_pass_midgame(search, &eval0);
-=======
-			search_update_pass_midgame(search);
-				node.bestscore = move->score = -search_route_PVS(search, -node.beta, -node.alpha, depth, &node);
-			search_restore_pass_midgame(search);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			search_update_pass_midgame(search, &backup.eval);
-			node.bestscore = move->score = -search_route_PVS(search, -node.beta, -node.alpha, depth, &node);
-			search_restore_pass_midgame(search, &backup.eval);
->>>>>>> e970433 (Restore eval by copy in search_restore_pass_midgame)
-=======
-			search_update_pass_midgame(search, &eval0);
-			node.bestscore = move->score = -search_route_PVS(search, -node.beta, -node.alpha, depth, &node);
-			search_restore_pass_midgame(search, &eval0);
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 			node.bestmove =  move->x = PASS;
 		} else  { // game over
 			node.bestscore =  move->score = search_solve(search);
@@ -585,70 +368,17 @@ int PVS_root(Search *search, const int alpha, const int beta, const int depth)
 
 	} else {
 		// first move
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		board0 = search->board;
-=======
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
 		board0 = search->board;
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 		eval0 = search->eval;
-=======
-		Ev0.feature = search->eval.feature;
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-=======
-		backup.board = search->board;
-		backup.eval = search->eval;
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
-		board0 = search->board;
-		eval0 = search->eval;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-		if ((move = node_first_move(&node, movelist))) {
-			assert(board_check_move(&search->board, move));
-=======
-		Ev0 = search->eval;
-<<<<<<< HEAD
-		if ((move = node_first_move(node, movelist))) {
-			assert(board_check_move(board, move));
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
 		if ((move = node_first_move(&node, movelist))) {
 			assert(board_check_move(&search->board, move));
->>>>>>> 0a166fd (Remove 1 element array coding style)
 			search_update_midgame(search, move); search->node_type[search->height] = PV_NODE;
 				move->score = -search_route_PVS(search, -beta, -alpha, depth - 1, &node);
 				move->cost = search_get_pv_cost(search);
 				assert(SCORE_MIN <= move->score && move->score <= SCORE_MAX);
 				assert(search->stability_bound.lower <= move->score && move->score <= search->stability_bound.upper);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-			search_restore_midgame(search, move->x, &eval0);
-<<<<<<< HEAD
-<<<<<<< HEAD
-			search->board = board0;
-=======
-			search_restore_midgame(search, move, &Ev0);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-			search_restore_midgame(search, move->x, &backup);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
 			search_restore_midgame(search, move->x, &eval0);
 			search->board = board0;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-			search->board = board0.board;
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-			search->board = board0;
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 			if (log_is_open(search_log)) show_current_move(search_log->f, search, move, alpha, beta, false);
 			node_update(&node, move);
 			if (search->options.verbosity == 4) pv_debug(search, move, stdout);
@@ -670,29 +400,8 @@ int PVS_root(Search *search, const int alpha, const int beta, const int depth)
 						}
 						move->cost = search_get_pv_cost(search);
 					assert(SCORE_MIN <= move->score && move->score <= SCORE_MAX);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-					search_restore_midgame(search, move->x, &eval0);
-<<<<<<< HEAD
-<<<<<<< HEAD
-					search->board = board0;
-=======
-					search_restore_midgame(search, move, &Ev0);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-					search_restore_midgame(search, move->x, &backup);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
 					search_restore_midgame(search, move->x, &eval0);
 					search->board = board0;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
-=======
-					search->board = board0.board;
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-					search->board = board0;
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 					if (log_is_open(search_log)) show_current_move(search_log->f, search, move, alpha, beta, false);
 					node_update(&node, move);
 					assert(SCORE_MIN <= node.bestscore && node.bestscore <= SCORE_MAX);
@@ -705,26 +414,8 @@ int PVS_root(Search *search, const int alpha, const int beta, const int depth)
 	}
 
 	if (!search->stop) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 		hash_code = board_get_hash_code(&search->board);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		hash_get(&search->pv_table, &search->board, hash_code, &hash_data.data);
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> cbf92ce (Fix occasional freezes)
-=======
-		hash_get(&search->pv_table, HBOARD_V(board0), hash_code, &hash_data.data);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-		hash_get(&search->pv_table, &board0, hash_code, &hash_data.data);
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-=======
 		hash_get(&search->pv_table, &search->board, hash_code, &hash_data.data);
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 		if (movelist->n_moves) {	// 4.5.1
 			if (depth < search->options.multipv_depth) movelist_sort(movelist);
 			else movelist_sort_cost(movelist, &hash_data.data);
@@ -741,80 +432,9 @@ int PVS_root(Search *search, const int alpha, const int beta, const int depth)
 			hash_data.beta = beta;
 			hash_data.score = node.bestscore;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-			hash_store(&search->hash_table, &search->board, hash_code, &hash_data);
-			if (search->options.guess_pv) hash_force(&search->pv_table, &search->board, hash_code, &hash_data);
-			else hash_store(&search->pv_table, &search->board, hash_code, &hash_data);
-=======
-=======
-		hash_code = board_get_hash_code(&search->board);
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-		hash_get(&search->pv_table, &search->board, hash_code, &hash_data);
-=======
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-		if (depth < search->options.multipv_depth) movelist_sort(movelist);
-		else movelist_sort_cost(movelist, &hash_data.data);
-		movelist_sort_bestmove(movelist, node.bestmove);
-		record_best_move(search, movelist_first(movelist), alpha, beta, depth);
-
-		if (movelist->n_moves == get_mobility(search->board.player, search->board.opponent)) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-			cost += search_count_nodes(search);
-			cost_bits = last_bit(cost);
-			hash_store(&search->hash_table, &search->board, hash_code, depth, search->selectivity, cost_bits, alpha, beta, node.bestscore, node.bestmove);
-			if (search->options.guess_pv) hash_force(&search->pv_table, &search->board, hash_code, depth, search->selectivity, cost_bits, alpha, beta, node.bestscore, node.bestmove);
-			else hash_store(&search->pv_table, &search->board, hash_code, depth, search->selectivity, cost_bits, alpha, beta, node.bestscore, node.bestmove);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-			hash_store_data.data.depth = depth;
-			hash_store_data.data.selectivity = search->selectivity;
-			hash_store_data.data.cost = last_bit(search_count_nodes(search) - nodes_org);
-=======
-			hash_store_data.data.wl.c.depth = depth;
-			hash_store_data.data.wl.c.selectivity = search->selectivity;
-			hash_store_data.data.wl.c.cost = last_bit(search_count_nodes(search) - nodes_org);
-			hash_store_data.data.move[0] = node.bestmove;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-			hash_store_data.alpha = alpha;
-			hash_store_data.beta = beta;
-			hash_store_data.score = node.bestscore;
-
-			hash_store(&search->hash_table, &search->board, hash_code, &hash_store_data);
-			if (search->options.guess_pv) hash_force(&search->pv_table, &search->board, hash_code, &hash_store_data);
-			else hash_store(&search->pv_table, &search->board, hash_code, &hash_store_data);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
-			hash_data.data.wl.c.depth = depth;
-			hash_data.data.wl.c.selectivity = search->selectivity;
-			hash_data.data.wl.c.cost = last_bit(search_count_nodes(search) - nodes_org);
-			hash_data.data.move[0] = node.bestmove;
-			hash_data.alpha = alpha;
-			hash_data.beta = beta;
-			hash_data.score = node.bestscore;
-
-			hash_store(&search->hash_table, &search->board, hash_code, &hash_data);
-			if (search->options.guess_pv) hash_force(&search->pv_table, &search->board, hash_code, &hash_data);
-			else hash_store(&search->pv_table, &search->board, hash_code, &hash_data);
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-=======
-			hash_store(&search->hash_table, HBOARD_V(board0), hash_code, &hash_data);
-			if (search->options.guess_pv) hash_force(&search->pv_table, HBOARD_V(board0), hash_code, &hash_data);
-			else hash_store(&search->pv_table, HBOARD_V(board0), hash_code, &hash_data);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-			hash_store(&search->hash_table, &board0, hash_code, &hash_data);
-			if (search->options.guess_pv) hash_force(&search->pv_table, &board0, hash_code, &hash_data);
-			else hash_store(&search->pv_table, &board0, hash_code, &hash_data);
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
-=======
 			hash_store(&search->hash_table, &search->board, hash_code, &hash_data);
 			if (search->options.guess_pv) hash_force(&search->pv_table, &search->board, hash_code, &hash_data);
 			else hash_store(&search->pv_table, &search->board, hash_code, &hash_data);
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
 		}
 
 		assert(SCORE_MIN <= node.bestscore && node.bestscore <= SCORE_MAX);
@@ -854,18 +474,8 @@ int aspiration_search(Search *search, int alpha, int beta, const int depth, int
 	log_print(xboard_log, "edax (search)> search [%d, %d] %d (%d)\n", alpha, beta, depth, score);
 
 	if (is_depth_solving(depth, search->eval.n_empties)) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 		alpha -= (alpha & 1);
 		beta += (beta & 1);
-=======
-		if (alpha & 1) --alpha;
-		if (beta & 1) ++beta;
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-		alpha -= (alpha & 1);
-		beta += (beta & 1);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 	}
 
 	// at shallow depths always use a large window, for better move ordering
@@ -889,15 +499,7 @@ int aspiration_search(Search *search, int alpha, int beta, const int depth, int
 
 	width = 10 - depth; if (width < 1) width = 1;
 	if ((width & 1) && depth == search->eval.n_empties) ++width;
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-	
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
 
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 	for (i = 0; i < 10; ++i) {
 		old_score = score;
 
@@ -957,15 +559,7 @@ int aspiration_search(Search *search, int alpha, int beta, const int depth, int
 		if (score == old_score) break;
 	}
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	if (!search->stop) record_best_move(search, movelist_first(&search->movelist), alpha, beta, depth);
-=======
-	if (!search->stop) record_best_move(search, &search->board, movelist_first(&search->movelist), alpha, beta, depth);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	if (!search->stop) record_best_move(search, movelist_first(&search->movelist), alpha, beta, depth);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
 	search->result->time = search_time(search);
 	search->result->n_nodes = search_count_nodes(search);
 	if (options.noise <= depth && search->options.verbosity >= 2) {
@@ -996,64 +590,20 @@ static bool get_last_level(Search *search, int *depth, int *selectivity)
 
 	for (i = 0; i < 4; ++i) {
 		hash_code = board_get_hash_code(&board);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 		if (hash_get(&search->pv_table, &board, hash_code, &hash_data)
 		 || hash_get(&search->hash_table, &board, hash_code, &hash_data)) {
-=======
-		if (hash_get(&search->pv_table, HBOARD_P(&board), hash_code, &hash_data)
-		 || hash_get(&search->hash_table, HBOARD_P(&board), hash_code, &hash_data)) {
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-		if (hash_get(&search->pv_table, &board, hash_code, &hash_data)
-		 || hash_get(&search->hash_table, &board, hash_code, &hash_data)) {
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 			x = hash_data.move[0];
 		} else break;
 
 		d = hash_data.wl.c.depth + i;
 		s = hash_data.wl.c.selectivity;
-=======
-		if (hash_get(&search->pv_table, &board, hash_code, &hash_data)) {
-			x = hash_data.move[0];
-		} else if (hash_get(&search->hash_table, &board, hash_code, &hash_data)) {
-=======
-		if (hash_get(&search->pv_table, &board, hash_code, &hash_data)
-		 || hash_get(&search->hash_table, &board, hash_code, &hash_data)) {
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-			x = hash_data.move[0];
-		} else break;
-
-<<<<<<< HEAD
-		d = hash_data.depth + i;
-		s = hash_data.selectivity;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		d = hash_data.wl.c.depth + i;
-		s = hash_data.wl.c.selectivity;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 
 		if (d > *depth) *depth = d;
 		if (s > *selectivity) *selectivity = s;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip)
 		board_get_move_flip(&board, x, &move);
 		board_update(&board, &move);
 
-=======
-		board_get_move(&board, x, &move);
-		board_update(&board, &move);
-<<<<<<< HEAD
-		
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 		if (x == PASS) --i;
 	}
 
@@ -1131,58 +681,22 @@ void iterative_deepening(Search *search, int alpha, int beta)
 	}
 
 	// reuse last search ?
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	if (hash_get_from_board(&search->pv_table, &search->board, &hash_data)) {
-=======
-	if (hash_get(&search->pv_table, &search->board, board_get_hash_code(&search->board), &hash_data)) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	if (hash_get_from_board(&search->pv_table, &search->board, &hash_data)) {
->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame)
-=======
-	if (hash_get_from_board(&search->pv_table, HBOARD_P(&search->board), &hash_data)) {
->>>>>>> 0b8fa13 (More HBOARD hash functions)
-=======
-	if (hash_get_from_board(&search->pv_table, &search->board, &hash_data)) {
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 		char s[2][3];
 		if (search->options.verbosity >= 2) {
 			info("<hash: value = [%+02d, %+02d] ; bestmove = %s, %s ; level = %d@%d%% ; date = %d ; cost = %d>\n",
 				hash_data.lower, hash_data.upper,
 				move_to_string(hash_data.move[0], search->player, s[0]),
 				move_to_string(hash_data.move[1], search->player, s[1]),
-<<<<<<< HEAD
-<<<<<<< HEAD
-				hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent,
-				hash_data.wl.c.date, hash_data.wl.c.cost);
-=======
-				hash_data.depth, selectivity_table[hash_data.selectivity].percent,
-				hash_data.date, hash_data.cost);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 				hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent,
 				hash_data.wl.c.date, hash_data.wl.c.cost);
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 		}
 		if (log_is_open(search_log)) {
 			log_print(search_log, "--- Next Search ---: ");
 			hash_print(&hash_data, search_log->f);
 		}
-<<<<<<< HEAD
-<<<<<<< HEAD
-		old_depth = hash_data.wl.c.depth;
-		old_selectivity = hash_data.wl.c.selectivity;
-=======
-		old_depth = hash_data.depth;
-		old_selectivity = hash_data.selectivity;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		old_depth = hash_data.wl.c.depth;
 		old_selectivity = hash_data.wl.c.selectivity;
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
 
 		if (USE_PREVIOUS_SEARCH) {
 			if (hash_data.lower == hash_data.upper) {
@@ -1207,15 +721,7 @@ void iterative_deepening(Search *search, int alpha, int beta)
 	if (start > search->options.depth) start = search->options.depth;
 	if (start > search->eval.n_empties) start = search->eval.n_empties;
 	if (start < search->eval.n_empties) {
-<<<<<<< HEAD
-<<<<<<< HEAD
 		start += ((start ^ end) & 1);
-=======
-		if ((start & 1) != (end & 1)) ++start;
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-		start += ((start ^ end) & 1);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 		if (start <= 0) start = 2 - (end & 1);
 		if (start > end) start = end;
 	}
@@ -1238,28 +744,12 @@ void iterative_deepening(Search *search, int alpha, int beta)
 		}
 		movelist_sort(movelist);
 		bestmove = movelist_first(movelist); bestmove->score = score;
-<<<<<<< HEAD
-<<<<<<< HEAD
-		record_best_move(search, bestmove, alpha, beta, old_depth);
-=======
-		record_best_move(search, &search->board, bestmove, alpha, beta, old_depth);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		record_best_move(search, bestmove, alpha, beta, old_depth);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
 		assert(SCORE_MIN <= result->score  && result->score <= SCORE_MAX);
 	} else {
 		Move pass = MOVE_PASS;
 		bestmove = &pass; bestmove->score = score;
-<<<<<<< HEAD
-<<<<<<< HEAD
-		record_best_move(search, bestmove, alpha, beta, old_depth);
-=======
-		record_best_move(search, &search->board, bestmove, alpha, beta, old_depth);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 		record_best_move(search, bestmove, alpha, beta, old_depth);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
 		assert(SCORE_MIN <= result->score  && result->score <= SCORE_MAX);
 	}
 	search->selectivity = tmp_selectivity;
@@ -1279,15 +769,7 @@ void iterative_deepening(Search *search, int alpha, int beta)
 	// midgame : iterative depth
 	for (search->depth = start; search->depth < end; search->depth += 2) {
 		search->depth_pv_extension = get_pv_extension(search->depth, search->eval.n_empties);
-<<<<<<< HEAD
-<<<<<<< HEAD
 		score = aspiration_search(search, SCORE_MIN, SCORE_MAX/*alpha, beta*/, search->depth, score);	// https://github.com/eukaryo/edax-reversi-AVX-v446mod2
-=======
-		score = aspiration_search(search, alpha, beta, search->depth, score);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-		score = aspiration_search(search, SCORE_MIN, SCORE_MAX/*alpha, beta*/, search->depth, score);	// https://github.com/eukaryo/edax-reversi-AVX-v446mod2
->>>>>>> 011233a (Update root.c)
 		if (!search_continue(search)) return;
 		if (abs(score) >= SCORE_MAX - 1 && search->depth > end - ITERATIVE_MIN_EMPTIES && search->options.depth >= search->eval.n_empties) break;
 	}
@@ -1347,15 +829,7 @@ void* search_run(void *v)
 	}
 	search->height = 0;
 	search->node_type[search->height] = PV_NODE;
-<<<<<<< HEAD
-<<<<<<< HEAD
-	search->depth_pv_extension = get_pv_extension(0, search->eval.n_empties);
-=======
-	search->depth_pv_extension = get_pv_extension(0, search->n_empties);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 	search->depth_pv_extension = get_pv_extension(0, search->eval.n_empties);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 	search->stability_bound.upper = SCORE_MAX - 2 * get_stability(search->board.opponent, search->board.player);
 	search->stability_bound.lower = 2 * get_stability(search->board.player, search->board.opponent) - SCORE_MAX;
 	search->result->score = search_bound(search, search_eval_0(search));
diff --git a/src/search.c b/src/search.c
index fdcf4fb..975854a 100644
--- a/src/search.c
+++ b/src/search.c
@@ -52,23 +52,7 @@
  * -# Reinsfeld A. (1983) An Improvement Of the Scout Tree-Search Algorithm. ICCA
  *     journal, 6(4), pp. 4-14.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2017
->>>>>>> b3f048d (copyright changes)
-=======
- * @date 1998 - 2020
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
- * @date 1998 - 2022
->>>>>>> 9794cc1 (Store solid-normalized hash in PVS_midgame)
-=======
- * @date 1998 - 2023
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -95,10 +79,6 @@ Log search_log[1];
 
 /** a quadrant id for each square */
 const unsigned char QUADRANT_ID[] = {
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
 	1, 1, 1, 1, 2, 2, 2, 2,
 	1, 1, 1, 1, 2, 2, 2, 2,
 	1, 1, 1, 1, 2, 2, 2, 2,
@@ -117,21 +97,6 @@ const unsigned long long quadrant_mask[] = {
 	0xF0F0F0F000000000, 0xF0F0F0F00F0F0F0F, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0FFFFFFFF,
 	0xFFFFFFFF00000000, 0xFFFFFFFF0F0F0F0F, 0xFFFFFFFFF0F0F0F0, 0xFFFFFFFFFFFFFFFF
 };
-<<<<<<< HEAD
-=======
-		1, 1, 1, 1, 2, 2, 2, 2,
-		1, 1, 1, 1, 2, 2, 2, 2,
-		1, 1, 1, 1, 2, 2, 2, 2,
-		1, 1, 1, 1, 2, 2, 2, 2,
-		4, 4, 4, 4, 8, 8, 8, 8,
-		4, 4, 4, 4, 8, 8, 8, 8,
-		4, 4, 4, 4, 8, 8, 8, 8,
-		4, 4, 4, 4, 8, 8, 8, 8,
-		0, 0
-	};
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
 
 /** level with no selectivity */
 const int NO_SELECTIVITY = 5;
@@ -148,28 +113,9 @@ const Selectivity selectivity_table [] = {
 
 /** threshold values to try stability cutoff during NWS search */
 // TODO: better values may exist.
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-static const signed char NWS_STABILITY_THRESHOLD[] = { // 99 = unused value...
-=======
-const unsigned char NWS_STABILITY_THRESHOLD[] = { // 99 = unused value...
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-	 99, 99, 99, 99,  6,  8, 10, 12,
-	  8, 10, 20, 22, 24, 26, 28, 30, // 8 & 9 lowered to work best with solid stone
-=======
-const signed char NWS_STABILITY_THRESHOLD[] = { // 99 = unused value...
-	 99, 99, 99, 99,  4,  8, 10, 12,
-=======
 static const signed char NWS_STABILITY_THRESHOLD[] = { // 99 = unused value...
 	 99, 99, 99, 99,  6,  8, 10, 12,
-<<<<<<< HEAD
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
-	 14, 16, 20, 22, 24, 26, 28, 30,
->>>>>>> 867c81c (Omit restore board/parity in search_shallow; tweak NWS_STABILITY)
-=======
 	  8, 10, 20, 22, 24, 26, 28, 30, // 8 & 9 lowered to work best with solid stone
->>>>>>> 22b144f (Tune NWS_stability_thres to work best with solid stone)
 	 32, 34, 36, 38, 40, 42, 44, 46,
 	 48, 48, 50, 50, 52, 52, 54, 54,
 	 56, 56, 58, 58, 60, 60, 62, 62,
@@ -180,15 +126,7 @@ static const signed char NWS_STABILITY_THRESHOLD[] = { // 99 = unused value...
 
 /** threshold values to try stability cutoff during PVS search */
 // TODO: better values may exist.
-<<<<<<< HEAD
-<<<<<<< HEAD
-const signed char PVS_STABILITY_THRESHOLD[] = { // 99 = unused value...
-=======
-const unsigned char PVS_STABILITY_THRESHOLD[] = { // 99 = unused value...
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
 const signed char PVS_STABILITY_THRESHOLD[] = { // 99 = unused value...
->>>>>>> 867c81c (Omit restore board/parity in search_shallow; tweak NWS_STABILITY)
 	 99, 99, 99, 99, -2,  0,  2,  4,
 	  6,  8, 12, 14, 16, 18, 20, 22,
 	 24, 26, 28, 30, 32, 34, 36, 38,
@@ -410,28 +348,11 @@ void search_global_init(void)
 void search_resize_hashtable(Search *search) {
 	if (search->options.hash_size != options.hash_table_size) {
 		const int hash_size = 1u << options.hash_table_size;
-<<<<<<< HEAD
-<<<<<<< HEAD
 		const int pv_shallow_size = hash_size > 16 ? hash_size >> 4 : 1;
-=======
-		const int pv_size = hash_size > 16 ? hash_size >> 4 : 1;
->>>>>>> 44fd278 (Rearrange PVS_shallow loop)
 
 		hash_init(&search->hash_table, hash_size);
-<<<<<<< HEAD
 		hash_init(&search->pv_table, pv_shallow_size);
 		hash_init(&search->shallow_table, pv_shallow_size);
-=======
-		hash_init(&search->pv_table, pv_size);
-		hash_init(&search->shallow_table, hash_size);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		const int pv_shallow_size = hash_size > 16 ? hash_size >> 4 : 1;
-
-		hash_init(&search->hash_table, hash_size);
-		hash_init(&search->pv_table, pv_shallow_size);
-		hash_init(&search->shallow_table, pv_shallow_size);
->>>>>>> 927aa67 (Increase hash_table and decrease shallow_table; fix NO_SELECTIVITY hack)
 		search->options.hash_size = options.hash_table_size;
 	}
 }
@@ -535,27 +456,10 @@ void search_init(Search *search)
 void search_free(Search *search)
 {
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-	hash_free(&search->hash_table);
-	hash_free(&search->pv_table);
-	hash_free(&search->shallow_table);
-=======
-	hash_free(search->hash_table);
-	hash_free(search->pv_table);
-	hash_free(search->shallow_table);
-<<<<<<< HEAD
->>>>>>> 4a049b7 (Rewrite eval_open; Free SymetryPacking after init; short int feature)
-	// eval_free(search->eval);
-=======
-	// eval_free(&search->eval);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
 	hash_free(&search->hash_table);
 	hash_free(&search->pv_table);
 	hash_free(&search->shallow_table);
 	// eval_free(search->eval);
->>>>>>> 0a166fd (Remove 1 element array coding style)
 	
 	task_stack_free(search->tasks);
 	free(search->tasks);
@@ -576,16 +480,7 @@ void search_free(Search *search)
  */
 void search_setup(Search *search)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	int i, x, prev;
-=======
-	int i, x;
-	SquareList *empty;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	int i, x, prev;
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
 	static const unsigned char presorted_x[] = {
 		A1, A8, H1, H8,                    /* Corner */
 		C4, C5, D3, D6, E3, E6, F4, F5,    /* E */
@@ -599,112 +494,33 @@ void search_setup(Search *search)
 		D4, E4, D5, E5,                    /* center */
 	};
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	const Board * const board = &search->board;
 	unsigned long long E;
-<<<<<<< HEAD
-
-	// init empties, parity
-	search->eval.n_empties = 0;
-	search->eval.parity = 0;
-=======
-	Board *board = search->board;
-=======
-	const Board * const board = &search->board;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	unsigned long long E, B;
-=======
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
 
 	// init empties, parity
-<<<<<<< HEAD
-	search->n_empties = 0;
-<<<<<<< HEAD
-	search->parity = 0;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-=======
 	search->eval.n_empties = 0;
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 	search->eval.parity = 0;
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
 
 	prev = NOMOVE;
 	E = ~(board->player | board->opponent);
 	for (i = 0; i < BOARD_SIZE; ++i) {    /* add empty squares */
 		x = presorted_x[i];
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
 		if (E & x_to_bit(x)) {
 			search->eval.parity ^= QUADRANT_ID[x];
 			search->empties[prev].next = x;
 			search->empties[x].previous = prev;
 			prev = x;
-<<<<<<< HEAD
-<<<<<<< HEAD
 			++search->eval.n_empties;
-=======
-		B = x_to_bit(x);
-		if (E & B) {
-			empty->x = x;
-			empty->b = B;
-			empty->quadrant = QUADRANT_ID[x];
-			search->eval.parity ^= empty->quadrant;
-			empty->previous = empty - 1;
-			empty->next = empty + 1;
-			search->x_to_empties[x] = empty;
-			empty = empty->next;
-=======
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-			++search->n_empties;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-			++search->eval.n_empties;
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 		}
 	}
 	search->empties[prev].next = NOMOVE;	/* sentinel */
 	search->empties[NOMOVE].previous = prev;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 	search->empties[PASS].next = NOMOVE;
 	search->empties[PASS].previous = NOMOVE;
-=======
-	empty = search->empties + PASS;
-	empty->x = PASS;
-	empty->b = 0;
-	empty->previous = empty->next = empty;
-	search->x_to_empties[PASS] = empty;
-
-	empty = search->empties + NOMOVE;
-	empty->x = NOMOVE;
-	empty->b = 0;
-	empty->previous = empty->next = empty;
-	search->x_to_empties[NOMOVE] = empty;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-	search->empties[PASS].next = NOMOVE;
-	search->empties[PASS].previous = NOMOVE;
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
 
 	// init the evaluation function
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	eval_set(&search->eval, &search->board);
-=======
-	eval_set(&search->eval, board);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-	eval_set(search);
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
 	eval_set(&search->eval, &search->board);
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
 }
 
 /**
@@ -721,15 +537,7 @@ void search_clone(Search *search, Search *master)
 	search_setup(search);
 	search->hash_table = master->hash_table; // share the hashtable
 	search->pv_table = master->pv_table; // share the pvtable
-<<<<<<< HEAD
-<<<<<<< HEAD
 	search->shallow_table = master->shallow_table; // share the shallowtable
-=======
-	search->shallow_table = master->shallow_table; // share the pvtable
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	search->shallow_table = master->shallow_table; // share the shallowtable
->>>>>>> 44fd278 (Rearrange PVS_shallow loop)
 	search->tasks = master->tasks;
 	search->observer = master->observer;
 
@@ -1060,93 +868,25 @@ void search_get_movelist(const Search *search, MoveList *movelist)
 {
 	Move *previous = movelist->move;
 	Move *move = movelist->move + 1;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	V2DI vboard;
-	unsigned long long moves;
-	int x;
-=======
-	const Board * const board = &search->board;
-	unsigned long long moves = get_moves(board->player, board->opponent);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	register int x;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-
-	vboard.board = search->board;
-	moves = vboard_get_moves(vboard);
-=======
-=======
-	vBoard board = load_vboard(search->board);
-	unsigned long long moves = vboard_get_moves(board, search->board);
->>>>>>> 3a92d84 (minor AVX512/SSE optimizations)
-	int x;
-
->>>>>>> 7204cd1 (Small fix on debug build, etc.)
-=======
 	V2DI vboard;
 	unsigned long long moves;
 	int x;
 
 	vboard.board = search->board;
 	moves = vboard_get_moves(vboard);
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 	movelist->n_moves = 0;
 	foreach_bit(x, moves) {
 		move->x = x;
 		move->flipped = vboard_flip(vboard, x);
-<<<<<<< HEAD
-=======
-	int x, j;
-	widest_register	b;
-=======
-	int x;
->>>>>>> 8d39e74 (Loop out rounding score)
-
-	movelist->n_moves = 0;
-	foreach_bit(x, moves) {
-<<<<<<< HEAD
-		board_get_move(board, x, move);
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-=======
-		move->x = x;
-		move->flipped = vboard_flip(board, x);
->>>>>>> 3a92d84 (minor AVX512/SSE optimizations)
-=======
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 		move->cost = 0;
 		previous = previous->next = move;
 		++move;
 		++(movelist->n_moves);
 	}
 	previous->next = NULL;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 }
 
-<<<<<<< HEAD
 #if 0	// inlined
-=======
-#if 0
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-	movelist->n_moves = move - movelist->move - 1;
-	assert(movelist->n_moves == bit_count(moves));
-=======
->>>>>>> 7204cd1 (Small fix on debug build, etc.)
-=======
-	movelist->n_moves = move - movelist->move - 1;
->>>>>>> 534241b (Revise foreach_bit_r and first_bit_32)
-=======
->>>>>>> 8d39e74 (Loop out rounding score)
-}
-
-#if 0	// inlined
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
 /**
  * @brief Update the search state after a move.
  *
@@ -1156,22 +896,9 @@ void search_get_movelist(const Search *search, MoveList *movelist)
 void search_update_endgame(Search *search, const Move *move)
 {
 	search_swap_parity(search, move->x);
-<<<<<<< HEAD
-<<<<<<< HEAD
 	empty_remove(search->empties, move->x);
 	board_update(&search->board, move);
 	--search->eval.n_empties;
-<<<<<<< HEAD
-=======
-	empty_remove(search->x_to_empties[move->x]);
-=======
-	empty_remove(search->empties, move->x);
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-	board_update(&search->board, move);
-	--search->n_empties;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 
 }
 
@@ -1184,22 +911,9 @@ void search_update_endgame(Search *search, const Move *move)
 void search_restore_endgame(Search *search, const Move *move)
 {
 	search_swap_parity(search, move->x);
-<<<<<<< HEAD
-<<<<<<< HEAD
 	empty_restore(search->empties, move->x);
 	board_restore(&search->board, move);
 	++search->eval.n_empties;
-<<<<<<< HEAD
-=======
-	empty_restore(search->x_to_empties[move->x]);
-=======
-	empty_restore(search->empties, move->x);
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-	board_restore(&search->board, move);
-	++search->n_empties;
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 }
 
 /**
@@ -1221,134 +935,38 @@ void search_pass_endgame(Search *search)
  * @param search  search.
  * @param move    played move.
  */
-<<<<<<< HEAD
-static void search_update_midgame_tail(Search *search)
-{
-<<<<<<< HEAD
-=======
-	static const NodeType next_node_type[] = {CUT_NODE, ALL_NODE, CUT_NODE};
-
-	++search->height;
-	search->node_type[search->height] = next_node_type[search->node_type[search->height - 1]];
-}
-
-=======
->>>>>>> d63619f (Change NodeType to char; next node_type TLU to trinary Op)
 void search_update_midgame(Search *search, const Move *move)
 {
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
 //	line_push(&debug_line, move->x);
 
 	search_swap_parity(search, move->x);
-<<<<<<< HEAD
-<<<<<<< HEAD
 	empty_remove(search->empties, move->x);
 	board_update(&search->board, move);
 	eval_update(move->x, move->flipped, &search->eval);
 	assert(search->eval.n_empties > 0);
 	--search->eval.n_empties;
-<<<<<<< HEAD
-=======
-	empty_remove(search->x_to_empties[move->x]);
-=======
-	empty_remove(search->empties, move->x);
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-	board_update(&search->board, move);
-	eval_update(&search->eval, move);
-<<<<<<< HEAD
-	assert(search->n_empties > 0);
-	--search->n_empties;
-<<<<<<< HEAD
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
 	++search->height;
 	search->node_type[search->height] = (search->node_type[search->height - 1] == CUT_NODE) ? ALL_NODE : CUT_NODE;
-=======
-=======
-	assert(search->eval.n_empties > 0);
-	--search->eval.n_empties;
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-	search_update_midgame_tail(search);
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-	++search->height;
-	search->node_type[search->height] = (search->node_type[search->height - 1] == CUT_NODE) ? ALL_NODE : CUT_NODE;
->>>>>>> d63619f (Change NodeType to char; next node_type TLU to trinary Op)
 }
 
 /**
  * @brief Restore the search state as before a move.
  *
  * @param search  search.
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @param x       played move.
  * @param backup  board/eval to restore.
  */
 void search_restore_midgame(Search *search, int x, const Eval *eval0)
-=======
- * @param move    played move.
- * @param Ev	  eval to restore.
- */
-<<<<<<< HEAD
-void search_restore_midgame(Search *search, const Move *move, const Eval *Ev)
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-void search_restore_midgame(Search *search, const Move *move, const Eval *eval_to_restore)
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-=======
- * @param x       played move.
- * @param backup  board/eval to restore.
- */
-<<<<<<< HEAD
-void search_restore_midgame(Search *search, int x, const Search_Backup *backup)
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
-void search_restore_midgame(Search *search, int x, const Eval *eval0)
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 {
 //	line_print(&debug_line, 100, " ", stdout); putchar('\n');
 //	line_pop(&debug_line);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	// search_swap_parity(search, move->x);
-	// ++search->eval.n_empties;
-	// eval_restore(search->eval, move);
-	search->eval = *eval0;
-	// board_restore(&search->board, move);
-	empty_restore(search->empties, x);
-=======
-=======
-	search_swap_parity(search, move->x);
-<<<<<<< HEAD
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-	empty_restore(search->x_to_empties[move->x]);
-=======
-	empty_restore(search->empties, move->x);
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-	board_restore(&search->board, move);
-	++search->eval.n_empties;
-	// eval_restore(search->eval, move);
-	search->eval.feature = eval_to_restore->feature;
-<<<<<<< HEAD
-	eval_swap(&search->eval);
-<<<<<<< HEAD
-	++search->n_empties;
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-	++search->eval.n_empties;
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
->>>>>>> e966183 (Halves EVAL_WEIGHT table by n_empties parity instead of eval.player.)
-=======
 	// search_swap_parity(search, move->x);
 	// ++search->eval.n_empties;
 	// eval_restore(search->eval, move);
 	search->eval = *eval0;
 	// board_restore(&search->board, move);
 	empty_restore(search->empties, x);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
 	assert(search->height > 0);
 	--search->height;
 }
@@ -1360,40 +978,11 @@ void search_restore_midgame(Search *search, int x, const Eval *eval0)
  */
 void search_update_pass_midgame(Search *search, Eval *backup)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> b612999 (SSE optimized search_pass)
 	search_pass(search);
 	backup->feature = search->eval.feature;
-=======
-	static const NodeType next_node_type[] = {CUT_NODE, ALL_NODE, CUT_NODE};
-
-<<<<<<< HEAD
-	board_pass(search->board);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-=======
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-	board_pass(&search->board);
-<<<<<<< HEAD
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-	backup->feature = search->eval.feature;
->>>>>>> e970433 (Restore eval by copy in search_restore_pass_midgame)
 	eval_pass(&search->eval);
-<<<<<<< HEAD
-<<<<<<< HEAD
-	++search->height;
-	search->node_type[search->height] = (search->node_type[search->height - 1] == CUT_NODE) ? ALL_NODE : CUT_NODE;
-=======
-	search_update_midgame_tail(search);
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
 	++search->height;
 	search->node_type[search->height] = (search->node_type[search->height - 1] == CUT_NODE) ? ALL_NODE : CUT_NODE;
->>>>>>> d63619f (Change NodeType to char; next node_type TLU to trinary Op)
 }
 
 /**
@@ -1401,40 +990,11 @@ void search_update_pass_midgame(Search *search, Eval *backup)
  *
  * @param search  search.
  */
-<<<<<<< HEAD
-<<<<<<< HEAD
-void search_restore_pass_midgame(Search *search, const Eval *eval0)
-=======
-void search_restore_pass_midgame(Search *search, const Eval *backup)
->>>>>>> e970433 (Restore eval by copy in search_restore_pass_midgame)
-=======
 void search_restore_pass_midgame(Search *search, const Eval *eval0)
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	search_pass(search);
 	// eval_pass(&search->eval);
 	search->eval.feature = eval0->feature;
-<<<<<<< HEAD
-=======
-	board_pass(search->board);
-=======
-	board_pass(&search->board);
-<<<<<<< HEAD
->>>>>>> 0a166fd (Remove 1 element array coding style)
-	eval_pass(&search->eval);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-=======
-	search_pass(search);
->>>>>>> b612999 (SSE optimized search_pass)
-	// eval_pass(&search->eval);
-	search->eval.feature = backup->feature;
->>>>>>> e970433 (Restore eval by copy in search_restore_pass_midgame)
-=======
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 	assert(search->height > 0);
 	--search->height;
 }
@@ -1582,20 +1142,7 @@ void result_print(Result *result, FILE *f)
  */
 bool search_SC_PVS(Search *search, int *alpha, int *beta, int *score)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
 	if (USE_SC && *beta >= PVS_STABILITY_THRESHOLD[search->eval.n_empties]) {
-=======
-	const Board * const board = &search->board;
-
-<<<<<<< HEAD
-	if (USE_SC && *beta >= PVS_STABILITY_THRESHOLD[search->n_empties]) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-=======
->>>>>>> 26dad03 (Use player bits only in board_score_1)
-	if (USE_SC && *beta >= PVS_STABILITY_THRESHOLD[search->eval.n_empties]) {
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 		CUTOFF_STATS(++statistics.n_stability_try;)
 		*score = SCORE_MAX - 2 * get_stability(search->board.opponent, search->board.player);
 		if (*score <= *alpha) {
@@ -1618,28 +1165,7 @@ bool search_SC_PVS(Search *search, int *alpha, int *beta, int *score)
  */
 bool search_SC_NWS(Search *search, const int alpha, int *score)
 {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 	if (USE_SC && alpha >= NWS_STABILITY_THRESHOLD[search->eval.n_empties]) {
-=======
-	const Board * const board = &search->board;
-
-<<<<<<< HEAD
-	if (USE_SC && alpha >= NWS_STABILITY_THRESHOLD[search->n_empties]) {
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-=======
->>>>>>> 26dad03 (Use player bits only in board_score_1)
-	if (USE_SC && alpha >= NWS_STABILITY_THRESHOLD[search->eval.n_empties]) {
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-=======
-	if (USE_SC && alpha >= NWS_STABILITY_THRESHOLD[n_empties]) {
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
-=======
-	if (USE_SC && alpha >= NWS_STABILITY_THRESHOLD[search->eval.n_empties]) {
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 		CUTOFF_STATS(++statistics.n_stability_try;)
 		*score = SCORE_MAX - 2 * get_stability(search->board.opponent, search->board.player);
 		if (*score <= alpha) {
@@ -1650,12 +1176,6 @@ bool search_SC_NWS(Search *search, const int alpha, int *score)
 	return false;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 // for 4 empties (min stage)
 bool search_SC_NWS_4(Search *search, const int alpha, int *score)
 {
@@ -1663,20 +1183,6 @@ bool search_SC_NWS_4(Search *search, const int alpha, int *score)
 		CUTOFF_STATS(++statistics.n_stability_try;)
 		*score = 2 * get_stability(search->board.opponent, search->board.player) - SCORE_MAX;
 		if (*score > alpha) {
-<<<<<<< HEAD
-=======
-bool search_SC_NWS_fulls_given(Search *search, const int alpha, int *score, unsigned long long allfull, V4DI *full)
-=======
-bool search_SC_NWS_fulls_given(Search *search, const int alpha, int *score, const unsigned long long full[5])
->>>>>>> 4303b09 (Returns all full lines in full[4])
-{
-	if (USE_SC && alpha >= NWS_STABILITY_THRESHOLD[search->eval.n_empties]) {
-		CUTOFF_STATS(++statistics.n_stability_try;)
-		*score = SCORE_MAX - 2 * get_stability_fulls_given(search->board.opponent, search->board.player, full);
-		if (*score <= alpha) {
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-=======
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 			CUTOFF_STATS(++statistics.n_stability_low_cutoff;)
 			return true;
 		}
@@ -1684,19 +1190,7 @@ bool search_SC_NWS_fulls_given(Search *search, const int alpha, int *score, cons
 	return false;
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines)
-=======
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
 #if 0	// unused
-=======
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
-=======
-#if 0	// unused
->>>>>>> 4303b09 (Returns all full lines in full[4])
 /**
  * @brief Transposition Cutoff (TC).
  *
@@ -1784,27 +1278,11 @@ bool search_ETC_NWS(Search *search, MoveList *movelist, unsigned long long hash_
 		Move *move;
 		Board next;
 		HashData etc;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-		HashStoreData hash_data;
-=======
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-		HashStoreData hash_store_data;
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
 		HashStoreData hash_data;
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
 		unsigned long long etc_hash_code;
 		HashTable *hash_table = &search->hash_table;
 		const int etc_depth = depth - 1;
 		const int beta = alpha + 1;
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	
-=======
 
 		hash_data.data.wl.c.depth = depth;
 		hash_data.data.wl.c.selectivity = selectivity;
@@ -1812,21 +1290,6 @@ bool search_ETC_NWS(Search *search, MoveList *movelist, unsigned long long hash_
 		hash_data.alpha = alpha;
 		hash_data.beta = beta;
 
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-		CUTOFF_STATS(++statistics.n_etc_try;)
-		foreach_move (move, *movelist) {
-			next.opponent = search->board.player ^ (move->flipped | x_to_bit(move->x));
-			next.player = search->board.opponent ^ move->flipped;
-			SEARCH_UPDATE_ALL_NODES(search->n_nodes);
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-
-<<<<<<< HEAD
-		hash_data.data.wl.c.depth = depth;
-		hash_data.data.wl.c.selectivity = selectivity;
-		hash_data.data.wl.c.cost = 0;
-		hash_data.alpha = alpha;
-		hash_data.beta = beta;
-
 		CUTOFF_STATS(++statistics.n_etc_try;)
 		foreach_move (move, *movelist) {
 			next.opponent = search->board.player ^ (move->flipped | x_to_bit(move->x));
@@ -1834,85 +1297,23 @@ bool search_ETC_NWS(Search *search, MoveList *movelist, unsigned long long hash_
 			SEARCH_UPDATE_ALL_NODES(search->n_nodes);
 
 			if (USE_SC && alpha <= -NWS_STABILITY_THRESHOLD[search->eval.n_empties]) {
-<<<<<<< HEAD
 				*score = 2 * get_stability(next.opponent, next.player) - SCORE_MAX;
 				if (*score > alpha) {
 					hash_data.score = *score;
 					hash_data.data.move[0] = move->x;
-<<<<<<< HEAD
-<<<<<<< HEAD
-					hash_store(hash_table, &search->board, hash_code, &hash_data);
-<<<<<<< HEAD
-=======
-			if (USE_SC && alpha <= -NWS_STABILITY_THRESHOLD[search->n_empties]) {
-=======
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
-				*score = 2 * get_stability(next.opponent, next.player) - SCORE_MAX;
-				if (*score > alpha) {
-<<<<<<< HEAD
-					hash_store(hash_table, &search->board, hash_code, depth, selectivity, 0, alpha, beta, *score, move->x);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-					hash_store_data.score = *score;
-					hash_store_data.data.move[0] = move->x;
-					hash_store(hash_table, &search->board, hash_code, &hash_store_data);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-=======
-					hash_store(hash_table, HBOARD_P(&search->board), hash_code, &hash_data);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
 					hash_store(hash_table, &search->board, hash_code, &hash_data);
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 					CUTOFF_STATS(++statistics.n_esc_high_cutoff;)
 					return true;
 				}
 			}
 
 			etc_hash_code = board_get_hash_code(&next);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 			if (USE_TC && hash_get(hash_table, &next, etc_hash_code, &etc) && etc.wl.c.selectivity >= selectivity && etc.wl.c.depth >= etc_depth) {
-=======
-			if (USE_TC && hash_get(hash_table, HBOARD_P(&next), etc_hash_code, &etc) && etc.wl.c.selectivity >= selectivity && etc.wl.c.depth >= etc_depth) {
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
-			if (USE_TC && hash_get(hash_table, &next, etc_hash_code, &etc) && etc.wl.c.selectivity >= selectivity && etc.wl.c.depth >= etc_depth) {
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 				*score = -etc.upper;
 				if (*score > alpha) {
 					hash_data.score = *score;
 					hash_data.data.move[0] = move->x;
-<<<<<<< HEAD
-<<<<<<< HEAD
-					hash_store(hash_table, &search->board, hash_code, &hash_data);
-<<<<<<< HEAD
-=======
-			if (USE_TC && hash_get(hash_table, &next, etc_hash_code, &etc) && etc.selectivity >= selectivity && etc.depth >= etc_depth) {
-=======
-			if (USE_TC && hash_get(hash_table, &next, etc_hash_code, &etc) && etc.wl.c.selectivity >= selectivity && etc.wl.c.depth >= etc_depth) {
->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union)
-				*score = -etc.upper;
-				if (*score > alpha) {
-<<<<<<< HEAD
-					hash_store(hash_table, &search->board, hash_code, depth, selectivity, 0, alpha, beta, *score, move->x);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
-					hash_store_data.score = *score;
-					hash_store_data.data.move[0] = move->x;
-					hash_store(hash_table, &search->board, hash_code, &hash_store_data);
->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in)
-=======
->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame)
-=======
-					hash_store(hash_table, HBOARD_P(&search->board), hash_code, &hash_data);
->>>>>>> e88638e (add vectorcall interface to hash functions)
-=======
 					hash_store(hash_table, &search->board, hash_code, &hash_data);
->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes)
 					CUTOFF_STATS(++statistics.n_etc_high_cutoff;)
 					return true;
 				}
diff --git a/src/search.h b/src/search.h
index 68e476b..cb264f2 100644
--- a/src/search.h
+++ b/src/search.h
@@ -3,23 +3,7 @@
  *
  * Search's header file.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2018
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
- * @date 1998 - 2020
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
- * @date 1998 - 2022
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
- * @date 1998 - 2023
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -79,55 +63,21 @@ extern struct Level {
 
 /** search stare */
 typedef struct Search {
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 7167fe4 (Fill struct Search AVX alignment hole)
 	Board board;                                  /**< othello board (16) */
 
 	volatile unsigned long long n_nodes;          /**< node counter (8) */
 	volatile unsigned long long child_nodes;      /**< node counter (8) */
 
-<<<<<<< HEAD
-=======
-	Board board[1];                               /**< othello board */
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-	Board board;                                  /**< othello board */
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
->>>>>>> 7167fe4 (Fill struct Search AVX alignment hole)
 	Eval eval;                                    /**< eval */
 
 	SquareList empties[BOARD_SIZE + 2];           /**< list of empty squares */
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-	int n_empties;                                /**< number of empty squares */
->>>>>>> 5e86fd6 (Change pointer-linked empty list to index-linked)
-=======
->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set)
 	int player;                                   /**< player color */
 	int id;                                       /**< search id */
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 0a166fd (Remove 1 element array coding style)
 	HashTable hash_table;                         /**< hashtable */
 	HashTable pv_table;                           /**< hashtable for the pv */
 	HashTable shallow_table;                      /**< hashtable for short search */
 	Random random;                                /**< random generator */
-<<<<<<< HEAD
-=======
-	HashTable hash_table[1];                      /**< hashtable */
-	HashTable pv_table[1];                        /**< hashtable for the pv */
-	HashTable shallow_table[1];                   /**< hashtable for short search */
-	Random random[1];                             /**< random generator */
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
->>>>>>> 0a166fd (Remove 1 element array coding style)
 
 	struct TaskStack *tasks;                      /**< available task queue */
 	struct Task *task;                            /**< search task */
@@ -178,31 +128,11 @@ typedef struct Search {
 struct Node;
 
 extern const unsigned char QUADRANT_ID[];
-<<<<<<< HEAD
-<<<<<<< HEAD
-extern const unsigned long long quadrant_mask[];
-extern const Selectivity selectivity_table[];
-extern const int NO_SELECTIVITY;
-<<<<<<< HEAD
-<<<<<<< HEAD
-// extern const signed char NWS_STABILITY_THRESHOLD[];
-extern const signed char PVS_STABILITY_THRESHOLD[];
-=======
-=======
 extern const unsigned long long quadrant_mask[];
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
 extern const Selectivity selectivity_table[];
 extern const int NO_SELECTIVITY;
-extern const unsigned char NWS_STABILITY_THRESHOLD[];
-extern const unsigned char PVS_STABILITY_THRESHOLD[];
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
-extern const signed char NWS_STABILITY_THRESHOLD[];
-=======
 // extern const signed char NWS_STABILITY_THRESHOLD[];
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
 extern const signed char PVS_STABILITY_THRESHOLD[];
->>>>>>> 867c81c (Omit restore board/parity in search_shallow; tweak NWS_STABILITY)
 extern const unsigned char SQUARE_TYPE[];
 
 /* function definition */
@@ -231,37 +161,11 @@ void search_swap_parity(Search*, const int);
 void search_get_movelist(const Search*, MoveList*);
 // void search_update_endgame(Search*, const Move*);
 // void search_restore_endgame(Search*, const Move*);
-<<<<<<< HEAD
-<<<<<<< HEAD
-// void search_pass_endgame(Search*);
-=======
-void search_pass_endgame(Search*);
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
 // void search_pass_endgame(Search*);
->>>>>>> 8566ed0 (vector call version of board_next & get_moves)
 void search_update_midgame(Search*, const Move*);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 void search_restore_midgame(Search*, int, const Eval*);
 void search_update_pass_midgame(Search*, Eval*);
 void search_restore_pass_midgame(Search*, const Eval*);
-=======
-void search_restore_midgame(Search*, const Move*, const Eval*);
-=======
-void search_restore_midgame(Search*, int, const Search_Backup*);
-<<<<<<< HEAD
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-void search_update_pass_midgame(Search*);
-void search_restore_pass_midgame(Search*);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-void search_update_pass_midgame(Search*, Eval*);
-void search_restore_pass_midgame(Search*, const Eval*);
->>>>>>> e970433 (Restore eval by copy in search_restore_pass_midgame)
 long long search_clock(Search*);
 long long search_time(Search*);
 unsigned long long search_count_nodes(Search*);
@@ -271,39 +175,12 @@ int get_pv_extension(const int, const int);
 
 void result_print(Result*, FILE*);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 // bool search_SC_PVS(Search*, int*, int*, int*);
 bool search_SC_NWS(Search*, const int, int*);
-<<<<<<< HEAD
 bool search_SC_NWS_4(Search*, const int, int*);
 // bool search_TC_PVS(HashData*, const int, const int, int*, int*, int*);
-=======
-bool search_SC_NWS_fulls_given(Search *, const int, int *, unsigned long long, V4DI *);
-bool search_TC_PVS(HashData*, const int, const int, volatile int*, volatile int*, int*);
->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction)
 bool search_TC_NWS(HashData*, const int, const int, const int, int*);
-<<<<<<< HEAD
 // bool search_ETC_PVS(Search*, MoveList*, unsigned long long, const int, const int, int*, int*, int*);
-=======
-// bool search_ETC_PVS(Search*, MoveList*, unsigned long long, const int, const int, volatile int*, volatile int*, int*);
->>>>>>> 9794cc1 (Store solid-normalized hash in PVS_midgame)
-=======
-bool search_SC_PVS(Search*, int*, int*, int*);
-bool search_SC_NWS(Search*, const int, int*);
-=======
-// bool search_SC_PVS(Search*, int*, int*, int*);
-<<<<<<< HEAD
-bool search_SC_NWS(Search*, const int, const int, int*);
->>>>>>> bb98132 (Split 5 empties search_shallow loop; tune stabiliby cutoff)
-=======
-bool search_SC_NWS(Search*, const int, int*);
-bool search_SC_NWS_4(Search*, const int, int*);
->>>>>>> 266ad5a (minimax from 5 empties and swap min/max stages)
-// bool search_TC_PVS(HashData*, const int, const int, int*, int*, int*);
-bool search_TC_NWS(HashData*, const int, const int, const int, int*);
-// bool search_ETC_PVS(Search*, MoveList*, unsigned long long, const int, const int, int*, int*, int*);
->>>>>>> 4303b09 (Returns all full lines in full[4])
 bool search_ETC_NWS(Search*, MoveList*, unsigned long long, const int, const int, const int, int*);
 
 NodeType next_node_type(const NodeType parent, const bool first_move);
@@ -314,23 +191,8 @@ extern int board_score_1(const unsigned long long, const int, const int);
 int NWS_endgame(Search*, const int);
 
 int search_eval_0(Search*);
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-int search_eval_1(Search*, int, int, unsigned long long);
-int search_eval_2(Search*, int, int, unsigned long long);
-=======
-int search_eval_1(Search*, const int, int, unsigned long long);
-int search_eval_2(Search*, int, const int, unsigned long long);
->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow)
-=======
-int search_eval_1(Search*, const int, int, bool);
-int search_eval_2(Search*, int, const int, bool);
->>>>>>> 9f982ee (Revise PASS handling; prioritymoves in shallow; optimize Neighbour test)
-=======
 int search_eval_1(Search*, int, int, unsigned long long);
 int search_eval_2(Search*, int, int, unsigned long long);
->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2)
 int NWS_midgame(Search*, const int, int, struct Node*);
 int PVS_midgame(Search*, const int, const int, int, struct Node*);
 // static int NWS_shallow(Search*, const int, int, HashTable*);
@@ -359,13 +221,6 @@ int search_get_pv_cost(Search*);
 void show_current_move(FILE *f, Search*, const Move*, const int, const int, const bool);
 int search_bound(const Search*, int);
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 #if defined(hasSSE2) || defined(__ARM_NEON) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(ANDROID)
   #ifdef __AVX2__
 	#define	mm_malloc(s)	_mm_malloc((s), 32)
@@ -386,63 +241,16 @@ int search_bound(const Search*, int);
 	}
 	#define mm_free(p)	free(*((void **)(p) - 1));
   #endif
-<<<<<<< HEAD
-=======
-#ifdef __SSE2__
-=======
-#ifdef hasSSE2
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-#if defined(hasSSE2) || defined(hasNeon) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86)
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-#if defined(hasSSE2) || defined(hasNeon) || defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(ANDROID)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-	#ifdef __AVX2__
-		#define	mm_malloc(s)	_mm_malloc((s), 32)
-		#define	mm_free(p)	_mm_free(p)
-	#elif defined(hasSSE2) && !defined(ANDROID)
-		#define	mm_malloc(s)	_mm_malloc((s), 16)
-		#define	mm_free(p)	_mm_free(p)
-	#elif defined(_MSC_VER)
-		#define	mm_malloc(s)	_aligned_malloc((s), 16)
-		#define	mm_free(p)	_aligned_free(p)
-	#else
-		static inline void *mm_malloc(size_t s) {
-			void *p = malloc(s + 16 + sizeof(void *));
-			if (!p) return p;
-			void **q = (void **)(((size_t) p + 15 + sizeof(void *)) & -16);
-			*(q - 1) = p;
-			return (void *) q;
-		}
-		#define mm_free(p)	free(*((void **)(p) - 1));
-	#endif
-<<<<<<< HEAD
-	#define	mm_free(p)	_mm_free(p)
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
 #else
 	#define	mm_malloc(s)	malloc(s)
 	#define	mm_free(p)	free(p)
 #endif
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> b612999 (SSE optimized search_pass)
 #ifdef hasSSE2	// search->board is aligned
 	#define	search_pass(search)	_mm_store_si128((__m128i *) &(search)->board, _mm_shuffle_epi32(*(__m128i *) &(search)->board, 0x4e))
 #else
 	#define	search_pass(search)	board_pass(&(search)->board)
 #endif
 
-<<<<<<< HEAD
-=======
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
->>>>>>> b612999 (SSE optimized search_pass)
 #endif
 
diff --git a/src/settings.h b/src/settings.h
index 391cec6..d007b7b 100644
--- a/src/settings.h
+++ b/src/settings.h
@@ -1,43 +1,11 @@
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
 /**
  * @file settings.h
  *
  * Various macro / constants to control algorithm usage.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2024
-=======
- * @date 1998 - 2017
->>>>>>> b3f048d (copyright changes)
-=======
- * @date 1998 - 2018
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
- * @date 1998 - 2024
->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build)
- * @author Richard Delorme
- * @version 4.5
-=======
- * @date 1998 - 2020
- * @author Richard Delorme
- * @version 4.4
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
- * @date 1998 - 2022
-=======
- * @date 1998 - 2023
->>>>>>> 52949e1 (Add build options and files for new count_last_flips)
  * @author Richard Delorme
  * @version 4.5
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
  */
 
 
@@ -48,87 +16,27 @@
 
 #define MOVE_GENERATOR_CARRY 1		// 32.6Mnps
 #define MOVE_GENERATOR_KINDERGARTEN 2	// 31.1Mnps
-<<<<<<< HEAD
-<<<<<<< HEAD
-#define MOVE_GENERATOR_SSE 3		// 34.4Mnps	// best for generic X64
-#define MOVE_GENERATOR_BITSCAN 4	// 32.7Mnps	// best for AMD K10/FX	// 7.21Mnps (neon_bitscan)
-=======
 #define MOVE_GENERATOR_SSE 3		// 34.4Mnps	// best for generic X64
-<<<<<<< HEAD
-#define MOVE_GENERATOR_BITSCAN 4	// 32.7Mnps	// best for AMD K10/FX
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 #define MOVE_GENERATOR_BITSCAN 4	// 32.7Mnps	// best for AMD K10/FX	// 7.21Mnps (neon_bitscan)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 #define MOVE_GENERATOR_ROXANE 5		// 29.0Mnps
 #define MOVE_GENERATOR_32 6		// 31.3Mnps	// best for 32bit X86
 #define MOVE_GENERATOR_SSE_BSWAP 7	// 30.6Mnps
 #define MOVE_GENERATOR_AVX 8		// 34.7Mnps	// best for modern X64
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 52949e1 (Add build options and files for new count_last_flips)
 #define MOVE_GENERATOR_AVX512 9
 #define MOVE_GENERATOR_NEON 10		// 6.71Mnps (neon_rbit), 6.51Mnps (neon_lzcnt), 6.17Mnps (neon_ppfill)
 #define MOVE_GENERATOR_SVE 11
-<<<<<<< HEAD
 
-<<<<<<< HEAD
 #define COUNT_LAST_FLIP_CARRY 1		// 33.8Mnps
-=======
-=======
-=======
-#define MOVE_GENERATOR_AVX512	9
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 393b667 (Experimental AVX512VL/CD version of move generator)
-#define MOVE_GENERATOR_NEON 10
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-#define MOVE_GENERATOR_NEON 10		// neon_lzcnt (6.51Mnps), neon_ppfill (5.55Mnps)
-#define MOVE_GENERATOR_NEON_BITSCAN 11	// neon_bitscan (6.43Mnps)
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-#define MOVE_GENERATOR_NEON 10		// 6.71Mnps (neon_rbit), 6.51Mnps (neon_lzcnt), 6.17Mnps (neon_ppfill)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-=======
->>>>>>> ba1be42 (AVX512 last flip with lastflip_highcut)
-
-<<<<<<< HEAD
-#define	COUNT_LAST_FLIP_CARRY 1		// 33.8Mnps
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-#define COUNT_LAST_FLIP_CARRY 1		// 33.8Mnps
->>>>>>> ddb5d3f (Add SVE SIMULLASTFLIP to endgame_neon (but not enabled))
 #define COUNT_LAST_FLIP_KINDERGARTEN 2	// 33.5Mnps
 #define COUNT_LAST_FLIP_SSE 3		// 34.7Mnps
 #define COUNT_LAST_FLIP_BITSCAN 4	// 33.9Mnps
 #define COUNT_LAST_FLIP_PLAIN 5		// 33.3Mnps
 #define COUNT_LAST_FLIP_32 6		// 33.1Mnps
 #define COUNT_LAST_FLIP_BMI2 7		// 34.7Mnps	// slow on AMD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> ddb5d3f (Add SVE SIMULLASTFLIP to endgame_neon (but not enabled))
 #define COUNT_LAST_FLIP_AVX_PPFILL 8
 #define COUNT_LAST_FLIP_AVX512 9
 #define COUNT_LAST_FLIP_NEON 10
 #define COUNT_LAST_FLIP_SVE 11
-<<<<<<< HEAD
-=======
-#define	COUNT_LAST_FLIP_CARRY 1
-#define COUNT_LAST_FLIP_KINDERGARTEN 2
-#define COUNT_LAST_FLIP_SSE 3
-#define COUNT_LAST_FLIP_BITSCAN 4
-#define COUNT_LAST_FLIP_PLAIN 5
-#define COUNT_LAST_FLIP_32 6
-#define COUNT_LAST_FLIP_BMI2 7
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
->>>>>>> ddb5d3f (Add SVE SIMULLASTFLIP to endgame_neon (but not enabled))
 
 /**move generation. */
 #ifndef MOVE_GENERATOR
@@ -144,141 +52,19 @@
 		#define MOVE_GENERATOR MOVE_GENERATOR_NEON
 	#elif defined(__arm__) || defined(_M_ARM)
 		#define MOVE_GENERATOR MOVE_GENERATOR_BITSCAN
-=======
-#define MOVE_GENERATOR_SSE 3		// 33.3Mnps
-#define MOVE_GENERATOR_BITSCAN 4	// 32.7Mnps
-#define MOVE_GENERATOR_ROXANE 5		// 29.0Mnps
-#define MOVE_GENERATOR_32 6		// 31.3Mnps
-#define MOVE_GENERATOR_SSE_BSWAP 7	// 30.6Mnps
-#define MOVE_GENERATOR_AVX 8		// 34.4Mnps
-
-#define	COUNT_LAST_FLIP_CARRY 1		// 33.8Mnps
-#define COUNT_LAST_FLIP_KINDERGARTEN 2	// 33.5Mnps
-#define COUNT_LAST_FLIP_SSE 3		// 33.2Mnps
-#define COUNT_LAST_FLIP_BITSCAN 4	// 33.9Mnps
-#define COUNT_LAST_FLIP_PLAIN 5		// 33.3Mnps
-#define COUNT_LAST_FLIP_32 6		// 33.1Mnps
-#define COUNT_LAST_FLIP_BMI2 7		// 34.4Mnps
-
-/**move generation. */
-#ifndef MOVE_GENERATOR
-	#if defined(__x86_64__) || defined(_M_X64)
-		#ifdef __AVX2__
-			#define MOVE_GENERATOR MOVE_GENERATOR_AVX
-		#else
-			#define MOVE_MOVE_GENERATOR MOVE_GENERATOR_BITSCAN
-		#endif
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
-=======
-#define	COUNT_LAST_FLIP_AVX_PPFILL 8
-#define	COUNT_LAST_FLIP_AVX512 9
->>>>>>> 52949e1 (Add build options and files for new count_last_flips)
-
-/**move generation. */
-#ifndef MOVE_GENERATOR
-	#if defined(__AVX512VL__) || defined(__AVX10_1__)
-		#define MOVE_GENERATOR MOVE_GENERATOR_AVX512
-	#elif defined(__AVX2__)
-		#define MOVE_GENERATOR MOVE_GENERATOR_AVX
-	#elif defined(__SSE2__) || defined(_M_X64) || defined(hasSSE2)
-		#define MOVE_GENERATOR MOVE_GENERATOR_SSE
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	#elif defined(HAS_CPU_64) // aarch64
-		#define MOVE_GENERATOR MOVE_GENERATOR_BITSCAN
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-	#elif defined(__aarch64__)
-		#define MOVE_GENERATOR MOVE_GENERATOR_NEON
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
-=======
-	#elif defined(__aarch64__) || defined(_M_ARM64)
-=======
-	#elif defined(__aarch64__) || defined(_M_ARM64) || defined(hasNeon)
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-=======
-	#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__ARM_NEON)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-		#define MOVE_GENERATOR MOVE_GENERATOR_NEON
-	#elif defined(__arm__) || defined(_M_ARM)
-		#define MOVE_GENERATOR MOVE_GENERATOR_BITSCAN
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
 	#else
 		#define MOVE_GENERATOR MOVE_GENERATOR_32
 	#endif
 #endif
 #ifndef LAST_FLIP_COUNTER
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 54adf17 (Fall down to SSE board_score_1 for AVX512 lazy LC w/o DEFs)
 	#if (defined(__AVX512VL__) || defined(__AVX10_1__)) && (defined(SIMULLASTFLIP512) || defined(SIMULLASTFLIP) || defined(LASTFLIP_HIGHCUT))
 		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_AVX512
 	#elif defined(__SSE2__) || defined(_M_X64) || defined(hasSSE2)
-=======
-	#if defined(__SSE2__) || defined(_M_X64) || defined(hasSSE2) || defined(__aarch64__) || defined(_M_ARM64) || defined(__ARM_NEON)
->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build)
-=======
-	#if defined(__AVX512VL__) || defined(__AVX10_1__)
-		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_AVX512
-<<<<<<< HEAD
-	#elif defined(__SSE2__) || defined(_M_X64) || defined(hasSSE2) || defined(__aarch64__) || defined(_M_ARM64) || defined(__ARM_NEON)
->>>>>>> ba1be42 (AVX512 last flip with lastflip_highcut)
-=======
-	#elif defined(__SSE2__) || defined(_M_X64) || defined(hasSSE2)
->>>>>>> ddb5d3f (Add SVE SIMULLASTFLIP to endgame_neon (but not enabled))
 		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_SSE
 	#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__ARM_NEON)
 		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_NEON
 	#elif defined(__arm__) || defined(_M_ARM)
 		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_BITSCAN
-=======
-	#ifdef __x86_64__
-		#if defined(__BMI2__) && !defined(__BDVER4__) && !defined(__ZNVER1__)	// BMI2 is slow on AMD
-=======
-	#if defined(__x86_64__) || defined(_M_X64)
-		#if defined(__AVX2__) && !defined(__BDVER4__) && !defined(__ZNVER1__)	// BMI2 is slow on AMD
->>>>>>> 1dc032e (Improve visual c compatibility)
-			#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_BMI2
-		#else
-			#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_BITSCAN
-		#endif
->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added)
-=======
-	#ifdef hasSSE2
-		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_SSE
-<<<<<<< HEAD
-<<<<<<< HEAD
-		// #define AVXLASTFLIP	1	// slower on slow vzeroupper CPU
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
-	#elif defined(HAS_CPU_64) // aarch64
-		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_CARRY
->>>>>>> f2da03e (Refine arm builds adding neon support.)
-=======
-	#elif defined(__aarch64__)
-=======
-	#if defined(__SSE2__) || defined(_M_X64) || defined(hasSSE2) || defined(__aarch64__) || defined(_M_ARM64)
-=======
-	#if defined(__SSE2__) || defined(_M_X64) || defined(hasSSE2) || defined(__aarch64__) || defined(_M_ARM64) || defined(hasNeon)
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_SSE
-	#elif defined(__arm__) || defined(_M_ARM)
->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32)
-		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_BITSCAN
->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h)
 	#else
 		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_32
 	#endif
@@ -353,18 +139,9 @@
 /** Try ETC down to this depth. */
 #define ETC_MIN_DEPTH 5
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-/** Dogaishi hash reduction Depth (before DEPTH_TO_SHALLOW_SEARCH) */
-#define MASK_SOLID_DEPTH 9
-
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
-=======
 /** Dogaishi hash reduction Depth (before DEPTH_TO_SHALLOW_SEARCH) */
 #define MASK_SOLID_DEPTH 9
 
->>>>>>> 26dad03 (Use player bits only in board_score_1)
 /** bound for usefull move sorting */
 #define SORT_ALPHA_DELTA 8
 
@@ -394,158 +171,3 @@
 
 #endif /* EDAX_SETTINGS_H */
 
-<<<<<<< HEAD
-=======
-/**
- * @file settings.h
- *
- * Various macro / constants to control algorithm usage.
- *
- * @date 1998 - 2020
- * @author Richard Delorme
- * @version 4.4
- */
-
-
-#ifndef EDAX_SETTINGS_H
-#define EDAX_SETTINGS_H
-
-#include <stdbool.h>
-
-#define MOVE_GENERATOR_CARRY 1		// 32.6Mnps
-#define MOVE_GENERATOR_KINDERGARTEN 2	// 31.1Mnps
-#define MOVE_GENERATOR_SSE 3		// 34.4Mnps	// best for generic X64
-#define MOVE_GENERATOR_BITSCAN 4	// 32.7Mnps	// best for AMD K10/FX
-#define MOVE_GENERATOR_ROXANE 5		// 29.0Mnps
-#define MOVE_GENERATOR_32 6		// 31.3Mnps	// best for 32bit X86
-#define MOVE_GENERATOR_SSE_BSWAP 7	// 30.6Mnps
-#define MOVE_GENERATOR_AVX 8		// 34.7Mnps	// best for modern X64
-
-#define	COUNT_LAST_FLIP_CARRY 1		// 33.8Mnps
-#define COUNT_LAST_FLIP_KINDERGARTEN 2	// 33.5Mnps
-#define COUNT_LAST_FLIP_SSE 3		// 34.7Mnps
-#define COUNT_LAST_FLIP_BITSCAN 4	// 33.9Mnps
-#define COUNT_LAST_FLIP_PLAIN 5		// 33.3Mnps
-#define COUNT_LAST_FLIP_32 6		// 33.1Mnps
-#define COUNT_LAST_FLIP_BMI2 7		// 34.7Mnps	// slow on AMD
-
-/**move generation. */
-#ifndef MOVE_GENERATOR
-	#ifdef __AVX2__
-		#define MOVE_GENERATOR MOVE_GENERATOR_AVX
-	#elif defined(hasSSE2)
-		#define MOVE_GENERATOR MOVE_GENERATOR_SSE
-	#else
-		#define MOVE_GENERATOR MOVE_GENERATOR_32
-	#endif
-#endif
-#ifndef LAST_FLIP_COUNTER
-	#ifdef hasSSE2
-		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_SSE
-		// #define AVXLASTFLIP	1	// slower on slow vzeroupper CPU
-	#else
-		#define LAST_FLIP_COUNTER COUNT_LAST_FLIP_32
-	#endif
-#endif
-
-/** transposition cutoff usage. */
-#define USE_TC true
-
-/** stability cutoff usage. */
-#define USE_SC true
-
-/** enhanced transposition cutoff usage. */
-#define USE_ETC true
-
-/** probcut usage. */
-#define USE_PROBCUT true
-
-/** Use recursive probcut */
-#define USE_RECURSIVE_PROBCUT true
-
-/** limit recursive probcut level */
-#define LIMIT_RECURSIVE_PROBCUT(x) x
-
-/** kogge-stone parallel prefix algorithm usage.
- *  0 -> none, 1 -> move generator, 2 -> stability, 3 -> both.
- */
-#define KOGGE_STONE 2
-
-/** 1 stage parallel prefix algorithm usage.
- *  0 -> none, 1 -> move generator, 2 -> stability, 3 -> both.
- */
-#define PARALLEL_PREFIX 1
-
-#if (KOGGE_STONE & PARALLEL_PREFIX)
-	#error "usage of 2 incompatible algorithms"
-#endif
-
-/** Internal Iterative Deepening. */
-#define USE_IID false
-
-/** Use previous search result */
-#define USE_PREVIOUS_SEARCH true
-
-/** Allow type puning */
-#ifndef USE_TYPE_PUNING
-// #ifndef ANDROID
-#define USE_TYPE_PUNING 1
-// #endif
-#endif
-
-/** Hash-n-way. */
-#define HASH_N_WAY 4
-
-/** hash align */
-#define HASH_ALIGNED 1
-
-/** PV extension (solve PV alone sooner) */
-#define USE_PV_EXTENSION true
-
-/** Swith from endgame to shallow search (faster but less node efficient) at this depth. */
-#define DEPTH_TO_SHALLOW_SEARCH 7
-
-/** Switch from midgame to endgame search (faster but less node efficient) at this depth. */
-#define DEPTH_MIDGAME_TO_ENDGAME 15
-
-/** Switch from midgame result (evaluated score) to endgame result (exact score) at this number of empties. */
-#define ITERATIVE_MIN_EMPTIES 10
-
-/** Store bestmoves in the pv_hash up to this height. */
-#define PV_HASH_HEIGHT 5
-
-/** Try ETC down to this depth. */
-#define ETC_MIN_DEPTH 5
-
-/** bound for usefull move sorting */
-#define SORT_ALPHA_DELTA 8
-
-/** Try Node splitting (for parallel search) down to that depth. */
-#define SPLIT_MIN_DEPTH 5
-
-/** Stop Node splitting (for parallel search) when few move remains.  */
-#define SPLIT_MIN_MOVES_TODO 1
-
-/** Stop Node splitting (for parallel search) after a few splitting.  */
-#define SPLIT_MAX_SLAVES 3
-
-/** Branching factor (to adjust alloted time). */
-#define BRANCHING_FACTOR 2.24
-
-/** Parallelisable work. */
-#define SMP_W 49.0
-
-/** Critical time. */
-#define SMP_C 1.0
-
-/** Fast perft */
-#define  FAST_PERFT true
-
-/** multi_pv depth */
-#define MULTIPV_DEPTH 10
-
-#endif /* EDAX_SETTINGS_H */
-
->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6)
-=======
->>>>>>> 3e1ed4f (fix cr/lf in repository to lf)
diff --git a/src/stats.h b/src/stats.h
index 519bd86..792ed14 100644
--- a/src/stats.h
+++ b/src/stats.h
@@ -38,23 +38,11 @@
 #if COUNT_NODES & 1
 	/** node counter for internal nodes */
 	#define SEARCH_UPDATE_INTERNAL_NODES(x) (++(x))
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 46e4b64 (Optimize endgame (esp. 2 empties) score comparisons)
 	#define SEARCH_UPDATE_2EMPTIES_NODES(x) x
 #else
 	/** no node counter for internal nodes */
 	#define SEARCH_UPDATE_INTERNAL_NODES(x)
 	#define SEARCH_UPDATE_2EMPTIES_NODES(x)
-<<<<<<< HEAD
-=======
-#else
-	/** no node counter for internal nodes */
-	#define SEARCH_UPDATE_INTERNAL_NODES(x)
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
-=======
->>>>>>> 46e4b64 (Optimize endgame (esp. 2 empties) score comparisons)
 #endif
 #if COUNT_NODES & 2
 	/** node counter for pattern changes */
diff --git a/src/util.c b/src/util.c
index 84dffb1..cbbc969 100644
--- a/src/util.c
+++ b/src/util.c
@@ -6,23 +6,7 @@
  * This should be the only file with linux/windows
  * dedicated code.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2018
->>>>>>> 4cba71a (Use utf-8 for french/degree/micro chars; consistent capitalize in opening names for string-pooling)
-=======
- * @date 1998 - 2020
->>>>>>> 3848d16 (Satisfy msys2 and gcc 9 warnings)
-=======
- * @date 1998 - 2022
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -239,35 +223,13 @@ void relax(int t)
 char* format_scientific(double v, const char *unit, char *f)
 {
 #ifdef UNICODE
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	static const wchar_t multiple[] = L"EPTGMK mμnpfa"; // μ:U+03BC
-	static const char fmt[] = " %5.*f %lc%s";
-#else
-	static const char multiple[] = "EPTGMK mμnpfa"; // μ:B5@CP1252
-	static const char fmt[] = " %5.*f %c%s";
-#endif
-	int u, d;
-=======
-	static const wchar_t multiple[] = L"EPTGMK mµnpfa"; //
-=======
-	static const wchar_t multiple[] = L"EPTGMK mμnpfa"; //
-=======
 	static const wchar_t multiple[] = L"EPTGMK mμnpfa"; // μ:U+03BC
->>>>>>> 7204cd1 (Small fix on debug build, etc.)
 	static const char fmt[] = " %5.*f %lc%s";
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 #else
 	static const char multiple[] = "EPTGMK mμnpfa"; // μ:B5@CP1252
 	static const char fmt[] = " %5.*f %c%s";
 #endif
-<<<<<<< HEAD
-	int u;
->>>>>>> 4cba71a (Use utf-8 for french/degree/micro chars; consistent capitalize in opening names for string-pooling)
-=======
 	int u, d;
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 
 	if (fabs(v) < 1e-24) {
 		u = 0;
@@ -277,31 +239,11 @@ char* format_scientific(double v, const char *unit, char *f)
 		v /= pow(10, 3 * u);
 	}
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 	if (fabs(v) - floor(fabs(v)) < 0.01) d = 1;
 	else if (fabs(v + 0.05) < 10.0) d = 3;
 	else if (fabs(v + 0.5) < 100.0) d = 2;
 	else d = 1;
 	sprintf(f, fmt, d, v, multiple[6 - u], unit);
-<<<<<<< HEAD
-=======
-#ifdef UNICODE
-	if (fabs(v) - floor(fabs(v)) < 0.01) sprintf(f, " %5.1f %lc%s", v, multiple[6 - u], unit);
-	else if (fabs(v + 0.05) < 10.0) sprintf(f, " %5.3f %lc%s", v, multiple[6 - u], unit);
-	else if (fabs(v + 0.5) < 100.0) sprintf(f, " %5.2f %lc%s", v, multiple[6 - u], unit);
-	else sprintf(f, " %5.1f %lc%s", v, multiple[6 - u], unit);
-#else
-	if (fabs(v) - floor(fabs(v)) < 0.01) sprintf(f, " %5.1f %c%s", v, multiple[6 - u], unit);
-	else if (fabs(v + 0.05) < 10.0) sprintf(f, " %5.3f %c%s", v, multiple[6 - u], unit);
-	else if (fabs(v + 0.5) < 100.0) sprintf(f, " %5.2f %c%s", v, multiple[6 - u], unit);
-	else sprintf(f, " %5.1f %c%s", v, multiple[6 - u], unit);
-#endif
->>>>>>> 4cba71a (Use utf-8 for french/degree/micro chars; consistent capitalize in opening names for string-pooling)
-=======
->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods)
 
 	return f;
 }
@@ -698,15 +640,7 @@ char* parse_move(const char *string, const Board *board, Move *move)
 		char *word = parse_skip_spaces(string);
 		int x = string_to_coordinate(word);
 		move->x = x;
-<<<<<<< HEAD
-<<<<<<< HEAD
-		move->flipped = board_flip(board, x);
-=======
-		move->flipped = Flip(x, board->player, board->opponent);
->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested))
-=======
 		move->flipped = board_flip(board, x);
->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99)
 		if (move->flipped && !board_is_occupied(board, x)) {
 			return word + 2;
 		} else if (board_is_pass(board)) {
diff --git a/src/util.h b/src/util.h
index 05fc5ad..e6bba73 100644
--- a/src/util.h
+++ b/src/util.h
@@ -3,15 +3,7 @@
  *
  * @brief Miscellaneous utilities header.
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2020
-=======
- * @date 1998 - 2018
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
- * @date 1998 - 2020
->>>>>>> 3848d16 (Satisfy msys2 and gcc 9 warnings)
  * @author Richard Delorme
  * @version 4.4
  */
@@ -260,15 +252,7 @@ typedef CRITICAL_SECTION Lock;
 typedef CRITICAL_SECTION SpinLock;
 
 /** Some buggy compilers need the following declarations */
-<<<<<<< HEAD
-<<<<<<< HEAD
-#if defined __MINGW32__ && (_WIN32_WINNT < 0x0600)
-=======
-#if defined(_WIN64) && !defined(_MSC_VER)
->>>>>>> 1dc032e (Improve visual c compatibility)
-=======
 #if defined __MINGW32__ && (_WIN32_WINNT < 0x0600)
->>>>>>> 3848d16 (Satisfy msys2 and gcc 9 warnings)
 
 #if (__MINGW64_VERSION_MAJOR < 3)
 typedef DWORD CONDITION_VARIABLE;
diff --git a/src/ybwc.c b/src/ybwc.c
index 58bf60c..8fd8925 100644
--- a/src/ybwc.c
+++ b/src/ybwc.c
@@ -22,23 +22,7 @@
  * ICCA Journal, Vol. 12, No. 2, pp. 65-73.
  * -# Feldmann R. (1993) Game-Tree Search on Massively Parallel System - PhD Thesis, Paderborn (English version).
  *
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
  * @date 1998 - 2023
-=======
- * @date 1998 - 2018
->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added)
-=======
- * @date 1998 - 2020
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
- * @date 1998 - 2022
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
- * @date 1998 - 2023
->>>>>>> 4087529 (Revise board0 usage; fix unused flips)
  * @author Richard Delorme
  * @version 4.5
  */
@@ -246,15 +230,7 @@ void node_wait_slaves(Node* node)
 		condition_wait(node);
 
 		if (node->is_helping) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-			assert(node->help.run);
-=======
-			assert(node.help->run);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 			assert(node->help.run);
->>>>>>> 7204cd1 (Small fix on debug build, etc.)
 			task_search(&node->help);
 			task_free(&node->help);
 			node->is_helping = false;
@@ -293,15 +269,7 @@ void node_update(Node* node, Move *move)
 		node->bestscore = score;
 		node->bestmove = move->x;
 		if (node->height == 0) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-			record_best_move(search, move, node->alpha, node->beta, node->depth);
-=======
-			record_best_move(search, &search->board, move, node->alpha, node->beta, node->depth);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 			record_best_move(search, move, node->alpha, node->beta, node->depth);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
 			search->result->n_moves_left--;
 		}
 		if (score > node->alpha) node->alpha = score;
@@ -397,21 +365,8 @@ void task_search(Task *task)
 	Node *node = task->node;
 	Search *search = task->search;
 	Move *move = task->move;
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-	Eval eval0;
-	Board board0;
-=======
-	Eval Ev0;
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-	Search_Backup backup;
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
 	Eval eval0;
 	Board board0;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 	int i;
 
 	search_set_state(search, node->search->stop);
@@ -422,47 +377,16 @@ void task_search(Task *task)
 		const int alpha = node->alpha;
 		if (alpha >= node->beta) break;
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 		board0 = search->board;
 		eval0 = search->eval;
-=======
-		Ev0 = search->eval;
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-		Ev0.feature = search->eval.feature;
->>>>>>> 037f46e (New eval_update_leaf updates eval on copy; save-restore eval.feature only)
-=======
-		backup.board = search->board;
-		backup.eval = search->eval;
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
-		board0 = search->board;
-		eval0 = search->eval;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 		search_update_midgame(search, move);
 			move->score = -NWS_midgame(search, -alpha - 1, node->depth - 1, node);
 			if (alpha < move->score && move->score < node->beta) {
 				move->score = -PVS_midgame(search, -node->beta, -alpha, node->depth - 1, node);
 				assert(node->pv_node == true);
 			}
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
 		search_restore_midgame(search, move->x, &eval0);
 		search->board = board0;
-=======
-		search_restore_midgame(search, move, &Ev0);
->>>>>>> f1d221c (Replace eval_restore with simple save-restore, as well as parity)
-=======
-		search_restore_midgame(search, move->x, &backup);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
-=======
-		search_restore_midgame(search, move->x, &eval0);
-		search->board = board0;
->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM)
 		if (node->height == 0) {
 			move->cost = search_get_pv_cost(search);
 			move->score = search_bound(search, move->score);
@@ -474,15 +398,7 @@ void task_search(Task *task)
 			node->bestscore = move->score;
 			node->bestmove = move->x;
 			if (node->height == 0) {
-<<<<<<< HEAD
-<<<<<<< HEAD
-				record_best_move(search, move, alpha, node->beta, node->depth);
-=======
-				record_best_move(search, &search->board, move, alpha, node->beta, node->depth);
->>>>>>> 0a166fd (Remove 1 element array coding style)
-=======
 				record_best_move(search, move, alpha, node->beta, node->depth);
->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame)
 				search->result->n_moves_left--;
 				if (search->options.verbosity == 4) pv_debug(search, move, stdout);
 			}