spacemeshos · AndrewAR2 · Apr 20, 2023 · Apr 20, 2023 · Apr 24, 2023 · May 1, 2023
diff --git a/README.md b/README.md
@@ -190,6 +190,7 @@ Usage:
 --pow-diff           or -d <0-256>         count of leading zero bits in target D value [default - 16]
 --srand-seed         or -ss <unsigned int> set srand seed value for POW test: 0 - use zero id/seed [default], -1 - use random value
 --solution-idx       or -si <unsigned int> set solution index for POW test: index will be compared to be the found solution for Pow [default - unset]
+                        -N <scrypt N>      set scrypt parameter N [default - 512]
 ```
 
 ## Mixing CUDA and Vulkan

diff --git a/src/vulkan/driver-vulkan.cpp b/src/vulkan/driver-vulkan.cpp
@@ -113,7 +113,7 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS
 {
 	_vulkanState *state = (_vulkanState *)calloc(1, sizeof(_vulkanState));
 
-	uint32_t scrypt_mem = 128 * cgpu->r;
+	uint32_t scrypt_mem = 128 * cgpu->r * cgpu->N;
 
 	uint32_t computeQueueFamilyIndex = getComputeQueueFamilyIndex(cgpu->driver_id);
 	if (computeQueueFamilyIndex < 0) {
@@ -135,32 +135,24 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS
 	gVulkan.vkFreeMemory(state->vkDevice, tmpMem, NULL);
 
 	cgpu->work_size = 64;
-
-	applog(LOG_NOTICE, "GPU %d: selecting lookup gap of 4", cgpu->driver_id);
 	cgpu->lookup_gap = 4;
 
-	unsigned int bsize = 1024;
-	size_t ipt = (bsize / cgpu->lookup_gap + (bsize % cgpu->lookup_gap > 0));
+	size_t ipt = scrypt_mem / cgpu->lookup_gap;
 
 	if (!cgpu->buffer_size) {
-		unsigned int base_alloc = (int)(cgpu->gpu_max_alloc * 88 / 100 / 1024 / 1024 / 8) * 8 * 1024 * 1024;
-		cgpu->thread_concurrency = (uint32_t)(base_alloc / scrypt_mem / ipt);
+		unsigned int base_alloc = (int)(cgpu->gpu_max_alloc * 92 / 100 / 1024 / 1024 / 8) * 8 * 1024 * 1024;
+		cgpu->thread_concurrency = (uint32_t)(base_alloc / ipt);
 		cgpu->buffer_size = base_alloc / 1024 / 1024;
-		applog(LOG_DEBUG, "88%% Max Allocation: %u", base_alloc);
+		applog(LOG_DEBUG, "92%% Max Allocation: %u", base_alloc);
 		applog(LOG_NOTICE, "GPU %d: selecting buffer_size of %zu", cgpu->driver_id, cgpu->buffer_size);
 	}
 
-	if (cgpu->buffer_size) {
-		// use the buffer-size to overwrite the thread-concurrency
-		cgpu->thread_concurrency = (int)((cgpu->buffer_size * 1024 * 1024) / ipt / scrypt_mem);
-	}
-
-	cgpu->thread_concurrency = min(cgpu->thread_concurrency, /*cgpu->work_size*/ 32 * 1024);
+	cgpu->thread_concurrency = min(cgpu->thread_concurrency, 32 * 1024);
 	uint32_t chunkSize = copy_only ? (cgpu->thread_concurrency * 32) : ((cgpu->thread_concurrency * hash_len_bits + 7) / 8);
 
 	applog(LOG_DEBUG, "GPU %d: setting thread_concurrency to %d based on buffer size %d and lookup gap %d", cgpu->driver_id, (int)(cgpu->thread_concurrency), (int)(cgpu->buffer_size), (int)(cgpu->lookup_gap));
 
-	state->bufSize = alignBuffer(scrypt_mem * ipt * cgpu->thread_concurrency, state->alignment);
+	state->bufSize = alignBuffer(ipt * cgpu->thread_concurrency, state->alignment);
 	state->memConstantSize = alignBuffer(sizeof(AlgorithmConstants), state->alignment);
 	state->memParamsSize = alignBuffer(sizeof(AlgorithmParams), state->alignment);
 	state->memInputSize = alignBuffer(PREIMAGE_SIZE, state->alignment);

diff --git a/src/vulkan/gen/CMakeLists.txt b/src/vulkan/gen/CMakeLists.txt
@@ -1,5 +1,9 @@
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
+if (MSVC)
+    set (CMAKE_CXX_FLAGS_DEBUG "/MD /Zi /O2 /Ob1 /DNDEBUG")
+endif()
+
 # A custom command and target to turn the Vulkan kernel into a byte array header
 
 add_custom_command(

diff --git a/src/vulkan/gen/scrypt-chacha.comp b/src/vulkan/gen/scrypt-chacha.comp
@@ -413,8 +413,7 @@ main()
 	uvec4 X[8];
 	const uint lid = gl_LocalInvocationID.x;
 	const uint gid = gl_GlobalInvocationID.x;
-	uint Nfactor = 0;
-	uint tmp = N >> 1;
+	uint tmp;
 	uvec2 nonce;
 
 	scrypt_hmac_state hmac_pw, work;
@@ -432,14 +431,6 @@ main()
 		nonce.x = global_work_offset.x + gid;
 	}
 
-	/* Determine the Nfactor */
-	while ((tmp & 1) == 0) {
-		tmp >>= 1;
-		Nfactor++;
-	}
-
-	const uint effective_concurrency = (concurrent_threads << 9) >> Nfactor;
-
 	password[0] = buffer0[0];
 	password[1] = buffer0[1];
 	password[2] = buffer0[2];
@@ -555,7 +546,7 @@ main()
 	}
 //////////////////////////////////////////////////
 	/* 2: X = ROMix(X) */
-	scrypt_ROMix(X, N, gid, effective_concurrency);
+	scrypt_ROMix(X, N, gid, concurrent_threads);
 
 	/* 3: Out = PBKDF2(password, X) */
 //////////////////////////////////////////////////

diff --git a/src/vulkan/vulkan-helpers.c b/src/vulkan/vulkan-helpers.c
@@ -129,7 +129,7 @@ void vulkan_library_shutdown()
 int getComputeQueueFamilyIndex(uint32_t index)
 {
 	if (index >= gPhysicalDeviceCount) {
-		applog(LOG_ERR, "Card index %u not found\n", index);
+		applog(LOG_ERR, "Card index %u not found", index);
 		return -1;
 	}
 	uint32_t queueFamilyPropertiesCount = 0;
@@ -210,7 +210,7 @@ VkDeviceMemory allocateGPUMemory(int index,  VkDevice vkDevice, const VkDeviceSi
 
 	VkResult ret = (memoryTypeIndex == VK_MAX_MEMORY_TYPES ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS);
 	if (ret != VK_SUCCESS) {
-		applog(LOG_ERR,	"Cannot allocated %u kB GPU memory type for GPU index %u\n", (unsigned)(memorySize / 1024), index);
+		applog(LOG_ERR,	"Cannot allocated %u kB GPU memory type for GPU index %u", (unsigned)(memorySize / 1024), index);
 		return NULL;
 	}
 
@@ -345,7 +345,7 @@ VkPipeline loadShaderFromFile(VkDevice vkDevice, VkPipelineLayout pipelineLayout
 
 	FILE *fp = fopen(spirv_file_name, "rb");
 	if (fp == NULL) {
-		applog(LOG_ERR, "SPIR-V program %s not found\n", spirv_file_name);
+		applog(LOG_ERR, "SPIR-V program %s not found", spirv_file_name);
 		return NULL;
 	}
 	fseek(fp, 0, SEEK_END);
@@ -357,7 +357,7 @@ VkPipeline loadShaderFromFile(VkDevice vkDevice, VkPipelineLayout pipelineLayout
 	size_t read_size = fread(shader, sizeof(char), shader_size, fp);
 	if (read_size != shader_size) {
 		free(shader);
-		applog(LOG_ERR, "Failed to read shader %s!\n", spirv_file_name);
+		applog(LOG_ERR, "Failed to read shader %s!", spirv_file_name);
 		return NULL;
 	}
 
@@ -410,15 +410,15 @@ static uint32_t * getShader(uint32_t workSize, uint32_t labelSize, uint32_t *sha
 				if (vulkan_shaders_vault_header[0] == labelSize) {
 					uint32_t *shader = (uint32_t*)calloc(1, vulkan_shaders_vault_header[1]);
 					if (NULL == shader) {
-						applog(LOG_ERR, "Failed to allocate shader %u:%u %u\n", workSize, labelSize, vulkan_shaders_vault_header[1]);
+						applog(LOG_ERR, "Failed to allocate shader %u:%u %u", workSize, labelSize, vulkan_shaders_vault_header[1]);
 						return NULL;
 					}
-					applog(LOG_INFO, "64:%03u %u -> %u\n", vulkan_shaders_vault_header[0], vulkan_shaders_vault_header[2], vulkan_shaders_vault_header[1]);
+					applog(LOG_INFO, "64:%03u %u -> %u", vulkan_shaders_vault_header[0], vulkan_shaders_vault_header[2], vulkan_shaders_vault_header[1]);
 					*shader_size = vulkan_shaders_vault_header[1];
 					uint8_t *src = vulkan_shaders_vault + vulkan_shaders_vault_header[3];
 					uLongf shaderSize = vulkan_shaders_vault_header[1];
 					if (Z_OK != uncompress((uint8_t*)shader, &shaderSize, src, vulkan_shaders_vault_header[2])) {
-						applog(LOG_ERR, "Failed to uncompress shader %u:%u\n", workSize, labelSize);
+						applog(LOG_ERR, "Failed to uncompress shader %u:%u", workSize, labelSize);
 						free(shader);
 						return NULL;
 					}
@@ -437,11 +437,11 @@ VkPipeline loadShader(VkDevice vkDevice, VkPipelineLayout pipelineLayout, VkShad
 	uint32_t *shader = getShader(workSize, labelSize, &shader_size);
 
 	if (NULL == shader) {
-		applog(LOG_ERR, "SPIR-V program %d:%d not found\n", workSize, labelSize);
+		applog(LOG_ERR, "SPIR-V program %d:%d not found", workSize, labelSize);
 		return NULL;
 	}
 
-	applog(LOG_INFO, "SPIR-V program %u:%u %u bytes\n", workSize, labelSize, shader_size);
+	applog(LOG_INFO, "SPIR-V program %u:%u %u bytes", workSize, labelSize, shader_size);
 
 	VkShaderModuleCreateInfo shaderModuleCreateInfo = {
 		VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,

diff --git a/test/README.md b/test/README.md
@@ -22,4 +22,5 @@ Benchmarking and Testing
 --pow-diff           or -d <0-256>         count of leading zero bits in target D value [default - 16]
 --srand-seed         or -ss <unsigned int> set srand seed value for POW test: 0 - use zero id/seed [default], -1 - use random value
 --solution-idx       or -si <unsigned int> set solution index for POW test: index will be compared to be the found solution for Pow [default - unset]
+                        -N <scrypt N>      set scrypt parameter N [default - 512]
 ```
diff --git a/test/test.cpp b/test/test.cpp
@@ -20,6 +20,8 @@ static uint8_t s_salt[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
 static const uint8_t zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
+static uint32_t scryptN = 512;
+
 /* find binary substring */
 void * memstr(const void *src, size_t length, const uint8_t *token, int token_length)
 {
@@ -117,7 +119,7 @@ void do_benchmark(int aLabelSize, int aLabelsCount)
 				{
 					uint64_t hashes_computed;
 					uint64_t hashes_per_sec;
-					int status = scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, 512, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
+					int status = scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, scryptN, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
 					printf("%s: status %d, %u hashes, %u h/s\n", providers[i].model, status, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec);
 				}
 			}
@@ -167,8 +169,8 @@ void do_test(int aLabelSize, int aLabelsCount, int aReferenceProvider, bool aPri
 						uint8_t D[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 						referenceLabels = out + i * labelsBufferAlignedSize;
 						memset(referenceLabels, 0, labelsBufferSize);
-						scryptPositions(providers[i].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
-						printf("%s: %u hashes, %u h/s\n", providers[i].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec);
+						int status = scryptPositions(providers[i].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
+						printf("%s: %u hashes, %u h/s, status: %d\n", providers[i].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec, status);
 						aReferenceProvider = i;
 						checkOutput = true;
 						break;
@@ -180,8 +182,8 @@ void do_test(int aLabelSize, int aLabelsCount, int aReferenceProvider, bool aPri
 				uint8_t D[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 				referenceLabels = out + aReferenceProvider * labelsBufferAlignedSize;
 				memset(referenceLabels, 0, labelsBufferSize);
-				scryptPositions(providers[aReferenceProvider].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
-				printf("%s: %u hashes, %u h/s\n", providers[aReferenceProvider].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec);
+				int status = scryptPositions(providers[aReferenceProvider].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); // keep the API status so the report below prints a real value
+				printf("%s: %u hashes, %u h/s, status %d\n", providers[aReferenceProvider].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec, status);
 				checkOutput = true;
 			}
 
@@ -191,7 +193,7 @@ void do_test(int aLabelSize, int aLabelsCount, int aReferenceProvider, bool aPri
 					uint8_t D[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 					uint8_t *labels = out + i * labelsBufferAlignedSize;
 					memset(labels, 0, labelsBufferSize);
-					scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, labels, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
+					scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, labels, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
 					printf("%s: %u hashes, %u h/s\n", providers[i].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec);
 					if (memstr(labels, labelsBufferSize, zeros, 8)) {
 						printf("ZEROS result\n");
@@ -309,7 +311,7 @@ void test_core(int aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize)
 
 						if (idx_solution == -1ull) {
 							printf("Compute labels and look for a pow solution... Iteration: %d\n", j);
-							int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS | SPACEMESH_API_COMPUTE_POW, out, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
+							int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS | SPACEMESH_API_COMPUTE_POW, out, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
 
 							if (status != SPACEMESH_API_ERROR_NONE && status != SPACEMESH_API_POW_SOLUTION_FOUND) {
 								printf("Compute error: %u\n", status);
@@ -331,7 +333,7 @@ void test_core(int aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize)
 							printf("Compute labels only... Iteration: %d\n", j);
 
 							uint64_t idx_temp = -1;
-							int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, 512, 1, 1, D, &idx_temp, &hashes_computed, &hashes_per_sec);
+							int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, scryptN, 1, 1, D, &idx_temp, &hashes_computed, &hashes_per_sec);
 
 							if (status != SPACEMESH_API_ERROR_NONE && status != SPACEMESH_API_POW_SOLUTION_FOUND) {
 								printf("Compute returned an error: %u", status);
@@ -351,7 +353,7 @@ void test_core(int aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize)
 
 						printf("Calling pow compute...\n");
 
-						int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_POW, out, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
+						int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_POW, out, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
 
 						printf("Compute pow only at index: %llu. hashes computed: %llu (%llu h/s)\n", idx, hashes_computed, hashes_per_sec);
 
@@ -379,7 +381,7 @@ void test_core(int aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize)
 
 						// compute 256 hash at solution index:
 						uint8_t hash[32];
-						scryptPositions(cpu_id, id, idx_solution, idx_solution, 256, salt, SPACEMESH_API_COMPUTE_LEAFS, hash, 512, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
+						scryptPositions(cpu_id, id, idx_solution, idx_solution, 256, salt, SPACEMESH_API_COMPUTE_LEAFS, hash, scryptN, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
 
 						printf("D: ");
 						print_hex32(D);
@@ -455,14 +457,14 @@ int do_test_pow(uint64_t aStartPos, int aLabelsCount, unsigned aDiff, unsigned a
 					uint64_t hashes_computed;
 					uint64_t hashes_per_sec;
 					printf("%s: ", providers[i].model);
-					int status = scryptPositions(providers[i].id, s_id, aStartPos, aStartPos + aLabelsCount - 1, 8, s_salt, SPACEMESH_API_COMPUTE_POW, NULL, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
+					int status = scryptPositions(providers[i].id, s_id, aStartPos, aStartPos + aLabelsCount - 1, 8, s_salt, SPACEMESH_API_COMPUTE_POW, NULL, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
 					switch (status) {
 					case SPACEMESH_API_POW_SOLUTION_FOUND:
 						printf("%u hashes, %u h/s, solution at %u\n", (uint32_t)hashes_computed, (uint32_t)hashes_per_sec, (uint32_t)idx_solution);
 						if (-1 != cpu_id) {
 							uint8_t hash[32];
 							memset(hash, 0, sizeof(hash));
-							scryptPositions(cpu_id, s_id, idx_solution, idx_solution, 256, s_salt, SPACEMESH_API_COMPUTE_LEAFS, hash, 512, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
+							scryptPositions(cpu_id, s_id, idx_solution, idx_solution, 256, s_salt, SPACEMESH_API_COMPUTE_LEAFS, hash, scryptN, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
 							printf("id: ");
 							print_hex32(s_id);
 							printf("\n");
@@ -891,6 +893,18 @@ int main(int argc, char **argv)
 				solutionIdx = strtoull(argv[i], NULL, 10);
 			}
 		}
+		else if (0 == strcmp(argv[i], "-N")) {
+			i++;
+			if (i < argc) {
+				scryptN = strtoul(argv[i], NULL, 10);
+			}
+		}
+		else if (0 == strcmp(argv[i], "--srand-seed") || 0 == strcmp(argv[i], "-ss")) {
+			i++;
+			if (i < argc) {
+				srand_seed = strtoul(argv[i], NULL, 10);
+			}
+		}
 		else if (0 == strcmp(argv[i], "-id")) {
 			i++;
 			if (i < argc) {