diff --git a/README.md b/README.md
index b77f67f..e5e60b9 100644
--- a/README.md
+++ b/README.md
@@ -190,6 +190,7 @@ Usage:
 --pow-diff           or -d <0-256>         count of leading zero bits in target D value [default - 16]
 --srand-seed         or -ss <unsigned int> set srand seed value for POW test: 0 - use zero id/seed [default], -1 - use random value
 --solution-idx       or -si <unsigned int> set solution index for POW test: index will be compared to be the found solution for Pow [default - unset]
+                        -N <scrypt N>      set scrypt parameter N [default - 512]
 ```
 
 ## Mixing CUDA and Vulkan
diff --git a/src/api_internal.h b/src/api_internal.h
index 9eccc0e..a66c78b 100644
--- a/src/api_internal.h
+++ b/src/api_internal.h
@@ -309,6 +309,7 @@ struct cgpu_info {
 	enum alive status;
 
 	uint32_t gpu_core_count;
+	uint64_t gpu_memory;
 	uint64_t gpu_max_alloc;
 
 	uint32_t hash_len_bits;
diff --git a/src/cuda/salsa_kernel.cu b/src/cuda/salsa_kernel.cu
index 749eb5d..76df15f 100644
--- a/src/cuda/salsa_kernel.cu
+++ b/src/cuda/salsa_kernel.cu
@@ -217,7 +217,7 @@ int find_optimal_concurency(struct cgpu_info *cgpu, _cudaState *cudaState, Kerne
 		optimal_blocks--;
 	}
 
-	optimal_blocks &= 0xfffffffc;
+	optimal_blocks &= (optimal_blocks > 512) ? 0xffffff00 : 0xfffffffc;
 
 	applog(LOG_INFO, "GPU #%d: using launch configuration %c%dx%d", cgpu->driver_id, kernel->get_identifier(), optimal_blocks, WARPS_PER_BLOCK);
 
diff --git a/src/vulkan/driver-vulkan.cpp b/src/vulkan/driver-vulkan.cpp
index 56ab469..eb946ad 100644
--- a/src/vulkan/driver-vulkan.cpp
+++ b/src/vulkan/driver-vulkan.cpp
@@ -111,9 +111,10 @@ static uint64_t alignBuffer(uint64_t size, uint64_t align)
 
 static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameSize, uint32_t hash_len_bits, bool throttled, bool copy_only)
 {
-	_vulkanState *state = (_vulkanState *)calloc(1, sizeof(_vulkanState));
+	_vulkanState state;
+	memset(&state, 0, sizeof(_vulkanState));
 
-	uint32_t scrypt_mem = 128 * cgpu->r;
+	uint32_t scrypt_mem = 128 * cgpu->r * cgpu->N;
 
 	uint32_t computeQueueFamilyIndex = getComputeQueueFamilyIndex(cgpu->driver_id);
 	if (computeQueueFamilyIndex < 0) {
@@ -121,78 +122,87 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS
 		return NULL;
 	}
 
-	state->deviceId = cgpu->driver_id;
-	state->vkDevice = createDevice(cgpu->driver_id, computeQueueFamilyIndex);
-	if (NULL == state->vkDevice) {
+	state.deviceId = cgpu->driver_id;
+	state.vkDevice = createDevice(cgpu->driver_id, computeQueueFamilyIndex);
+	if (NULL == state.vkDevice) {
 		applog(LOG_NOTICE, "GPU %d: Create Vulkan device instance failed", cgpu->driver_id);
 		return NULL;
 	}
 
-	VkDeviceMemory tmpMem = allocateGPUMemory(state->deviceId, state->vkDevice, 1024, true, true);
-	VkBuffer tmpBuf = createBuffer(state->vkDevice, computeQueueFamilyIndex, tmpMem, 256, 0);
-	state->alignment = (uint32_t)getBufferMemoryRequirements(state->vkDevice, tmpBuf);
-	gVulkan.vkDestroyBuffer(state->vkDevice, tmpBuf, NULL);
-	gVulkan.vkFreeMemory(state->vkDevice, tmpMem, NULL);
-
+	state.alignment = 256;
 	cgpu->work_size = 64;
-
-	applog(LOG_NOTICE, "GPU %d: selecting lookup gap of 4", cgpu->driver_id);
 	cgpu->lookup_gap = 4;
 
-	unsigned int bsize = 1024;
-	size_t ipt = (bsize / cgpu->lookup_gap + (bsize % cgpu->lookup_gap > 0));
+	size_t ipt = scrypt_mem / cgpu->lookup_gap;
+	size_t map = 88;
+	size_t gpu_max_alloc = cgpu->gpu_max_alloc;
+	unsigned max_threads = 32*1024;
+
+	applog(LOG_DEBUG, "GPU %d: %u MB mem, %u MB max alloc", cgpu->driver_id, (unsigned)(cgpu->gpu_memory / 1024 / 1024), (unsigned)(cgpu->gpu_max_alloc / 1024 / 1024));
+
+	if (0 != gpu_max_alloc) {
+		if (cgpu->gpu_memory > 4ull*1024ull*1024ull*1024ull) {
+			map = 100;
+		}
+	} else {
+		gpu_max_alloc = cgpu->gpu_memory;
+	}
 
 	if (!cgpu->buffer_size) {
-		unsigned int base_alloc = (int)(cgpu->gpu_max_alloc * 88 / 100 / 1024 / 1024 / 8) * 8 * 1024 * 1024;
-		cgpu->thread_concurrency = (uint32_t)(base_alloc / scrypt_mem / ipt);
+		size_t base_alloc = (gpu_max_alloc * map / 100 / 1024 / 1024 / 8) * 8 * 1024 * 1024;
+		cgpu->thread_concurrency = (uint32_t)(base_alloc / ipt);
+		cgpu->thread_concurrency = (cgpu->thread_concurrency / cgpu->work_size) * cgpu->work_size;
 		cgpu->buffer_size = base_alloc / 1024 / 1024;
-		applog(LOG_DEBUG, "88%% Max Allocation: %u", base_alloc);
+		applog(LOG_DEBUG, "%u%% Max Allocation: %zu", (unsigned)map, base_alloc); /* base_alloc is size_t: %zu avoids truncation above 4 GiB */
 		applog(LOG_NOTICE, "GPU %d: selecting buffer_size of %zu", cgpu->driver_id, cgpu->buffer_size);
 	}
 
-	if (cgpu->buffer_size) {
-		// use the buffer-size to overwrite the thread-concurrency
-		cgpu->thread_concurrency = (int)((cgpu->buffer_size * 1024 * 1024) / ipt / scrypt_mem);
-	}
-
-	cgpu->thread_concurrency = min(cgpu->thread_concurrency, /*cgpu->work_size*/ 32 * 1024);
+	cgpu->thread_concurrency = min(cgpu->thread_concurrency, max_threads);
 	uint32_t chunkSize = copy_only ? (cgpu->thread_concurrency * 32) : ((cgpu->thread_concurrency * hash_len_bits + 7) / 8);
 
 	applog(LOG_DEBUG, "GPU %d: setting thread_concurrency to %d based on buffer size %d and lookup gap %d", cgpu->driver_id, (int)(cgpu->thread_concurrency), (int)(cgpu->buffer_size), (int)(cgpu->lookup_gap));
 
-	state->bufSize = alignBuffer(scrypt_mem * ipt * cgpu->thread_concurrency, state->alignment);
-	state->memConstantSize = alignBuffer(sizeof(AlgorithmConstants), state->alignment);
-	state->memParamsSize = alignBuffer(sizeof(AlgorithmParams), state->alignment);
-	state->memInputSize = alignBuffer(PREIMAGE_SIZE, state->alignment);
-	state->memOutputSize = alignBuffer(chunkSize, state->alignment);
-	state->sharedMemorySize = state->memConstantSize + state->memParamsSize + state->memInputSize + 2 * state->memOutputSize;
+	state.bufSize = alignBuffer(ipt * cgpu->thread_concurrency, state.alignment);
+	state.memConstantSize = alignBuffer(sizeof(AlgorithmConstants), state.alignment);
+	state.memParamsSize = alignBuffer(sizeof(AlgorithmParams), state.alignment);
+	state.memInputSize = alignBuffer(PREIMAGE_SIZE, state.alignment);
+	state.memOutputSize = alignBuffer(chunkSize, state.alignment);
+	state.sharedMemorySize = state.memConstantSize + state.memParamsSize + state.memInputSize + 2 * state.memOutputSize;
 
-	state->gpuLocalMemory = allocateGPUMemory(state->deviceId, state->vkDevice, state->bufSize, true, true);
-	state->gpuSharedMemory = allocateGPUMemory(state->deviceId, state->vkDevice, state->sharedMemorySize, false, true);
+	state.gpuLocalMemory = allocateGPUMemory(state.deviceId, state.vkDevice, state.bufSize, true, true);
+	if (NULL == state.gpuLocalMemory) {
+		applog(LOG_ERR, "Cannot allocated gpuLocalMemory: %u kB GPU memory type for GPU index %u", (unsigned)(state.bufSize / 1024), state.deviceId);
+		return NULL;
+	}
+	state.gpuSharedMemory = allocateGPUMemory(state.deviceId, state.vkDevice, state.sharedMemorySize, false, true);
+	if (NULL == state.gpuSharedMemory) {
+		applog(LOG_ERR, "Cannot allocated gpuSharedMemory: %u kB GPU memory type for GPU index %u", (unsigned)(state.sharedMemorySize / 1024), state.deviceId);
+		return NULL;
+	}
 
-	state->padbuffer8 = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuLocalMemory, state->bufSize, 0);
+	state.padbuffer8 = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuLocalMemory, state.bufSize, 0);
 
 	uint64_t o = 0;
-	state->gpu_constants = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memConstantSize, o);
-	o += state->memConstantSize;
-	state->gpu_params = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memParamsSize, o);
-	o += state->memParamsSize;
-	state->CLbuffer0 = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memInputSize, o);
-	o += state->memInputSize;
-	state->outputBuffer[0] = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memOutputSize, o);
-	o += state->memOutputSize;
-	state->outputBuffer[1] = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memOutputSize, o);
-
-	gVulkan.vkGetDeviceQueue(state->vkDevice, computeQueueFamilyIndex, 0, &state->queue);
-
-	state->pipelineLayout = bindBuffers(state->vkDevice, &state->descriptorSet, &state->descriptorPool, &state->descriptorSetLayout,
-		state->padbuffer8, state->gpu_constants, state->gpu_params, state->CLbuffer0, state->outputBuffer[0], state->outputBuffer[1]
+	state.gpu_constants = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuSharedMemory, state.memConstantSize, o);
+	o += state.memConstantSize;
+	state.gpu_params = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuSharedMemory, state.memParamsSize, o);
+	o += state.memParamsSize;
+	state.CLbuffer0 = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuSharedMemory, state.memInputSize, o);
+	o += state.memInputSize;
+	state.outputBuffer[0] = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuSharedMemory, state.memOutputSize, o);
+	o += state.memOutputSize;
+	state.outputBuffer[1] = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuSharedMemory, state.memOutputSize, o);
+
+	gVulkan.vkGetDeviceQueue(state.vkDevice, computeQueueFamilyIndex, 0, &state.queue);
+
+	state.pipelineLayout = bindBuffers(state.vkDevice, &state.descriptorSet, &state.descriptorPool, &state.descriptorSetLayout,
+		state.padbuffer8, state.gpu_constants, state.gpu_params, state.CLbuffer0, state.outputBuffer[0], state.outputBuffer[1]
 	);
 
 	void *ptr = NULL;
-	CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, 0, state->memConstantSize, 0, (void **)&ptr), "vkMapMemory", NULL);
+	CHECK_RESULT(gVulkan.vkMapMemory(state.vkDevice, state.gpuSharedMemory, 0, state.memConstantSize, 0, (void **)&ptr), "vkMapMemory", NULL);
 	memcpy(ptr, (const void*)&gpuConstants, sizeof(AlgorithmConstants));
-	gVulkan.vkUnmapMemory(state->vkDevice, state->gpuSharedMemory);
+	gVulkan.vkUnmapMemory(state.vkDevice, state.gpuSharedMemory);
 
 	VkCommandPoolCreateInfo commandPoolCreateInfo = {
 		VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
@@ -200,56 +210,60 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS
 		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
 		computeQueueFamilyIndex
 	};
-	CHECK_RESULT(gVulkan.vkCreateCommandPool(state->vkDevice, &commandPoolCreateInfo, 0, &state->commandPool), "vkCreateCommandPool", NULL);
+	CHECK_RESULT(gVulkan.vkCreateCommandPool(state.vkDevice, &commandPoolCreateInfo, 0, &state.commandPool), "vkCreateCommandPool", NULL);
 
 	VkCommandBufferAllocateInfo commandBufferAllocateInfo = {
 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
 		0,
-		state->commandPool,
+		state.commandPool,
 		VK_COMMAND_BUFFER_LEVEL_PRIMARY,
 		1
 	};
-	CHECK_RESULT(gVulkan.vkAllocateCommandBuffers(state->vkDevice, &commandBufferAllocateInfo, &state->commandBuffer), "vkAllocateCommandBuffers", NULL);
+	CHECK_RESULT(gVulkan.vkAllocateCommandBuffers(state.vkDevice, &commandBufferAllocateInfo, &state.commandBuffer), "vkAllocateCommandBuffers", NULL);
 
 	VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
 	semaphoreCreateInfo.pNext = NULL;
 	semaphoreCreateInfo.flags = 0;
-	CHECK_RESULT(gVulkan.vkCreateSemaphore(state->vkDevice, &semaphoreCreateInfo, NULL, &state->semaphore), "vkCreateSemaphore", NULL);
+	CHECK_RESULT(gVulkan.vkCreateSemaphore(state.vkDevice, &semaphoreCreateInfo, NULL, &state.semaphore), "vkCreateSemaphore", NULL);
 
 	VkFenceCreateInfo fenceCreateInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
 	fenceCreateInfo.pNext = NULL;
 	fenceCreateInfo.flags = 0;
 
-	CHECK_RESULT(gVulkan.vkCreateFence(state->vkDevice, &fenceCreateInfo, NULL, &state->fence), "vkCreateFence", NULL);
+	CHECK_RESULT(gVulkan.vkCreateFence(state.vkDevice, &fenceCreateInfo, NULL, &state.fence), "vkCreateFence", NULL);
 
 #if 0
 	char options[256];
 	snprintf(options, sizeof(options), "#version 450\n#define LOOKUP_GAP %d\n#define WORKSIZE %d\n#define LABEL_SIZE %d\n",
 		cgpu->lookup_gap, (int)cgpu->work_size, hash_len_bits);
 
-	state->pipeline = compileShader(state->vkDevice, state->pipelineLayout, &state->shaderModule, scrypt_chacha_comp, options, (int)cgpu->work_size, hash_len_bits, copy_only);
+	state.pipeline = compileShader(state.vkDevice, state.pipelineLayout, &state.shaderModule, scrypt_chacha_comp, options, (int)cgpu->work_size, hash_len_bits, copy_only);
 #else
 //	char filename[64];
 //	snprintf(filename, sizeof(filename), "kernel-%02d-%03d.spirv", (int)cgpu->work_size, hash_len_bits);
-	state->pipeline = loadShader(state->vkDevice, state->pipelineLayout, &state->shaderModule, cgpu->work_size, hash_len_bits);
+	state.pipeline = loadShader(state.vkDevice, state.pipelineLayout, &state.shaderModule, cgpu->work_size, hash_len_bits);
 #endif
-	if (!state->pipeline) {
+	if (!state.pipeline) {
 		return NULL;
 	}
 
 	VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, 0, 0, 0 };
-	CHECK_RESULT(gVulkan.vkBeginCommandBuffer(state->commandBuffer, &commandBufferBeginInfo), "vkBeginCommandBuffer", NULL);
+	CHECK_RESULT(gVulkan.vkBeginCommandBuffer(state.commandBuffer, &commandBufferBeginInfo), "vkBeginCommandBuffer", NULL);
 
-	gVulkan.vkCmdBindPipeline(state->commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->pipeline);
-	gVulkan.vkCmdBindDescriptorSets(state->commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->pipelineLayout, 0, 1, &state->descriptorSet, 0, 0);
+	gVulkan.vkCmdBindPipeline(state.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, state.pipeline);
+	gVulkan.vkCmdBindDescriptorSets(state.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, state.pipelineLayout, 0, 1, &state.descriptorSet, 0, 0);
 #if 1
-	gVulkan.vkCmdDispatch(state->commandBuffer, cgpu->thread_concurrency / cgpu->work_size, 1, 1);
+	gVulkan.vkCmdDispatch(state.commandBuffer, cgpu->thread_concurrency / cgpu->work_size, 1, 1);
 #else
-	gVulkan.vkCmdDispatch(state->commandBuffer, 1, 1, 1);
+	gVulkan.vkCmdDispatch(state.commandBuffer, 1, 1, 1);
 #endif
-	CHECK_RESULT(gVulkan.vkEndCommandBuffer(state->commandBuffer), "vkEndCommandBuffer", NULL);
+	CHECK_RESULT(gVulkan.vkEndCommandBuffer(state.commandBuffer), "vkEndCommandBuffer", NULL);
 
-	return state;
+	_vulkanState* pstate = (_vulkanState*)calloc(1, sizeof(_vulkanState));
+	if (nullptr != pstate) {
+		memcpy(pstate, &state, sizeof(_vulkanState));
+	}
+	return pstate;
 }
 
 static int vulkan_detect(struct cgpu_info *gpus, int *active)
@@ -308,14 +322,19 @@ static int vulkan_detect(struct cgpu_info *gpus, int *active)
 			for (unsigned i = 0; i < gPhysicalDeviceCount; i++) {
 				struct cgpu_info *cgpu = &gpus[*active];
 
-				VkPhysicalDeviceProperties physicalDeviceProperties;
-				gVulkan.vkGetPhysicalDeviceProperties(gPhysicalDevices[i], &physicalDeviceProperties);
+				VkPhysicalDeviceMaintenance3Properties physicalDeviceProperties3 = {}; // zeroed: maxMemoryAllocationSize stays 0 ("unknown") if the driver skips this struct
+				physicalDeviceProperties3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES;
+				physicalDeviceProperties3.pNext = NULL;
+				VkPhysicalDeviceProperties2 physicalDeviceProperties2 = {};
+				physicalDeviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+				physicalDeviceProperties2.pNext = &physicalDeviceProperties3;
+				gVulkan.vkGetPhysicalDeviceProperties2(gPhysicalDevices[i], &physicalDeviceProperties2);
 
-				if (0x10DE == physicalDeviceProperties.vendorID) {
+				if (0x10DE == physicalDeviceProperties2.properties.vendorID) {
 					continue;
 				}
 
-				memcpy(cgpu->name, physicalDeviceProperties.deviceName, min(sizeof(cgpu->name),sizeof(physicalDeviceProperties.deviceName)));
+				memcpy(cgpu->name, physicalDeviceProperties2.properties.deviceName, min(sizeof(cgpu->name),sizeof(physicalDeviceProperties2.properties.deviceName)));
 				cgpu->name[sizeof(cgpu->name) - 1] = 0;
 
 				VkPhysicalDeviceMemoryProperties memoryProperties;
@@ -332,12 +351,14 @@ static int vulkan_detect(struct cgpu_info *gpus, int *active)
 					VkMemoryType t = memoryProperties.memoryTypes[j];
 					if ((t.propertyFlags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
 						if (t.heapIndex < memoryProperties.memoryHeapCount) {
-							cgpu->gpu_max_alloc = memoryProperties.memoryHeaps[t.heapIndex].size;
+							cgpu->gpu_memory = memoryProperties.memoryHeaps[t.heapIndex].size;
 							break;
 						}
 					}
 				}
 
+				cgpu->gpu_max_alloc = physicalDeviceProperties3.maxMemoryAllocationSize;
+
 				*active += 1;
 				most_devices++;
 			}
diff --git a/src/vulkan/gen/CMakeLists.txt b/src/vulkan/gen/CMakeLists.txt
index 11ad6bf..fdeba28 100644
--- a/src/vulkan/gen/CMakeLists.txt
+++ b/src/vulkan/gen/CMakeLists.txt
@@ -1,5 +1,9 @@
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
+if (MSVC)
+    set (CMAKE_CXX_FLAGS_DEBUG "/MD /Zi /O2 /Ob1 /DNDEBUG")
+endif()
+
 # A custom command and target to turn the Vulkan kernel into a byte array header
 
 add_custom_command(
diff --git a/src/vulkan/gen/scrypt-chacha.comp b/src/vulkan/gen/scrypt-chacha.comp
index e1537c3..02b8c3a 100644
--- a/src/vulkan/gen/scrypt-chacha.comp
+++ b/src/vulkan/gen/scrypt-chacha.comp
@@ -413,8 +413,7 @@ main()
 	uvec4 X[8];
 	const uint lid = gl_LocalInvocationID.x;
 	const uint gid = gl_GlobalInvocationID.x;
-	uint Nfactor = 0;
-	uint tmp = N >> 1;
+	uint tmp;
 	uvec2 nonce;
 
 	scrypt_hmac_state hmac_pw, work;
@@ -432,14 +431,6 @@ main()
 		nonce.x = global_work_offset.x + gid;
 	}
 
-	/* Determine the Nfactor */
-	while ((tmp & 1) == 0) {
-		tmp >>= 1;
-		Nfactor++;
-	}
-
-	const uint effective_concurrency = (concurrent_threads << 9) >> Nfactor;
-	
 	password[0] = buffer0[0];
 	password[1] = buffer0[1];
 	password[2] = buffer0[2];
@@ -555,7 +546,7 @@ main()
 	}
 //////////////////////////////////////////////////
 	/* 2: X = ROMix(X) */
-	scrypt_ROMix(X, N, gid, effective_concurrency);
+	scrypt_ROMix(X, N, gid, concurrent_threads);
 
 	/* 3: Out = PBKDF2(password, X) */
 //////////////////////////////////////////////////
diff --git a/src/vulkan/vulkan-helpers.c b/src/vulkan/vulkan-helpers.c
index 6e2153f..385b503 100644
--- a/src/vulkan/vulkan-helpers.c
+++ b/src/vulkan/vulkan-helpers.c
@@ -80,6 +80,7 @@ int initVulkanLibrary()
 	LOAD_VULKAN_FUNCTION(vkEndCommandBuffer);
 	LOAD_VULKAN_FUNCTION(vkEnumeratePhysicalDevices);
 	LOAD_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties);
+	LOAD_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties2);
 	LOAD_VULKAN_FUNCTION(vkQueueSubmit);
 	LOAD_VULKAN_FUNCTION(vkQueueWaitIdle);
 	LOAD_VULKAN_FUNCTION(vkDestroyPipelineLayout);
@@ -129,7 +130,7 @@ void vulkan_library_shutdown()
 int getComputeQueueFamilyIndex(uint32_t index)
 {
 	if (index >= gPhysicalDeviceCount) {
-		applog(LOG_ERR, "Card index %u not found\n", index);
+		applog(LOG_ERR, "Card index %u not found", index);
 		return -1;
 	}
 	uint32_t queueFamilyPropertiesCount = 0;
@@ -210,7 +211,7 @@ VkDeviceMemory allocateGPUMemory(int index,  VkDevice vkDevice, const VkDeviceSi
 
 	VkResult ret = (memoryTypeIndex == VK_MAX_MEMORY_TYPES ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS);
 	if (ret != VK_SUCCESS) {
-		applog(LOG_ERR,	"Cannot allocated %u kB GPU memory type for GPU index %u\n", (unsigned)(memorySize / 1024), index);
+		applog(LOG_ERR,	"Cannot allocated %u kB GPU memory type for GPU index %u", (unsigned)(memorySize / 1024), index);
 		return NULL;
 	}
 
@@ -345,7 +346,7 @@ VkPipeline loadShaderFromFile(VkDevice vkDevice, VkPipelineLayout pipelineLayout
 
 	FILE *fp = fopen(spirv_file_name, "rb");
 	if (fp == NULL) {
-		applog(LOG_ERR, "SPIR-V program %s not found\n", spirv_file_name);
+		applog(LOG_ERR, "SPIR-V program %s not found", spirv_file_name);
 		return NULL;
 	}
 	fseek(fp, 0, SEEK_END);
@@ -357,7 +358,7 @@ VkPipeline loadShaderFromFile(VkDevice vkDevice, VkPipelineLayout pipelineLayout
 	size_t read_size = fread(shader, sizeof(char), shader_size, fp);
 	if (read_size != shader_size) {
 		free(shader);
-		applog(LOG_ERR, "Failed to read shader %s!\n", spirv_file_name);
+		applog(LOG_ERR, "Failed to read shader %s!", spirv_file_name);
 		return NULL;
 	}
 
@@ -410,15 +411,15 @@ static uint32_t * getShader(uint32_t workSize, uint32_t labelSize, uint32_t *sha
 				if (vulkan_shaders_vault_header[0] == labelSize) {
 					uint32_t *shader = (uint32_t*)calloc(1, vulkan_shaders_vault_header[1]);
 					if (NULL == shader) {
-						applog(LOG_ERR, "Failed to allocate shader %u:%u %u\n", workSize, labelSize, vulkan_shaders_vault_header[1]);
+						applog(LOG_ERR, "Failed to allocate shader %u:%u %u", workSize, labelSize, vulkan_shaders_vault_header[1]);
 						return NULL;
 					}
-					applog(LOG_INFO, "64:%03u %u -> %u\n", vulkan_shaders_vault_header[0], vulkan_shaders_vault_header[2], vulkan_shaders_vault_header[1]);
+					applog(LOG_INFO, "64:%03u %u -> %u", vulkan_shaders_vault_header[0], vulkan_shaders_vault_header[2], vulkan_shaders_vault_header[1]);
 					*shader_size = vulkan_shaders_vault_header[1];
 					uint8_t *src = vulkan_shaders_vault + vulkan_shaders_vault_header[3];
 					uLongf shaderSize = vulkan_shaders_vault_header[1];
 					if (Z_OK != uncompress((uint8_t*)shader, &shaderSize, src, vulkan_shaders_vault_header[2])) {
-						applog(LOG_ERR, "Failed to uncompress shader %u:%u\n", workSize, labelSize);
+						applog(LOG_ERR, "Failed to uncompress shader %u:%u", workSize, labelSize);
 						free(shader);
 						return NULL;
 					}
@@ -437,11 +438,11 @@ VkPipeline loadShader(VkDevice vkDevice, VkPipelineLayout pipelineLayout, VkShad
 	uint32_t *shader = getShader(workSize, labelSize, &shader_size);
 
 	if (NULL == shader) {
-		applog(LOG_ERR, "SPIR-V program %d:%d not found\n", workSize, labelSize);
+		applog(LOG_ERR, "SPIR-V program %d:%d not found", workSize, labelSize);
 		return NULL;
 	}
 
-	applog(LOG_INFO, "SPIR-V program %u:%u %u bytes\n", workSize, labelSize, shader_size);
+	applog(LOG_INFO, "SPIR-V program %u:%u %u bytes", workSize, labelSize, shader_size);
 
 	VkShaderModuleCreateInfo shaderModuleCreateInfo = {
 		VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
diff --git a/src/vulkan/vulkan-helpers.h b/src/vulkan/vulkan-helpers.h
index 23fe301..7774af2 100644
--- a/src/vulkan/vulkan-helpers.h
+++ b/src/vulkan/vulkan-helpers.h
@@ -61,6 +61,7 @@ typedef struct _Vulkan {
 	DECLARE_VULKAN_FUNCTION(vkEndCommandBuffer);
 	DECLARE_VULKAN_FUNCTION(vkEnumeratePhysicalDevices);
 	DECLARE_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties);
+	DECLARE_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties2);
 	DECLARE_VULKAN_FUNCTION(vkQueueSubmit);
 	DECLARE_VULKAN_FUNCTION(vkQueueWaitIdle);
 	DECLARE_VULKAN_FUNCTION(vkDestroyPipelineLayout);
diff --git a/test/README.md b/test/README.md
index 37bbee7..6117ab5 100644
--- a/test/README.md
+++ b/test/README.md
@@ -22,4 +22,5 @@ Benchmarking and Testing
 --pow-diff           or -d <0-256>         count of leading zero bits in target D value [default - 16]
 --srand-seed         or -ss <unsigned int> set srand seed value for POW test: 0 - use zero id/seed [default], -1 - use random value
 --solution-idx       or -si <unsigned int> set solution index for POW test: index will be compared to be the found solution for Pow [default - unset]
+                        -N <scrypt N>      set scrypt parameter N [default - 512]
 ```
diff --git a/test/test.cpp b/test/test.cpp
index 3179e97..4d992db 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -20,6 +20,8 @@ static uint8_t s_salt[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
 static const uint8_t zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
+static uint32_t scryptN = 512;
+
 /* find binary substring */
 void * memstr(const void *src, size_t length, const uint8_t *token, int token_length)
 {
@@ -117,7 +119,7 @@ void do_benchmark(int aLabelSize, int aLabelsCount)
 				{
 					uint64_t hashes_computed;
 					uint64_t hashes_per_sec;
-					int status = scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, 512, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
+					int status = scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, scryptN, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
 					printf("%s: status %d, %u hashes, %u h/s\n", providers[i].model, status, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec);
 				}
 			}
@@ -167,8 +169,8 @@ void do_test(int aLabelSize, int aLabelsCount, int aReferenceProvider, bool aPri
 						uint8_t D[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 						referenceLabels = out + i * labelsBufferAlignedSize;
 						memset(referenceLabels, 0, labelsBufferSize);
-						scryptPositions(providers[i].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
-						printf("%s: %u hashes, %u h/s\n", providers[i].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec);
+						int status = scryptPositions(providers[i].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
+						printf("%s: %u hashes, %u h/s, status: %d\n", providers[i].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec, status);
 						aReferenceProvider = i;
 						checkOutput = true;
 						break;
@@ -180,8 +182,8 @@ void do_test(int aLabelSize, int aLabelsCount, int aReferenceProvider, bool aPri
 				uint8_t D[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 				referenceLabels = out + aReferenceProvider * labelsBufferAlignedSize;
 				memset(referenceLabels, 0, labelsBufferSize);
-				scryptPositions(providers[aReferenceProvider].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
-				printf("%s: %u hashes, %u h/s\n", providers[aReferenceProvider].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec);
+				int status = scryptPositions(providers[aReferenceProvider].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
+				printf("%s: %u hashes, %u h/s, status %d\n", providers[aReferenceProvider].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec, status);
 				checkOutput = true;
 			}
 
@@ -191,7 +193,7 @@ void do_test(int aLabelSize, int aLabelsCount, int aReferenceProvider, bool aPri
 					uint8_t D[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 					uint8_t *labels = out + i * labelsBufferAlignedSize;
 					memset(labels, 0, labelsBufferSize);
-					scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, labels, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
+					scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, labels, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
 					printf("%s: %u hashes, %u h/s\n", providers[i].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec);
 					if (memstr(labels, labelsBufferSize, zeros, 8)) {
 						printf("ZEROS result\n");
@@ -309,7 +311,7 @@ void test_core(int aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize)
 
 						if (idx_solution == -1ull) {
 							printf("Compute labels and look for a pow solution... Iteration: %d\n", j);
-							int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS | SPACEMESH_API_COMPUTE_POW, out, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
+							int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS | SPACEMESH_API_COMPUTE_POW, out, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
 
 							if (status != SPACEMESH_API_ERROR_NONE && status != SPACEMESH_API_POW_SOLUTION_FOUND) {
 								printf("Compute error: %u\n", status);
@@ -331,7 +333,7 @@ void test_core(int aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize)
 							printf("Compute labels only... Iteration: %d\n", j);
 
 							uint64_t idx_temp = -1;
-							int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, 512, 1, 1, D, &idx_temp, &hashes_computed, &hashes_per_sec);
+							int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, scryptN, 1, 1, D, &idx_temp, &hashes_computed, &hashes_per_sec);
 
 							if (status != SPACEMESH_API_ERROR_NONE && status != SPACEMESH_API_POW_SOLUTION_FOUND) {
 								printf("Compute returned an error: %u", status);
@@ -351,7 +353,7 @@ void test_core(int aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize)
 
 						printf("Calling pow compute...\n");
 
-						int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_POW, out, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
+						int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_POW, out, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
 
 						printf("Compute pow only at index: %llu. hashes computed: %llu (%llu h/s)\n", idx, hashes_computed, hashes_per_sec);
 
@@ -379,7 +381,7 @@ void test_core(int aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize)
 
 						// compute 256 hash at solution index:
 						uint8_t hash[32];
-						scryptPositions(cpu_id, id, idx_solution, idx_solution, 256, salt, SPACEMESH_API_COMPUTE_LEAFS, hash, 512, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
+						scryptPositions(cpu_id, id, idx_solution, idx_solution, 256, salt, SPACEMESH_API_COMPUTE_LEAFS, hash, scryptN, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
 
 						printf("D: ");
 						print_hex32(D);
@@ -455,14 +457,14 @@ int do_test_pow(uint64_t aStartPos, int aLabelsCount, unsigned aDiff, unsigned a
 					uint64_t hashes_computed;
 					uint64_t hashes_per_sec;
 					printf("%s: ", providers[i].model);
-					int status = scryptPositions(providers[i].id, s_id, aStartPos, aStartPos + aLabelsCount - 1, 8, s_salt, SPACEMESH_API_COMPUTE_POW, NULL, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
+					int status = scryptPositions(providers[i].id, s_id, aStartPos, aStartPos + aLabelsCount - 1, 8, s_salt, SPACEMESH_API_COMPUTE_POW, NULL, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec);
 					switch (status) {
 					case SPACEMESH_API_POW_SOLUTION_FOUND:
 						printf("%u hashes, %u h/s, solution at %u\n", (uint32_t)hashes_computed, (uint32_t)hashes_per_sec, (uint32_t)idx_solution);
 						if (-1 != cpu_id) {
 							uint8_t hash[32];
 							memset(hash, 0, sizeof(hash));
-							scryptPositions(cpu_id, s_id, idx_solution, idx_solution, 256, s_salt, SPACEMESH_API_COMPUTE_LEAFS, hash, 512, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
+							scryptPositions(cpu_id, s_id, idx_solution, idx_solution, 256, s_salt, SPACEMESH_API_COMPUTE_LEAFS, hash, scryptN, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec);
 							printf("id: ");
 							print_hex32(s_id);
 							printf("\n");
@@ -891,6 +893,18 @@ int main(int argc, char **argv)
 				solutionIdx = strtoull(argv[i], NULL, 10);
 			}
 		}
+		else if (0 == strcmp(argv[i], "-N")) {
+			i++;
+			if (i < argc) {
+				scryptN = strtoul(argv[i], NULL, 10);
+			}
+		}
+		else if (0 == strcmp(argv[i], "--srand-seed") || 0 == strcmp(argv[i], "-ss")) {
+			i++;
+			if (i < argc) {
+				srand_seed = strtoul(argv[i], NULL, 10);
+			}
+		}
 		else if (0 == strcmp(argv[i], "-id")) {
 			i++;
 			if (i < argc) {