diff --git a/README.md b/README.md index b77f67f..e5e60b9 100644 --- a/README.md +++ b/README.md @@ -190,6 +190,7 @@ Usage: --pow-diff or -d <0-256> count of leading zero bits in target D value [default - 16] --srand-seed or -ss set srand seed value for POW test: 0 - use zero id/seed [default], -1 - use random value --solution-idx or -si set solution index for POW test: index will be compared to be the found solution for Pow [default - unset] + -N set scrypt parameter N [default - 512] ``` ## Mixing CUDA and Vulkan diff --git a/src/api_internal.h b/src/api_internal.h index 9eccc0e..a66c78b 100644 --- a/src/api_internal.h +++ b/src/api_internal.h @@ -309,6 +309,7 @@ struct cgpu_info { enum alive status; uint32_t gpu_core_count; + uint64_t gpu_memory; uint64_t gpu_max_alloc; uint32_t hash_len_bits; diff --git a/src/cuda/salsa_kernel.cu b/src/cuda/salsa_kernel.cu index 749eb5d..76df15f 100644 --- a/src/cuda/salsa_kernel.cu +++ b/src/cuda/salsa_kernel.cu @@ -217,7 +217,7 @@ int find_optimal_concurency(struct cgpu_info *cgpu, _cudaState *cudaState, Kerne optimal_blocks--; } - optimal_blocks &= 0xfffffffc; + optimal_blocks &= (optimal_blocks > 512) ? 
0xffffff00 : 0xfffffffc; applog(LOG_INFO, "GPU #%d: using launch configuration %c%dx%d", cgpu->driver_id, kernel->get_identifier(), optimal_blocks, WARPS_PER_BLOCK); diff --git a/src/vulkan/driver-vulkan.cpp b/src/vulkan/driver-vulkan.cpp index 56ab469..eb946ad 100644 --- a/src/vulkan/driver-vulkan.cpp +++ b/src/vulkan/driver-vulkan.cpp @@ -111,9 +111,10 @@ static uint64_t alignBuffer(uint64_t size, uint64_t align) static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameSize, uint32_t hash_len_bits, bool throttled, bool copy_only) { - _vulkanState *state = (_vulkanState *)calloc(1, sizeof(_vulkanState)); + _vulkanState state; + memset(&state, 0, sizeof(_vulkanState)); - uint32_t scrypt_mem = 128 * cgpu->r; + uint32_t scrypt_mem = 128 * cgpu->r * cgpu->N; uint32_t computeQueueFamilyIndex = getComputeQueueFamilyIndex(cgpu->driver_id); if (computeQueueFamilyIndex < 0) { @@ -121,78 +122,87 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS return NULL; } - state->deviceId = cgpu->driver_id; - state->vkDevice = createDevice(cgpu->driver_id, computeQueueFamilyIndex); - if (NULL == state->vkDevice) { + state.deviceId = cgpu->driver_id; + state.vkDevice = createDevice(cgpu->driver_id, computeQueueFamilyIndex); + if (NULL == state.vkDevice) { applog(LOG_NOTICE, "GPU %d: Create Vulkan device instance failed", cgpu->driver_id); return NULL; } - VkDeviceMemory tmpMem = allocateGPUMemory(state->deviceId, state->vkDevice, 1024, true, true); - VkBuffer tmpBuf = createBuffer(state->vkDevice, computeQueueFamilyIndex, tmpMem, 256, 0); - state->alignment = (uint32_t)getBufferMemoryRequirements(state->vkDevice, tmpBuf); - gVulkan.vkDestroyBuffer(state->vkDevice, tmpBuf, NULL); - gVulkan.vkFreeMemory(state->vkDevice, tmpMem, NULL); - + state.alignment = 256; cgpu->work_size = 64; - - applog(LOG_NOTICE, "GPU %d: selecting lookup gap of 4", cgpu->driver_id); cgpu->lookup_gap = 4; - unsigned int bsize = 1024; - size_t ipt = (bsize / 
cgpu->lookup_gap + (bsize % cgpu->lookup_gap > 0)); + size_t ipt = scrypt_mem / cgpu->lookup_gap; + size_t map = 88; + size_t gpu_max_alloc = cgpu->gpu_max_alloc; + unsigned max_threads = 32*1024; + + applog(LOG_DEBUG, "GPU %d: %u MB mem, %u MB max alloc", cgpu->driver_id, (unsigned)(cgpu->gpu_memory / 1024 / 1024), (unsigned)(cgpu->gpu_max_alloc / 1024 / 1024)); + + if (0 != gpu_max_alloc) { + if (cgpu->gpu_memory > 4ull*1024ull*1024ull*1024ull) { + map = 100; + } + } else { + gpu_max_alloc = cgpu->gpu_memory; + } if (!cgpu->buffer_size) { - unsigned int base_alloc = (int)(cgpu->gpu_max_alloc * 88 / 100 / 1024 / 1024 / 8) * 8 * 1024 * 1024; - cgpu->thread_concurrency = (uint32_t)(base_alloc / scrypt_mem / ipt); + size_t base_alloc = (gpu_max_alloc * map / 100 / 1024 / 1024 / 8) * 8 * 1024 * 1024; + cgpu->thread_concurrency = (uint32_t)(base_alloc / ipt); + cgpu->thread_concurrency = (cgpu->thread_concurrency / cgpu->work_size) * cgpu->work_size; cgpu->buffer_size = base_alloc / 1024 / 1024; - applog(LOG_DEBUG, "88%% Max Allocation: %u", base_alloc); + applog(LOG_DEBUG, "%u%% Max Allocation: %u", (unsigned)map, (unsigned)base_alloc); applog(LOG_NOTICE, "GPU %d: selecting buffer_size of %zu", cgpu->driver_id, cgpu->buffer_size); } - if (cgpu->buffer_size) { - // use the buffer-size to overwrite the thread-concurrency - cgpu->thread_concurrency = (int)((cgpu->buffer_size * 1024 * 1024) / ipt / scrypt_mem); - } - - cgpu->thread_concurrency = min(cgpu->thread_concurrency, /*cgpu->work_size*/ 32 * 1024); + cgpu->thread_concurrency = min(cgpu->thread_concurrency, max_threads); uint32_t chunkSize = copy_only ? 
(cgpu->thread_concurrency * 32) : ((cgpu->thread_concurrency * hash_len_bits + 7) / 8); applog(LOG_DEBUG, "GPU %d: setting thread_concurrency to %d based on buffer size %d and lookup gap %d", cgpu->driver_id, (int)(cgpu->thread_concurrency), (int)(cgpu->buffer_size), (int)(cgpu->lookup_gap)); - state->bufSize = alignBuffer(scrypt_mem * ipt * cgpu->thread_concurrency, state->alignment); - state->memConstantSize = alignBuffer(sizeof(AlgorithmConstants), state->alignment); - state->memParamsSize = alignBuffer(sizeof(AlgorithmParams), state->alignment); - state->memInputSize = alignBuffer(PREIMAGE_SIZE, state->alignment); - state->memOutputSize = alignBuffer(chunkSize, state->alignment); - state->sharedMemorySize = state->memConstantSize + state->memParamsSize + state->memInputSize + 2 * state->memOutputSize; + state.bufSize = alignBuffer(ipt * cgpu->thread_concurrency, state.alignment); + state.memConstantSize = alignBuffer(sizeof(AlgorithmConstants), state.alignment); + state.memParamsSize = alignBuffer(sizeof(AlgorithmParams), state.alignment); + state.memInputSize = alignBuffer(PREIMAGE_SIZE, state.alignment); + state.memOutputSize = alignBuffer(chunkSize, state.alignment); + state.sharedMemorySize = state.memConstantSize + state.memParamsSize + state.memInputSize + 2 * state.memOutputSize; - state->gpuLocalMemory = allocateGPUMemory(state->deviceId, state->vkDevice, state->bufSize, true, true); - state->gpuSharedMemory = allocateGPUMemory(state->deviceId, state->vkDevice, state->sharedMemorySize, false, true); + state.gpuLocalMemory = allocateGPUMemory(state.deviceId, state.vkDevice, state.bufSize, true, true); + if (NULL == state.gpuLocalMemory) { + applog(LOG_ERR, "Cannot allocated gpuLocalMemory: %u kB GPU memory type for GPU index %u", (unsigned)(state.bufSize / 1024), state.deviceId); + return NULL; + } + state.gpuSharedMemory = allocateGPUMemory(state.deviceId, state.vkDevice, state.sharedMemorySize, false, true); + if (NULL == state.gpuSharedMemory) { + 
applog(LOG_ERR, "Cannot allocated gpuSharedMemory: %u kB GPU memory type for GPU index %u", (unsigned)(state.sharedMemorySize / 1024), state.deviceId); + return NULL; + } - state->padbuffer8 = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuLocalMemory, state->bufSize, 0); + state.padbuffer8 = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuLocalMemory, state.bufSize, 0); uint64_t o = 0; - state->gpu_constants = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memConstantSize, o); - o += state->memConstantSize; - state->gpu_params = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memParamsSize, o); - o += state->memParamsSize; - state->CLbuffer0 = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memInputSize, o); - o += state->memInputSize; - state->outputBuffer[0] = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memOutputSize, o); - o += state->memOutputSize; - state->outputBuffer[1] = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memOutputSize, o); - - gVulkan.vkGetDeviceQueue(state->vkDevice, computeQueueFamilyIndex, 0, &state->queue); - - state->pipelineLayout = bindBuffers(state->vkDevice, &state->descriptorSet, &state->descriptorPool, &state->descriptorSetLayout, - state->padbuffer8, state->gpu_constants, state->gpu_params, state->CLbuffer0, state->outputBuffer[0], state->outputBuffer[1] + state.gpu_constants = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuSharedMemory, state.memConstantSize, o); + o += state.memConstantSize; + state.gpu_params = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuSharedMemory, state.memParamsSize, o); + o += state.memParamsSize; + state.CLbuffer0 = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuSharedMemory, state.memInputSize, o); + o += state.memInputSize; + 
state.outputBuffer[0] = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuSharedMemory, state.memOutputSize, o); + o += state.memOutputSize; + state.outputBuffer[1] = createBuffer(state.vkDevice, computeQueueFamilyIndex, state.gpuSharedMemory, state.memOutputSize, o); + + gVulkan.vkGetDeviceQueue(state.vkDevice, computeQueueFamilyIndex, 0, &state.queue); + + state.pipelineLayout = bindBuffers(state.vkDevice, &state.descriptorSet, &state.descriptorPool, &state.descriptorSetLayout, + state.padbuffer8, state.gpu_constants, state.gpu_params, state.CLbuffer0, state.outputBuffer[0], state.outputBuffer[1] ); void *ptr = NULL; - CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, 0, state->memConstantSize, 0, (void **)&ptr), "vkMapMemory", NULL); + CHECK_RESULT(gVulkan.vkMapMemory(state.vkDevice, state.gpuSharedMemory, 0, state.memConstantSize, 0, (void **)&ptr), "vkMapMemory", NULL); memcpy(ptr, (const void*)&gpuConstants, sizeof(AlgorithmConstants)); - gVulkan.vkUnmapMemory(state->vkDevice, state->gpuSharedMemory); + gVulkan.vkUnmapMemory(state.vkDevice, state.gpuSharedMemory); VkCommandPoolCreateInfo commandPoolCreateInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, @@ -200,56 +210,60 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, computeQueueFamilyIndex }; - CHECK_RESULT(gVulkan.vkCreateCommandPool(state->vkDevice, &commandPoolCreateInfo, 0, &state->commandPool), "vkCreateCommandPool", NULL); + CHECK_RESULT(gVulkan.vkCreateCommandPool(state.vkDevice, &commandPoolCreateInfo, 0, &state.commandPool), "vkCreateCommandPool", NULL); VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, 0, - state->commandPool, + state.commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1 }; - CHECK_RESULT(gVulkan.vkAllocateCommandBuffers(state->vkDevice, &commandBufferAllocateInfo, &state->commandBuffer), 
"vkAllocateCommandBuffers", NULL); + CHECK_RESULT(gVulkan.vkAllocateCommandBuffers(state.vkDevice, &commandBufferAllocateInfo, &state.commandBuffer), "vkAllocateCommandBuffers", NULL); VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO }; semaphoreCreateInfo.pNext = NULL; semaphoreCreateInfo.flags = 0; - CHECK_RESULT(gVulkan.vkCreateSemaphore(state->vkDevice, &semaphoreCreateInfo, NULL, &state->semaphore), "vkCreateSemaphore", NULL); + CHECK_RESULT(gVulkan.vkCreateSemaphore(state.vkDevice, &semaphoreCreateInfo, NULL, &state.semaphore), "vkCreateSemaphore", NULL); VkFenceCreateInfo fenceCreateInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO }; fenceCreateInfo.pNext = NULL; fenceCreateInfo.flags = 0; - CHECK_RESULT(gVulkan.vkCreateFence(state->vkDevice, &fenceCreateInfo, NULL, &state->fence), "vkCreateFence", NULL); + CHECK_RESULT(gVulkan.vkCreateFence(state.vkDevice, &fenceCreateInfo, NULL, &state.fence), "vkCreateFence", NULL); #if 0 char options[256]; snprintf(options, sizeof(options), "#version 450\n#define LOOKUP_GAP %d\n#define WORKSIZE %d\n#define LABEL_SIZE %d\n", cgpu->lookup_gap, (int)cgpu->work_size, hash_len_bits); - state->pipeline = compileShader(state->vkDevice, state->pipelineLayout, &state->shaderModule, scrypt_chacha_comp, options, (int)cgpu->work_size, hash_len_bits, copy_only); + state.pipeline = compileShader(state.vkDevice, state.pipelineLayout, &state.shaderModule, scrypt_chacha_comp, options, (int)cgpu->work_size, hash_len_bits, copy_only); #else // char filename[64]; // snprintf(filename, sizeof(filename), "kernel-%02d-%03d.spirv", (int)cgpu->work_size, hash_len_bits); - state->pipeline = loadShader(state->vkDevice, state->pipelineLayout, &state->shaderModule, cgpu->work_size, hash_len_bits); + state.pipeline = loadShader(state.vkDevice, state.pipelineLayout, &state.shaderModule, cgpu->work_size, hash_len_bits); #endif - if (!state->pipeline) { + if (!state.pipeline) { return NULL; } VkCommandBufferBeginInfo 
commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, 0, 0, 0 }; - CHECK_RESULT(gVulkan.vkBeginCommandBuffer(state->commandBuffer, &commandBufferBeginInfo), "vkBeginCommandBuffer", NULL); + CHECK_RESULT(gVulkan.vkBeginCommandBuffer(state.commandBuffer, &commandBufferBeginInfo), "vkBeginCommandBuffer", NULL); - gVulkan.vkCmdBindPipeline(state->commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->pipeline); - gVulkan.vkCmdBindDescriptorSets(state->commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->pipelineLayout, 0, 1, &state->descriptorSet, 0, 0); + gVulkan.vkCmdBindPipeline(state.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, state.pipeline); + gVulkan.vkCmdBindDescriptorSets(state.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, state.pipelineLayout, 0, 1, &state.descriptorSet, 0, 0); #if 1 - gVulkan.vkCmdDispatch(state->commandBuffer, cgpu->thread_concurrency / cgpu->work_size, 1, 1); + gVulkan.vkCmdDispatch(state.commandBuffer, cgpu->thread_concurrency / cgpu->work_size, 1, 1); #else - gVulkan.vkCmdDispatch(state->commandBuffer, 1, 1, 1); + gVulkan.vkCmdDispatch(state.commandBuffer, 1, 1, 1); #endif - CHECK_RESULT(gVulkan.vkEndCommandBuffer(state->commandBuffer), "vkEndCommandBuffer", NULL); + CHECK_RESULT(gVulkan.vkEndCommandBuffer(state.commandBuffer), "vkEndCommandBuffer", NULL); - return state; + _vulkanState* pstate = (_vulkanState*)calloc(1, sizeof(_vulkanState)); + if (nullptr != pstate) { + memcpy(pstate, &state, sizeof(_vulkanState)); + } + return pstate; } static int vulkan_detect(struct cgpu_info *gpus, int *active) @@ -308,14 +322,19 @@ static int vulkan_detect(struct cgpu_info *gpus, int *active) for (unsigned i = 0; i < gPhysicalDeviceCount; i++) { struct cgpu_info *cgpu = &gpus[*active]; - VkPhysicalDeviceProperties physicalDeviceProperties; - gVulkan.vkGetPhysicalDeviceProperties(gPhysicalDevices[i], &physicalDeviceProperties); + VkPhysicalDeviceMaintenance3Properties physicalDeviceProperties3; + 
physicalDeviceProperties3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; + physicalDeviceProperties3.pNext = NULL; + VkPhysicalDeviceProperties2 physicalDeviceProperties2; + physicalDeviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + physicalDeviceProperties2.pNext = &physicalDeviceProperties3; + gVulkan.vkGetPhysicalDeviceProperties2(gPhysicalDevices[i], &physicalDeviceProperties2); - if (0x10DE == physicalDeviceProperties.vendorID) { + if (0x10DE == physicalDeviceProperties2.properties.vendorID) { continue; } - memcpy(cgpu->name, physicalDeviceProperties.deviceName, min(sizeof(cgpu->name),sizeof(physicalDeviceProperties.deviceName))); + memcpy(cgpu->name, physicalDeviceProperties2.properties.deviceName, min(sizeof(cgpu->name),sizeof(physicalDeviceProperties2.properties.deviceName))); cgpu->name[sizeof(cgpu->name) - 1] = 0; VkPhysicalDeviceMemoryProperties memoryProperties; @@ -332,12 +351,14 @@ static int vulkan_detect(struct cgpu_info *gpus, int *active) VkMemoryType t = memoryProperties.memoryTypes[j]; if ((t.propertyFlags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) { if (t.heapIndex < memoryProperties.memoryHeapCount) { - cgpu->gpu_max_alloc = memoryProperties.memoryHeaps[t.heapIndex].size; + cgpu->gpu_memory = memoryProperties.memoryHeaps[t.heapIndex].size; break; } } } + cgpu->gpu_max_alloc = physicalDeviceProperties3.maxMemoryAllocationSize; + *active += 1; most_devices++; } diff --git a/src/vulkan/gen/CMakeLists.txt b/src/vulkan/gen/CMakeLists.txt index 11ad6bf..fdeba28 100644 --- a/src/vulkan/gen/CMakeLists.txt +++ b/src/vulkan/gen/CMakeLists.txt @@ -1,5 +1,9 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) +if (MSVC) + set (CMAKE_CXX_FLAGS_DEBUG "/MD /Zi /O2 /Ob1 /DNDEBUG") +endif() + # A custom command and target to turn the Vulkan kernel into a byte array header add_custom_command( diff --git a/src/vulkan/gen/scrypt-chacha.comp b/src/vulkan/gen/scrypt-chacha.comp index e1537c3..02b8c3a 100644 --- 
a/src/vulkan/gen/scrypt-chacha.comp +++ b/src/vulkan/gen/scrypt-chacha.comp @@ -413,8 +413,7 @@ main() uvec4 X[8]; const uint lid = gl_LocalInvocationID.x; const uint gid = gl_GlobalInvocationID.x; - uint Nfactor = 0; - uint tmp = N >> 1; + uint tmp; uvec2 nonce; scrypt_hmac_state hmac_pw, work; @@ -432,14 +431,6 @@ main() nonce.x = global_work_offset.x + gid; } - /* Determine the Nfactor */ - while ((tmp & 1) == 0) { - tmp >>= 1; - Nfactor++; - } - - const uint effective_concurrency = (concurrent_threads << 9) >> Nfactor; - password[0] = buffer0[0]; password[1] = buffer0[1]; password[2] = buffer0[2]; @@ -555,7 +546,7 @@ main() } ////////////////////////////////////////////////// /* 2: X = ROMix(X) */ - scrypt_ROMix(X, N, gid, effective_concurrency); + scrypt_ROMix(X, N, gid, concurrent_threads); /* 3: Out = PBKDF2(password, X) */ ////////////////////////////////////////////////// diff --git a/src/vulkan/vulkan-helpers.c b/src/vulkan/vulkan-helpers.c index 6e2153f..385b503 100644 --- a/src/vulkan/vulkan-helpers.c +++ b/src/vulkan/vulkan-helpers.c @@ -80,6 +80,7 @@ int initVulkanLibrary() LOAD_VULKAN_FUNCTION(vkEndCommandBuffer); LOAD_VULKAN_FUNCTION(vkEnumeratePhysicalDevices); LOAD_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties); + LOAD_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties2); LOAD_VULKAN_FUNCTION(vkQueueSubmit); LOAD_VULKAN_FUNCTION(vkQueueWaitIdle); LOAD_VULKAN_FUNCTION(vkDestroyPipelineLayout); @@ -129,7 +130,7 @@ void vulkan_library_shutdown() int getComputeQueueFamilyIndex(uint32_t index) { if (index >= gPhysicalDeviceCount) { - applog(LOG_ERR, "Card index %u not found\n", index); + applog(LOG_ERR, "Card index %u not found", index); return -1; } uint32_t queueFamilyPropertiesCount = 0; @@ -210,7 +211,7 @@ VkDeviceMemory allocateGPUMemory(int index, VkDevice vkDevice, const VkDeviceSi VkResult ret = (memoryTypeIndex == VK_MAX_MEMORY_TYPES ? 
VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS); if (ret != VK_SUCCESS) { - applog(LOG_ERR, "Cannot allocated %u kB GPU memory type for GPU index %u\n", (unsigned)(memorySize / 1024), index); + applog(LOG_ERR, "Cannot allocated %u kB GPU memory type for GPU index %u", (unsigned)(memorySize / 1024), index); return NULL; } @@ -345,7 +346,7 @@ VkPipeline loadShaderFromFile(VkDevice vkDevice, VkPipelineLayout pipelineLayout FILE *fp = fopen(spirv_file_name, "rb"); if (fp == NULL) { - applog(LOG_ERR, "SPIR-V program %s not found\n", spirv_file_name); + applog(LOG_ERR, "SPIR-V program %s not found", spirv_file_name); return NULL; } fseek(fp, 0, SEEK_END); @@ -357,7 +358,7 @@ VkPipeline loadShaderFromFile(VkDevice vkDevice, VkPipelineLayout pipelineLayout size_t read_size = fread(shader, sizeof(char), shader_size, fp); if (read_size != shader_size) { free(shader); - applog(LOG_ERR, "Failed to read shader %s!\n", spirv_file_name); + applog(LOG_ERR, "Failed to read shader %s!", spirv_file_name); return NULL; } @@ -410,15 +411,15 @@ static uint32_t * getShader(uint32_t workSize, uint32_t labelSize, uint32_t *sha if (vulkan_shaders_vault_header[0] == labelSize) { uint32_t *shader = (uint32_t*)calloc(1, vulkan_shaders_vault_header[1]); if (NULL == shader) { - applog(LOG_ERR, "Failed to allocate shader %u:%u %u\n", workSize, labelSize, vulkan_shaders_vault_header[1]); + applog(LOG_ERR, "Failed to allocate shader %u:%u %u", workSize, labelSize, vulkan_shaders_vault_header[1]); return NULL; } - applog(LOG_INFO, "64:%03u %u -> %u\n", vulkan_shaders_vault_header[0], vulkan_shaders_vault_header[2], vulkan_shaders_vault_header[1]); + applog(LOG_INFO, "64:%03u %u -> %u", vulkan_shaders_vault_header[0], vulkan_shaders_vault_header[2], vulkan_shaders_vault_header[1]); *shader_size = vulkan_shaders_vault_header[1]; uint8_t *src = vulkan_shaders_vault + vulkan_shaders_vault_header[3]; uLongf shaderSize = vulkan_shaders_vault_header[1]; if (Z_OK != uncompress((uint8_t*)shader, &shaderSize, src, 
vulkan_shaders_vault_header[2])) { - applog(LOG_ERR, "Failed to uncompress shader %u:%u\n", workSize, labelSize); + applog(LOG_ERR, "Failed to uncompress shader %u:%u", workSize, labelSize); free(shader); return NULL; } @@ -437,11 +438,11 @@ VkPipeline loadShader(VkDevice vkDevice, VkPipelineLayout pipelineLayout, VkShad uint32_t *shader = getShader(workSize, labelSize, &shader_size); if (NULL == shader) { - applog(LOG_ERR, "SPIR-V program %d:%d not found\n", workSize, labelSize); + applog(LOG_ERR, "SPIR-V program %d:%d not found", workSize, labelSize); return NULL; } - applog(LOG_INFO, "SPIR-V program %u:%u %u bytes\n", workSize, labelSize, shader_size); + applog(LOG_INFO, "SPIR-V program %u:%u %u bytes", workSize, labelSize, shader_size); VkShaderModuleCreateInfo shaderModuleCreateInfo = { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, diff --git a/src/vulkan/vulkan-helpers.h b/src/vulkan/vulkan-helpers.h index 23fe301..7774af2 100644 --- a/src/vulkan/vulkan-helpers.h +++ b/src/vulkan/vulkan-helpers.h @@ -61,6 +61,7 @@ typedef struct _Vulkan { DECLARE_VULKAN_FUNCTION(vkEndCommandBuffer); DECLARE_VULKAN_FUNCTION(vkEnumeratePhysicalDevices); DECLARE_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties); + DECLARE_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties2); DECLARE_VULKAN_FUNCTION(vkQueueSubmit); DECLARE_VULKAN_FUNCTION(vkQueueWaitIdle); DECLARE_VULKAN_FUNCTION(vkDestroyPipelineLayout); diff --git a/test/README.md b/test/README.md index 37bbee7..6117ab5 100644 --- a/test/README.md +++ b/test/README.md @@ -22,4 +22,5 @@ Benchmarking and Testing --pow-diff or -d <0-256> count of leading zero bits in target D value [default - 16] --srand-seed or -ss set srand seed value for POW test: 0 - use zero id/seed [default], -1 - use random value --solution-idx or -si set solution index for POW test: index will be compared to be the found solution for Pow [default - unset] + -N set scrypt parameter N [default - 512] ``` diff --git a/test/test.cpp b/test/test.cpp index 
3179e97..4d992db 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -20,6 +20,8 @@ static uint8_t s_salt[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, static const uint8_t zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; +static uint32_t scryptN = 512; + /* find binary substring */ void * memstr(const void *src, size_t length, const uint8_t *token, int token_length) { @@ -117,7 +119,7 @@ void do_benchmark(int aLabelSize, int aLabelsCount) { uint64_t hashes_computed; uint64_t hashes_per_sec; - int status = scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, 512, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec); + int status = scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, scryptN, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec); printf("%s: status %d, %u hashes, %u h/s\n", providers[i].model, status, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec); } } @@ -167,8 +169,8 @@ void do_test(int aLabelSize, int aLabelsCount, int aReferenceProvider, bool aPri uint8_t D[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; referenceLabels = out + i * labelsBufferAlignedSize; memset(referenceLabels, 0, labelsBufferSize); - scryptPositions(providers[i].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); - printf("%s: %u hashes, %u h/s\n", providers[i].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec); + int status = scryptPositions(providers[i].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); + printf("%s: %u hashes, %u h/s, status: %d\n", providers[i].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec, status); aReferenceProvider = i; 
checkOutput = true; break; @@ -180,8 +182,8 @@ void do_test(int aLabelSize, int aLabelsCount, int aReferenceProvider, bool aPri uint8_t D[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; referenceLabels = out + aReferenceProvider * labelsBufferAlignedSize; memset(referenceLabels, 0, labelsBufferSize); - scryptPositions(providers[aReferenceProvider].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); - printf("%s: %u hashes, %u h/s\n", providers[aReferenceProvider].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec); + int status = scryptPositions(providers[aReferenceProvider].id, id, 0, referenceLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, referenceLabels, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); /* keep the provider's return code so the report below prints a defined value */ + printf("%s: %u hashes, %u h/s, status %d\n", providers[aReferenceProvider].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec, status); checkOutput = true; } @@ -191,7 +193,7 @@ void do_test(int aLabelSize, int aLabelsCount, int aReferenceProvider, bool aPri uint8_t D[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; uint8_t *labels = out + i * labelsBufferAlignedSize; memset(labels, 0, labelsBufferSize); - scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, labels, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); + scryptPositions(providers[i].id, id, 0, aLabelsCount - 1, aLabelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, labels, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); printf("%s: %u hashes, %u h/s\n", providers[i].model, (uint32_t)hashes_computed, (uint32_t)hashes_per_sec); if (memstr(labels, labelsBufferSize, zeros, 8)) { printf("ZEROS result\n"); @@ -309,7 +311,7 @@ void test_core(int 
aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize) if (idx_solution == -1ull) { printf("Compute labels and look for a pow solution... Iteration: %d\n", j); - int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS | SPACEMESH_API_COMPUTE_POW, out, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); + int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS | SPACEMESH_API_COMPUTE_POW, out, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); if (status != SPACEMESH_API_ERROR_NONE && status != SPACEMESH_API_POW_SOLUTION_FOUND) { printf("Compute error: %u\n", status); @@ -331,7 +333,7 @@ void test_core(int aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize) printf("Compute labels only... Iteration: %d\n", j); uint64_t idx_temp = -1; - int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, 512, 1, 1, D, &idx_temp, &hashes_computed, &hashes_per_sec); + int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_LEAFS, out, scryptN, 1, 1, D, &idx_temp, &hashes_computed, &hashes_per_sec); if (status != SPACEMESH_API_ERROR_NONE && status != SPACEMESH_API_POW_SOLUTION_FOUND) { printf("Compute returned an error: %u", status); @@ -351,7 +353,7 @@ void test_core(int aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize) printf("Calling pow compute...\n"); - int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_POW, out, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); + int status = scryptPositions(providers[i].id, id, idx, idx + labels_per_iter - 1, labelSize, salt, SPACEMESH_API_COMPUTE_POW, out, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); 
printf("Compute pow only at index: %llu. hashes computed: %llu (%llu h/s)\n", idx, hashes_computed, hashes_per_sec); @@ -379,7 +381,7 @@ void test_core(int aLabelsCount, unsigned aDiff, unsigned aSeed, int labelSize) // compute 256 hash at solution index: uint8_t hash[32]; - scryptPositions(cpu_id, id, idx_solution, idx_solution, 256, salt, SPACEMESH_API_COMPUTE_LEAFS, hash, 512, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec); + scryptPositions(cpu_id, id, idx_solution, idx_solution, 256, salt, SPACEMESH_API_COMPUTE_LEAFS, hash, scryptN, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec); printf("D: "); print_hex32(D); @@ -455,14 +457,14 @@ int do_test_pow(uint64_t aStartPos, int aLabelsCount, unsigned aDiff, unsigned a uint64_t hashes_computed; uint64_t hashes_per_sec; printf("%s: ", providers[i].model); - int status = scryptPositions(providers[i].id, s_id, aStartPos, aStartPos + aLabelsCount - 1, 8, s_salt, SPACEMESH_API_COMPUTE_POW, NULL, 512, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); + int status = scryptPositions(providers[i].id, s_id, aStartPos, aStartPos + aLabelsCount - 1, 8, s_salt, SPACEMESH_API_COMPUTE_POW, NULL, scryptN, 1, 1, D, &idx_solution, &hashes_computed, &hashes_per_sec); switch (status) { case SPACEMESH_API_POW_SOLUTION_FOUND: printf("%u hashes, %u h/s, solution at %u\n", (uint32_t)hashes_computed, (uint32_t)hashes_per_sec, (uint32_t)idx_solution); if (-1 != cpu_id) { uint8_t hash[32]; memset(hash, 0, sizeof(hash)); - scryptPositions(cpu_id, s_id, idx_solution, idx_solution, 256, s_salt, SPACEMESH_API_COMPUTE_LEAFS, hash, 512, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec); + scryptPositions(cpu_id, s_id, idx_solution, idx_solution, 256, s_salt, SPACEMESH_API_COMPUTE_LEAFS, hash, scryptN, 1, 1, NULL, NULL, &hashes_computed, &hashes_per_sec); printf("id: "); print_hex32(s_id); printf("\n"); @@ -891,6 +893,18 @@ int main(int argc, char **argv) solutionIdx = strtoull(argv[i], NULL, 10); } } + else if (0 == 
strcmp(argv[i], "-N")) { + i++; + if (i < argc) { + scryptN = strtoul(argv[i], NULL, 10); + } + } + else if (0 == strcmp(argv[i], "--srand-seed") || 0 == strcmp(argv[i], "-ss")) { + i++; + if (i < argc) { + srand_seed = strtoul(argv[i], NULL, 10); + } + } else if (0 == strcmp(argv[i], "-id")) { i++; if (i < argc) {