Merge pull request #24 from tontechio/develop

tonlib improvements, cuda fix
tontechio · Nov 5, 2021 · 952873b · 952873b
2 parents daa7e46 + 9249ead
commit 952873b
Show file tree

Hide file tree

Showing 13 changed files with 92 additions and 42 deletions.
diff --git a/crypto/util/Miner.cpp b/crypto/util/Miner.cpp
@@ -19,6 +19,7 @@
 #include "Miner.h"
 
 #include "td/utils/Random.h"
+#include "td/utils/format.h"
 #include "td/utils/misc.h"
 #include "td/utils/crypto.h"
 #include "td/utils/port/Clocks.h"
@@ -88,7 +89,13 @@ void Miner::print_stats(td::Timestamp start_at, td::uint64 hashes_computed, td::
   if (passed < 1e-9) {
     passed = 1;
   }
-  LOG(INFO) << "[ passed: "<< passed << "s, hashes computed: " << hashes_computed << " (" << static_cast<double>(hashes_computed)/static_cast<double>(hashes_expected)*100 << "%), speed: " << static_cast<double>(hashes_computed) / passed << " hps ]";
+  double speed = static_cast<double>(hashes_computed) / passed;
+  std::stringstream ss;
+  ss << std::scientific << std::setprecision(1) << speed;
+  LOG(INFO) << "[ mining in progress, passed: " << td::format::as_time(passed)
+            << ", hashes computed: " << hashes_computed << " ("
+            << static_cast<double>(hashes_computed) / static_cast<double>(hashes_expected) * 100
+            << "%), speed: " << ss.str() << " hps ]";
 };
 
 td::optional<std::string> build_mine_result(int cpu_id, ton::HDataEnv H, const ton::Miner::Options &options,

diff --git a/crypto/util/Miner.h b/crypto/util/Miner.h
@@ -42,6 +42,7 @@ class Miner {
     td::Timestamp start_at;
     int verbosity;
     td::int32 gpu_id;
+    td::int32 platform_id;
     td::int32 threads;
     td::uint32 gpu_threads = 16;
     td::uint64 factor = 16;

diff --git a/crypto/util/cuda/cuda.cpp b/crypto/util/cuda/cuda.cpp
@@ -7,7 +7,7 @@
 
 #ifndef _WIN32
 #include <unistd.h>
-#include <sys/time.h>=
+#include <sys/time.h>
 #else
 #include <win_usleep.hpp>
 #include <win_gettimeofday.hpp>

diff --git a/crypto/util/opencl/opencl.cpp b/crypto/util/opencl/opencl.cpp
@@ -53,6 +53,7 @@ void OpenCL::print_devices() {
 
 void OpenCL::create_context(cl_uint platform_idx, cl_uint device_idx) {
   char buf[1024];
+  CL_WRAPPER(clGetDeviceIDs(platforms_[platform_idx], CL_DEVICE_TYPE_ALL, device_count_, devices_, NULL));
   CL_WRAPPER(clGetDeviceInfo(devices_[device_idx], CL_DEVICE_NAME, sizeof(buf), buf, NULL));
   CL_WRAPPER(clGetDeviceInfo(devices_[device_idx], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size_),
                              &max_work_group_size_, NULL));
@@ -157,6 +158,12 @@ void OpenCL::load_objects(uint32_t gpu_id, uint32_t cpu_id, unsigned char *data,
 }
 
 void OpenCL::release() {
+  CL_WRAPPER(clReleaseCommandQueue(command_queue_));
+  CL_WRAPPER(clReleaseKernel(kernel_));
+  CL_WRAPPER(clReleaseProgram(program_));
+  CL_WRAPPER(clReleaseContext(context_));
+  CL_WRAPPER(clReleaseDevice(devices_[device_idx_]));
+
   CL_WRAPPER(clReleaseMemObject(buffer_result_));
   CL_WRAPPER(clReleaseMemObject(buffer_expired_));
   CL_WRAPPER(clReleaseMemObject(buffer_start_nonce_));
@@ -166,11 +173,10 @@ void OpenCL::release() {
   CL_WRAPPER(clReleaseMemObject(buffer_target_));
   CL_WRAPPER(clReleaseMemObject(buffer_data_));
   CL_WRAPPER(clReleaseMemObject(buffer_rdata_));
-  CL_WRAPPER(clReleaseCommandQueue(command_queue_));
-  CL_WRAPPER(clReleaseKernel(kernel_));
-  CL_WRAPPER(clReleaseProgram(program_));
-  CL_WRAPPER(clReleaseContext(context_));
-  CL_WRAPPER(clReleaseDevice(devices_[device_idx_]));
+
+  free(devices_);
+  free(platforms_);
+  free(source_str_);
 }
 
 HashResult OpenCL::scan_hash(uint cpu_id, uint32_t gpu_threads, td::uint64 threads, td::uint64 start_nonce, uint expired) {

diff --git a/crypto/util/opencl/opencl.h b/crypto/util/opencl/opencl.h
@@ -50,10 +50,6 @@ class OpenCL {
   cl_command_queue command_queue_;
 
  private:
-  cl_mem pinned_saved_keys_;
-  cl_uchar *saved_plain_;
-  cl_mem pinned_partial_hashes_;
-  cl_uint *partial_hashes_;
   cl_mem buffer_rdata_;
   cl_mem buffer_data_;
   cl_mem buffer_target_;

diff --git a/crypto/util/opencl/sha256.cpp b/crypto/util/opencl/sha256.cpp
@@ -16,7 +16,7 @@ td::optional<std::string> SHA256::run(ton::HDataEnv H, unsigned char *rdata, con
   //opencl.load_source("sha256.cl");
   opencl.set_source(sha256_cl, sha256_cl_len);
   opencl.print_devices();
-  opencl.create_context(0, options.gpu_id);
+  opencl.create_context(options.platform_id, options.gpu_id);
   opencl.create_kernel();
 
   // data

diff --git a/crypto/util/pow-miner-howto.md b/crypto/util/pow-miner-howto.md
@@ -3,12 +3,13 @@
 Invoke the pow-miner-cuda (pow-miner-opencl) utility as follows:
 
 ```
-$ crypto/pow-miner-cuda -vv -g<gpu-id> -F<boost-factor> -t<timeout-in-sec> <your-wallet-address> <seed> <complexity> <iterations> <pow-giver-address> <boc-filename>
+$ crypto/pow-miner-cuda -vv -g<gpu-id> -p<platform-id> -F<boost-factor> -t<timeout-in-sec> <your-wallet-address> <seed> <complexity> <iterations> <pow-giver-address> <boc-filename>
 ```
 
 Here:
 
 - `gpu-id`: GPU device ID
+- `platform-id`: GPU platform ID (OpenCL only)
 - `boost-factor`: 1..65536, the multiplier for throughput, affects the number of hashes processed per iteration on the GPU
 - `timeout-in-sec`: max amount of seconds that the miner would run before admitting failure
 - `your-wallet-address`: the address of your wallet (possibly not initialized yet), either in the masterchain or in the workchain (note that you need a masterchain wallet to control a validator)
@@ -53,13 +54,14 @@ If they change, the task is restarted. If a solution is found, it sends it to th
 Invoke the tonlib-cuda-cli (tonlib-opencl-cli) utility as follows:
 
 ```
-$  tonlib/tonlib-cuda-cli -v 3 -C <lite-server-config> -e 'pminer start <giver_address> <my_address> <gpu-id> [boost-factor]'
+$  tonlib/tonlib-cuda-cli -v 3 -C <lite-server-config> -e 'pminer start <giver_address> <my_address> <gpu-id> [boost-factor] [platform-id]'
 ```
 
 Here:
 
 - `lite-server-config`: last config from https://newton-blockchain.github.io/global.config.json
 - `gpu-id`: GPU device ID
+- `platform-id`: GPU platform ID (OpenCL only)
 - `boost-factor`: 1..65536, the multiplier for throughput, affects the number of hashes processed per iteration on the GPU
 - `giver_address`: the address of the selected giver
 - `my_address`: the address of your wallet (possibly not initialized yet), either in the masterchain or in the workchain (note that you need a masterchain wallet to control a validator)
@@ -91,10 +93,10 @@ synchronization: DONE in 211.6ms
 
 ## TONLIB CLI logging
 
-To redirect the output to a file, add `&> pminer.log` to the command:
+To redirect the output to a file, add `-l pminer.log` to the command:
 
 ```
-nohup tonlib/tonlib-cuda-cli -v 3 -C global.config.json -e 'pminer start Ef-FV4QTxLl-7Ct3E6MqOtMt-RGXMxi27g4I645lw6MTWg0f kQBWkNKqzCAwA9vjMwRmg7aY75Rf8lByPA9zKXoqGkHi8SM7 0 32' &> pminer.log
+nohup tonlib/tonlib-cuda-cli -v 3 -C global.config.json -e 'pminer start Ef-FV4QTxLl-7Ct3E6MqOtMt-RGXMxi27g4I645lw6MTWg0f kQBWkNKqzCAwA9vjMwRmg7aY75Rf8lByPA9zKXoqGkHi8SM7 0 32' -l pminer.log
 ```
 
 ## TONLIB CLI automation

diff --git a/crypto/util/pow-miner-windows-howto.md b/crypto/util/pow-miner-windows-howto.md
@@ -19,15 +19,15 @@ If you want more flexibility or control you can also use the `pow-miner-cuda.exe
 6. Invoke the `tonlib-cuda-cli.exe` (`tonlib-opencl-cli.exe`) utility as follows:
 
 ```
-> tonlib-cuda-cli.exe -v 3 -C <global-config> -e "pminer start <giver_addess> <my_address> <gpu-id> [boost-factor]"
+> tonlib-cuda-cli.exe -v 3 -C <global-config> -e "pminer start <giver_address> <my_address> <gpu-id> [boost-factor] [platform-id]"
 ```
 
 ## TONLIB CLI logging
 
-To redirect the output to a file, add `>> pminer.log` to the command:
+To redirect the output to a file, add `-l pminer.log` to the command:
 
 ```
-> tonlib-cuda-cli.exe -v 3 -C global.config.json -e "pminer start Ef-FV4QTxLl-7Ct3E6MqOtMt-RGXMxi27g4I645lw6MTWg0f kQBWkNKqzCAwA9vjMwRmg7aY75Rf8lByPA9zKXoqGkHi8SM7 0 32" >> pminer.log
+> tonlib-cuda-cli.exe -v 3 -C global.config.json -e "pminer start Ef-FV4QTxLl-7Ct3E6MqOtMt-RGXMxi27g4I645lw6MTWg0f kQBWkNKqzCAwA9vjMwRmg7aY75Rf8lByPA9zKXoqGkHi8SM7 0 32" -l pminer.log
 ```
 
 ## TONLIB CLI automation

diff --git a/crypto/util/pow-miner.cpp b/crypto/util/pow-miner.cpp
@@ -66,6 +66,7 @@ int usage() {
             << " [-v][-B]"
 #if defined MINERCUDA || defined MINEROPENCL
                "[-g<gpu-id>]"
+               "[-p<platform-id>]"
                "[-F<boost-factor>]"
 #else
                "[-w<threads>]"
@@ -124,9 +125,10 @@ double print_stats() {
     passed = 1;
   }
   double speed = static_cast<double>(hashes_computed) / passed;
-  LOG(INFO) << "[ passed: " << passed << "s, hashes computed: " << hashes_computed << " ("
-            << static_cast<double>(hashes_computed) / static_cast<double>(hash_rate) * 100 << "%), speed: " << speed
-            << " hps ]";
+  std::stringstream ss;
+  ss << std::scientific << std::setprecision(1) << speed;
+  LOG(INFO) << "[ passed: " << td::format::as_time(passed) << ", hashes computed: " << hashes_computed << " ("
+     << static_cast<double>(hashes_computed) / static_cast<double>(hash_rate) * 100 << "%), speed: " << ss.str() << " hps ]";
   return speed;
 }
 
@@ -213,11 +215,14 @@ class MinerBench : public td::Benchmark {
       }
     }
 
+    std::stringstream ss;
+    ss << std::scientific << std::setprecision(1) << best_speed_;
+
     LOG(ERROR) << "";
     LOG(ERROR) << "*************************************************";
     LOG(ERROR) << "***";
     LOG(ERROR) << "***   best boost factor: " << best_factor_;
-    LOG(ERROR) << "***   best speed:        " << best_speed_ << " hps";
+    LOG(ERROR) << "***   best speed:        " << ss.str() << " hps";
     LOG(ERROR) << "***";
     LOG(ERROR) << "*************************************************";
     LOG(ERROR) << "";
@@ -236,9 +241,9 @@ int main(int argc, char* const argv[]) {
   ton::Miner::Options options;
 
   progname = argv[0];
-  int i, threads = 1, factor = 16, gpu_id = -1, timeout = 0;
+  int i, threads = 1, factor = 16, gpu_id = -1, platform_id = 0, timeout = 0;
   bool bounce = false, benchmark = false;
-  while ((i = getopt(argc, argv, "bnvw:g:G:F:t:Bh:V")) != -1) {
+  while ((i = getopt(argc, argv, "bnvw:g:p:G:F:t:Bh:V")) != -1) {
     switch (i) {
       case 'v':
         ++verbosity;
@@ -255,6 +260,10 @@ int main(int argc, char* const argv[]) {
         gpu_id = atoi(optarg);
         CHECK(gpu_id >= 0 && gpu_id <= 16);
         break;
+      case 'p':
+        platform_id = atoi(optarg);
+        CHECK(platform_id >= 0 && platform_id <= 16);
+        break;
       case 'G':
         // deprecated
         break;
@@ -315,6 +324,7 @@ int main(int argc, char* const argv[]) {
 #endif
 
   options.gpu_id = gpu_id;
+  options.platform_id = platform_id;
   options.token_ = token.get_cancellation_token();
 
   if (argc != optind + 4 && argc != optind + 6) {

diff --git a/crypto/util/pow-miner.md b/crypto/util/pow-miner.md
@@ -106,7 +106,7 @@ This is a GPU-miner compatible with Nvidia hardware. Can be used in multi-GPU en
 to see the list of available GPUs in the system.
 
 ```shell
-usage: crypto/pow-miner-cuda [-v][-B][-g<gpu-id>][-F<boost factor>] [-t<timeout>] <my-address> <pow-seed> <pow-complexity> <iterations> [<miner-addr> <output-ext-msg-boc>] [-V]
+usage: crypto/pow-miner-cuda [-v][-B][-g<gpu-id>][-p<platform-id>][-F<boost factor>] [-t<timeout>] <my-address> <pow-seed> <pow-complexity> <iterations> [<miner-addr> <output-ext-msg-boc>] [-V]
 Outputs a valid <rdata> value for proof-of-work testgiver after computing at most <iterations> hashes or terminates with non-zero exit code
 ```
 
@@ -116,7 +116,7 @@ This is a GPU-miner compatible with AMD hardware. Can be used in multi-GPU envir
 to see the list of available GPUs in the system.
 
 ```shell
-usage: crypto/pow-miner-cuda [-v][-B][-g<gpu-id>][-F<boost factor>] [-t<timeout>] <my-address> <pow-seed> <pow-complexity> <iterations> [<miner-addr> <output-ext-msg-boc>] [-V]
+usage: crypto/pow-miner-opencl [-v][-B][-g<gpu-id>][-p<platform-id>][-F<boost factor>] [-t<timeout>] <my-address> <pow-seed> <pow-complexity> <iterations> [<miner-addr> <output-ext-msg-boc>] [-V]
 Outputs a valid <rdata> value for proof-of-work testgiver after computing at most <iterations> hashes or terminates with non-zero exit code
 ```
 
@@ -137,7 +137,7 @@ This is a TONLIB CLI with embedded GPU-miner compatible with Nvidia hardware. Ca
 to see the list of available GPUs in the system.
 
 ```shell
-§usage: /usr/bin/ton/tonlib/tonlib-cuda-cli [-v] -C <lite-server-config> -e 'pminer start <giver_addess> <my_address> <gpu-id> [boost-factor]'
+usage: /usr/bin/ton/tonlib/tonlib-cuda-cli [-v] -C <lite-server-config> -e 'pminer start <giver_address> <my_address> <gpu-id> [boost-factor] [platform-id]'
 The behavior is the same as the tonlib-cli CPU miner.
 ```
 
@@ -147,7 +147,7 @@ This is a TONLIB CLI with embedded GPU-miner compatible with AMD hardware. Can b
 to see the list of available GPUs in the system.
 
 ```shell
-usage: /usr/bin/ton/tonlib/tonlib-opencl-cli [-v] -C <lite-server-config> -e 'pminer start <giver_addess> <my_address> <gpu-id> [boost-factor]'
+usage: /usr/bin/ton/tonlib/tonlib-opencl-cli [-v] -C <lite-server-config> -e 'pminer start <giver_address> <my_address> <gpu-id> [boost-factor] [platform-id]'
 The behavior is the same as the tonlib-cli CPU miner.
 ```
 

diff --git a/tdutils/td/utils/TsFileLog.cpp b/tdutils/td/utils/TsFileLog.cpp
@@ -32,13 +32,14 @@ namespace td {
 namespace detail {
 class TsFileLog : public LogInterface {
  public:
-  Status init(string path, td::int64 rotate_threshold, bool redirect_stderr) {
+  Status init(string path, td::int64 rotate_threshold, bool redirect_stderr, bool merge_thread_logs) {
     path_ = std::move(path);
     rotate_threshold_ = rotate_threshold;
     redirect_stderr_ = redirect_stderr;
     for (int i = 0; i < (int)logs_.size(); i++) {
       logs_[i].id = i;
     }
+    merge_thread_logs_ = merge_thread_logs;
     return init_info(&logs_[0]);
   }
 
@@ -68,6 +69,7 @@ class TsFileLog : public LogInterface {
   bool redirect_stderr_;
   std::string path_;
   std::array<Info, MAX_THREAD_ID> logs_;
+  bool merge_thread_logs_;
 
   LogInterface *get_current_logger() {
     auto *info = get_current_info();
@@ -88,7 +90,7 @@ class TsFileLog : public LogInterface {
   }
 
   string get_path(Info *info) {
-    if (info->id == 0) {
+    if (info->id == 0 || merge_thread_logs_) {
       return path_;
     }
     return PSTRING() << path_ << ".thread" << info->id << ".log";
@@ -104,9 +106,9 @@ class TsFileLog : public LogInterface {
 };
 }  // namespace detail
 
-Result<td::unique_ptr<LogInterface>> TsFileLog::create(string path, td::int64 rotate_threshold, bool redirect_stderr) {
+Result<td::unique_ptr<LogInterface>> TsFileLog::create(string path, td::int64 rotate_threshold, bool redirect_stderr, bool merge_thread_logs) {
   auto res = td::make_unique<detail::TsFileLog>();
-  TRY_STATUS(res->init(path, rotate_threshold, redirect_stderr));
+  TRY_STATUS(res->init(path, rotate_threshold, redirect_stderr, merge_thread_logs));
   return std::move(res);
 }
 }  // namespace td
diff --git a/tdutils/td/utils/TsFileLog.h b/tdutils/td/utils/TsFileLog.h
@@ -25,10 +25,11 @@
 
 namespace td {
 class TsFileLog {
+ public:
   static constexpr int64 DEFAULT_ROTATE_THRESHOLD = 10 * (1 << 20);
 
  public:
   static Result<td::unique_ptr<LogInterface>> create(string path, int64 rotate_threshold = DEFAULT_ROTATE_THRESHOLD,
-                                                     bool redirect_stderr = true);
+                                                     bool redirect_stderr = true, bool merge_thread_logs = false);
 };
 }  // namespace td