Skip to content

Commit

Permalink
significant performance improvement for rfv2
Browse files Browse the repository at this point in the history
this is based on the latest updates, which reduce the number of rounds and allocate only one rambox for all threads.
it is roughly 50 times faster but reports much higher performance, as the calculation includes all skipped hashes as well.
the effective performance is the reported one divided by 256.
  • Loading branch information
MikeMurdo committed May 16, 2019
1 parent bd89c77 commit 7e418ce
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 19 deletions.
77 changes: 71 additions & 6 deletions algo/rfv2_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ typedef struct RF_ALIGN(16) rfv2_ctx {
uint32_t word; // LE pending message
uint32_t len; // total message length
uint32_t crc;
uint16_t changes; // must remain lower than RFV2_RAMBOX_HIST
uint16_t changes; // must remain lower than RFV2_RAMBOX_HIST, 65535=R/O
uint16_t left_bits; // adjust rambox probability
uint64_t *rambox;
uint32_t rb_o; // rambox offset
Expand Down Expand Up @@ -150,6 +150,12 @@ static const uint8_t rfv2_iv[32] = {
0x33,0x68,0x7c,0xed,0x73,0x35,0x4b,0x0a,0x97,0x25,0x4c,0x77,0x7a,0xaa,0x61,0x1b
};

/* Reads four bytes at <x> as a little-endian 32-bit value and returns it in
 * host representation. <x> needs no particular alignment since it is only
 * accessed byte by byte.
 */
static inline uint32_t rf_le32toh(const uint8_t *x)
{
	/* Each byte is widened to uint32_t before shifting: a plain uint8_t is
	 * promoted to signed int, and shifting a value >= 0x80 left by 24
	 * would overflow it, which is undefined behavior.
	 */
	return  (uint32_t)x[0]        |
	       ((uint32_t)x[1] << 8)  |
	       ((uint32_t)x[2] << 16) |
	       ((uint32_t)x[3] << 24);
}

// mix the current state with the crc and return the new crc
static inline uint32_t rf_crc32x4(rf_u32 *state, uint32_t crc)
{
Expand Down Expand Up @@ -312,11 +318,13 @@ static inline uint64_t rfv2_rambox(rfv2_ctx_t *ctx, uint64_t old)
p = &ctx->rambox[idx];
k = *p;
old += rf_rotr64(k, (uint8_t)(old / ctx->rb_l));
*p = old;
if (ctx->changes < RFV2_RAMBOX_HIST) {
ctx->hist[ctx->changes] = idx;
ctx->prev[ctx->changes] = k;
ctx->changes++;
if (ctx->changes != 65535) {
*p = old;
if (ctx->changes < RFV2_RAMBOX_HIST) {
ctx->hist[ctx->changes] = idx;
ctx->prev[ctx->changes] = k;
ctx->changes++;
}
}
}
return old;
Expand Down Expand Up @@ -769,3 +777,60 @@ int rfv2_hash(void *out, const void *in, size_t len, void *rambox, const void *r
{
return rfv2_hash2(out, in, len, rambox, rambox_template, RFV2_INIT_CRC);
}

/* Walks the nonce range <min>..<max> (both included), writing each nonce in
 * big endian order into bytes 76..79 of the 80-byte header <msg>, and stops at
 * the first nonce whose hash has its last 32-bit word (read as little endian)
 * lower than or equal to <target>. A nonce is only hashed when the CRC of the
 * full header satisfies sin_scaled() == 2; all other nonces are skipped
 * without hashing. <rambox> must already be initialized; the context is
 * flagged with changes=65535 (read-only marker) before hashing. <hash> must
 * provide room for at least 32 bytes and be 32-bit aligned.
 *
 * Returns 1 when a matching nonce was found (the caller reads the nonce back
 * from <msg>), or 0 when <max> was reached or when <stop> is non-NULL and the
 * byte it points to became non-zero (used to let another thread interrupt the
 * scan).
 */
int rfv2_scan_hdr(char *msg, void *rambox, uint32_t *hash, uint32_t target, uint32_t min, uint32_t max, volatile char *stop)
{
	uint32_t crc_prefix, crc_full, nonce;
	rfv2_ctx_t ctx;
	int pass;

	// CRC of the 76 constant header bytes, computed once for all nonces
	crc_prefix = rf_crc32_mem(0, msg, 76);

	nonce = min;
	while (1) {
		// store the candidate nonce in big endian at the end of the header
		msg[76] = nonce >> 24;
		msg[77] = nonce >> 16;
		msg[78] = nonce >> 8;
		msg[79] = nonce;

		// extend the prefix CRC with the 4 nonce bytes
		crc_full = rf_crc32_mem(crc_prefix, msg + 76, 4);

		// only candidates passing the sin_scaled() filter get hashed
		if (sin_scaled(crc_full) == 2) {
			rfv2_init(&ctx, RFV2_INIT_CRC, rambox);
			ctx.changes = 65535; // mark the rambox read-only

			// derive the rambox window from the header CRC
			ctx.rb_o = crc_full % (ctx.rb_l / 2);
			ctx.rb_l = (ctx.rb_l / 2 - ctx.rb_o) * 2;

			// the header is absorbed and padded twice
			for (pass = 0; pass < 2; pass++) {
				rfv2_update(&ctx, msg, 80);
				rfv2_pad256(&ctx);
			}

			rfv2_final(hash, &ctx);

			if (rf_le32toh((uint8_t *)(hash + 7)) <= target)
				return 1;
		}

		// give up at the end of the range, or when asked to stop
		if (nonce == max || (stop && *stop))
			return 0;

		nonce++;
	}
}
47 changes: 34 additions & 13 deletions algo/rfv2_cpuminer.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,43 +32,64 @@ int scanhash_rfv2(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *h
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
void *rambox;
static void *rambox;
int ret = 0;

if (opt_benchmark)
Htarg = ptarget[7] = 0x1ffffff;
Htarg = ptarget[7] = 0x1ffff;

//printf("thd%d work=%p htarg=%08x ptarg7=%08x first_nonce=%08x max_nonce=%08x hashes_done=%Lu\n",
// thr_id, work, Htarg, ptarget[7], first_nonce, max_nonce, (unsigned long)*hashes_done);

for (int k=0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);

rambox = malloc(RFV2_RAMBOX_SIZE * 8);
if (rambox == NULL)
goto out;
if (!rambox) {
//printf("Rambox not yet initialized\n");
if (!thr_id) {
/* only thread 0 is responsible for allocating the shared rambox */
void *r = malloc(RFV2_RAMBOX_SIZE * 8);
if (r == NULL) {
//printf("[%d] rambox allocation failed\n", thr_id);
*(volatile void **)&rambox = (void*)0x1;
goto out;
}
//printf("Thread %d initializing the rambox\n", thr_id);
rfv2_raminit(r);
*(volatile void **)&rambox = r;
} else {
/* wait for thread 0 to finish alloc+init of rambox */
//printf("Thread %d waiting for rambox init\n", thr_id);
while (!*(volatile void **)&rambox)
usleep(100000);
}
}

rfv2_raminit(rambox);
// pre-compute the hash state based on the constant part of the header
if (*(volatile void **)&rambox == (void*)0x1) {
//printf("[%d] rambox allocation failed\n", thr_id);
goto out; // the rambox wasn't properly initialized
}

do {
be32enc(&endiandata[19], nonce);
rfv2_hash(hash, endiandata, 80, rambox, NULL);
ret = rfv2_scan_hdr((char *)endiandata, rambox, hash, Htarg, nonce, max_nonce, restart);
nonce = be32toh(endiandata[19]);
if (!ret)
break;

// drop invalid shares caused by rambox collisions
rfv2_hash(hash, (char *)endiandata, 80, rambox, NULL);

if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
if (rf_le32toh((uint8_t*)(hash+7)) <= Htarg && fulltest(hash, ptarget)) {
work_set_target_ratio(work, hash);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
ret = 1;
goto out;
}
next:
nonce++;
} while (nonce < max_nonce && !(*restart));

pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
out:
free(rambox);
return ret;
}

0 comments on commit 7e418ce

Please sign in to comment.