This commit is contained in:
Jay D Dee
2019-07-04 12:12:11 -04:00
parent 0d769ee0fe
commit 9abc19a30a
29 changed files with 220 additions and 155 deletions

View File

@@ -88,6 +88,7 @@ int scanhash_jha_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[8*4] __attribute__ ((aligned (64))); uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[25]); uint32_t *hash7 = &(hash[25]);
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
@@ -115,12 +116,11 @@ int scanhash_jha_4way( struct work *work, uint32_t max_nonce,
0 0
}; };
// for ( int i=0; i < 19; i++ ) for ( int i=0; i < 19; i++ )
// be32enc( &endiandata[i], pdata[i] ); be32enc( &edata[i], pdata[i] );
// uint64_t *edata = (uint64_t*)endiandata; mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 ); // mm256_bswap_intrlv80_4x64( vdata, pdata );
mm256_bswap_intrlv80_4x64( vdata, pdata );
for ( int m = 0; m < 6; m++ ) for ( int m = 0; m < 6; m++ )
{ {

View File

@@ -21,6 +21,7 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32))); uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[25]); // 3*8+1 uint32_t *hash7 = &(hash[25]); // 3*8+1
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
@@ -31,7 +32,9 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
// const uint32_t Htarg = ptarget[7]; // const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated int thr_id = mythr->id; // thr_id arg is deprecated
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
do { do {
*noncev = mm256_intrlv_blend_32( mm256_bswap_32( *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev ); _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );

View File

@@ -91,6 +91,7 @@ int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[8*8] __attribute__ ((aligned (64))); uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]); uint32_t *hash7 = &(hash[7<<3]);
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
@@ -104,7 +105,10 @@ int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark ) if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff; ( (uint32_t*)ptarget )[7] = 0x0000ff;
mm256_bswap_intrlv80_8x32( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_8x32( vdata, pdata );
do do
{ {
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4, *noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,

View File

@@ -146,6 +146,7 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[8*8] __attribute__ ((aligned (64))); uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
@@ -157,7 +158,10 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark ) if ( opt_benchmark )
ptarget[7] = 0x0000ff; ptarget[7] = 0x0000ff;
mm256_bswap_intrlv80_8x32( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_8x32( vdata, pdata );
lyra2z_8way_midstate( vdata ); lyra2z_8way_midstate( vdata );
do { do {

View File

@@ -165,6 +165,7 @@ int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t n = pdata[19]; uint32_t n = pdata[19];
@@ -189,7 +190,9 @@ int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
0 0
}; };
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
for (int m=0; m < 6; m++) for (int m=0; m < 6; m++)
if (Htarg <= htmax[m]) if (Htarg <= htmax[m])

View File

@@ -170,6 +170,7 @@ int scanhash_quark_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64))); uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[25]); uint32_t *hash7 = &(hash[25]);
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
@@ -179,7 +180,9 @@ int scanhash_quark_4way( struct work *work, uint32_t max_nonce,
__m256i *noncev = (__m256i*)vdata + 9; // aligned __m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated int thr_id = mythr->id; // thr_id arg is deprecated
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
do do
{ {
*noncev = mm256_intrlv_blend_32( mm256_bswap_32( *noncev = mm256_intrlv_blend_32( mm256_bswap_32(

View File

@@ -36,6 +36,7 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
{ {
uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32))); uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
@@ -59,7 +60,10 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
0 }; 0 };
// Need big endian data // Need big endian data
mm256_bswap_intrlv80_8x32( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_8x32( vdata, pdata );
sha256_8way_init( &sha256_ctx8 ); sha256_8way_init( &sha256_ctx8 );
sha256_8way( &sha256_ctx8, vdata, 64 ); sha256_8way( &sha256_ctx8, vdata, 64 );
@@ -129,6 +133,7 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t vdata[20*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32))); uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]); uint32_t *hash7 = &(hash[7<<2]);
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
@@ -152,7 +157,9 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
0xFFFF0000, 0xFFFF0000,
0 }; 0 };
mm128_bswap_intrlv80_4x32( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm128_intrlv_4x32( vdata, edata, edata, edata, edata, 640 );
// mm128_bswap_intrlv80_4x32( vdata, pdata );
sha256_4way_init( &sha256_ctx4 ); sha256_4way_init( &sha256_ctx4 );
sha256_4way( &sha256_ctx4, vdata, 64 ); sha256_4way( &sha256_ctx4, vdata, 64 );

View File

@@ -163,6 +163,7 @@ int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
{ {
uint32_t vdata[20*8] __attribute__ ((aligned (64))); uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32))); uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<3]); uint32_t *hash7 = &(hash[7<<3]);
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
@@ -186,8 +187,12 @@ int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
0xFFFF0000, 0xFFFF0000,
0 }; 0 };
swab32_array( edata, pdata, 20 );
mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
// Need big endian data // Need big endian data
mm256_bswap_intrlv80_8x32( vdata, pdata ); // mm256_bswap_intrlv80_8x32( vdata, pdata );
sha256_8way_init( &sha256_ctx8 ); sha256_8way_init( &sha256_ctx8 );
sha256_8way( &sha256_ctx8, vdata, 64 ); sha256_8way( &sha256_ctx8, vdata, 64 );
@@ -248,6 +253,7 @@ int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t vdata[20*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32))); uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64))); uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]); uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
@@ -271,7 +277,10 @@ int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce,
0xFFFF0000, 0xFFFF0000,
0 }; 0 };
mm128_bswap_intrlv80_4x32( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm128_intrlv_4x32( vdata, edata, edata, edata, edata, 640 );
// mm128_bswap_intrlv80_4x32( vdata, pdata );
sha256_4way_init( &sha256_ctx4 ); sha256_4way_init( &sha256_ctx4 );
sha256_4way( &sha256_ctx4, vdata, 64 ); sha256_4way( &sha256_ctx4, vdata, 64 );

View File

@@ -63,6 +63,7 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t vdata[20*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (64))); uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]); uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
@@ -73,7 +74,9 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
__m256i *noncev = (__m256i*)vdata + 9; // aligned __m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated int thr_id = mythr->id; // thr_id arg is deprecated
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
do do
{ {
*noncev = mm256_intrlv_blend_32( mm256_bswap_32( *noncev = mm256_intrlv_blend_32( mm256_bswap_32(

View File

@@ -32,25 +32,24 @@ int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce; uint32_t n = first_nonce;
// __m256i *noncev = (__m256i*)vdata + 9; // aligned __m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated int thr_id = mythr->id; // thr_id arg is deprecated
uint32_t *noncep = vdata + 73; // 9*8 + 1 // uint32_t *noncep = vdata + 73; // 9*8 + 1
swab32_array( edata, pdata, 20 ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 ); mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata ); // mm256_bswap_intrlv80_4x64( vdata, pdata );
do do
{ {
be32enc( noncep, n ); // be32enc( noncep, n );
be32enc( noncep+2, n+1 ); // be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 ); // be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 ); // be32enc( noncep+6, n+3 );
// *noncev = mm256_intrlv_blend_32( mm256_bswap_32( *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
// _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev ); _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
skein2hash_4way( hash, vdata ); skein2hash_4way( hash, vdata );

View File

@@ -165,7 +165,7 @@ int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64))); uint32_t edata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t n = pdata[19]; uint32_t n = pdata[19];
@@ -178,7 +178,9 @@ int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 }; 0xFFFFF000, 0xFFFF0000, 0 };
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
for (int m=0; m < 6; m++) for (int m=0; m < 6; m++)
if (Htarg <= htmax[m]) if (Htarg <= htmax[m])

View File

@@ -64,6 +64,7 @@ int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64))); uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -86,7 +87,9 @@ int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
0xFFFF0000, 0xFFFF0000,
0 }; 0 };
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
// precalc midstate // precalc midstate
// doing it one way then then interleaving would be faster but too // doing it one way then then interleaving would be faster but too

View File

@@ -164,7 +164,7 @@ int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64))); uint32_t edata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t n = pdata[19]; uint32_t n = pdata[19];
@@ -177,7 +177,9 @@ int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 }; 0xFFFFF000, 0xFFFF0000, 0 };
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
for (int m=0; m < 6; m++) for (int m=0; m < 6; m++)
if (Htarg <= htmax[m]) if (Htarg <= htmax[m])

View File

@@ -171,6 +171,7 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t n = pdata[19]; uint32_t n = pdata[19];
@@ -183,7 +184,9 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 }; 0xFFFFF000, 0xFFFF0000, 0 };
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
for (int m=0; m < 6; m++) for (int m=0; m < 6; m++)
if (Htarg <= htmax[m]) if (Htarg <= htmax[m])

View File

@@ -114,6 +114,7 @@ int scanhash_phi1612_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -124,7 +125,9 @@ int scanhash_phi1612_4way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark ) if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0cff; ( (uint32_t*)ptarget )[7] = 0x0cff;
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
do { do {
*noncev = mm256_intrlv_blend_32( mm256_bswap_32( *noncev = mm256_intrlv_blend_32( mm256_bswap_32(

View File

@@ -78,6 +78,7 @@ int scanhash_skunk_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -90,7 +91,9 @@ int scanhash_skunk_4way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark ) if ( opt_benchmark )
((uint32_t*)ptarget)[7] = 0x0cff; ((uint32_t*)ptarget)[7] = 0x0cff;
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
do do
{ {
*noncev = mm256_intrlv_blend_32( mm256_bswap_32( *noncev = mm256_intrlv_blend_32( mm256_bswap_32(

View File

@@ -189,6 +189,7 @@ int scanhash_x13_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t n = pdata[19]; uint32_t n = pdata[19];
@@ -201,7 +202,9 @@ int scanhash_x13_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 }; 0xFFFFF000, 0xFFFF0000, 0 };
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
for ( int m=0; m < 6; m++ ) for ( int m=0; m < 6; m++ )
if ( Htarg <= htmax[m] ) if ( Htarg <= htmax[m] )

View File

@@ -214,6 +214,7 @@ int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t n = pdata[19]; uint32_t n = pdata[19];
@@ -226,7 +227,9 @@ int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 }; 0xFFFFF000, 0xFFFF0000, 0 };
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
blake512_4way_init( &x13sm3_ctx_mid ); blake512_4way_init( &x13sm3_ctx_mid );
blake512_4way( &x13sm3_ctx_mid, vdata, 64 ); blake512_4way( &x13sm3_ctx_mid, vdata, 64 );

View File

@@ -105,6 +105,7 @@ int scanhash_polytimos_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
@@ -117,7 +118,9 @@ int scanhash_polytimos_4way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark ) if ( opt_benchmark )
ptarget[7] = 0x0cff; ptarget[7] = 0x0cff;
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
do { do {
*noncev = mm256_intrlv_blend_32( mm256_bswap_32( *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev ); _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );

View File

@@ -82,6 +82,7 @@ int scanhash_veltor_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
@@ -94,7 +95,9 @@ int scanhash_veltor_4way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark ) if ( opt_benchmark )
ptarget[7] = 0x0cff; ptarget[7] = 0x0cff;
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
do do
{ {

View File

@@ -193,6 +193,7 @@ int scanhash_x14_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*16] __attribute__ ((aligned (64))); uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t n = pdata[19]; uint32_t n = pdata[19];
@@ -205,7 +206,9 @@ int scanhash_x14_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 }; 0xFFFFF000, 0xFFFF0000, 0 };
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
for ( int m=0; m < 6; m++ ) for ( int m=0; m < 6; m++ )
if ( Htarg <= htmax[m] ) if ( Htarg <= htmax[m] )

View File

@@ -217,6 +217,7 @@ int scanhash_x15_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
@@ -230,7 +231,10 @@ int scanhash_x15_4way( struct work *work, uint32_t max_nonce,
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 }; 0xFFFFF000, 0xFFFF0000, 0 };
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
for ( int m=0; m < 6; m++ ) for ( int m=0; m < 6; m++ )
if ( Htarg <= htmax[m] ) if ( Htarg <= htmax[m] )

View File

@@ -808,6 +808,7 @@ int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]); uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
@@ -823,7 +824,9 @@ int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
0xFFFFF000, 0xFFFF0000, 0 }; 0xFFFFF000, 0xFFFF0000, 0 };
// Need big endian data // Need big endian data
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
for ( int m=0; m < 6; m++ ) if ( Htarg <= htmax[m] ) for ( int m=0; m < 6; m++ ) if ( Htarg <= htmax[m] )
{ {
uint32_t mask = masks[m]; uint32_t mask = masks[m];

View File

@@ -207,6 +207,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]); uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
@@ -222,7 +223,9 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
0xFFFFF000, 0xFFFF0000, 0 }; 0xFFFFF000, 0xFFFF0000, 0 };
// Need big endian data // Need big endian data
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] ) for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
{ {
uint32_t mask = masks[ m ]; uint32_t mask = masks[ m ];

View File

@@ -334,6 +334,7 @@ int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]); uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
@@ -348,7 +349,9 @@ int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
if ( opt_benchmark ) if ( opt_benchmark )
ptarget[7] = 0x0cff; ptarget[7] = 0x0cff;
mm256_bswap_intrlv80_4x64( vdata, pdata ); swab32_array( edata, pdata, 20 );
mm256_intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
// mm256_bswap_intrlv80_4x64( vdata, pdata );
do { do {
*noncev = mm256_intrlv_blend_32( mm256_bswap_32( *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev ); _mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev );

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.5.1. # Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.5.2.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.9.5.1' PACKAGE_VERSION='3.9.5.2'
PACKAGE_STRING='cpuminer-opt 3.9.5.1' PACKAGE_STRING='cpuminer-opt 3.9.5.2'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures cpuminer-opt 3.9.5.1 to adapt to many kinds of systems. \`configure' configures cpuminer-opt 3.9.5.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.9.5.1:";; short | recursive ) echo "Configuration of cpuminer-opt 3.9.5.2:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 3.9.5.1 cpuminer-opt configure 3.9.5.2
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.9.5.1, which was It was created by cpuminer-opt $as_me 3.9.5.2, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='3.9.5.1' VERSION='3.9.5.2'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 3.9.5.1, which was This file was extended by cpuminer-opt $as_me 3.9.5.2, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 3.9.5.1 cpuminer-opt config.status 3.9.5.2
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.9.5.1]) AC_INIT([cpuminer-opt], [3.9.5.2])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@@ -178,7 +178,7 @@ static char const short_options[] =
#endif #endif
"a:b:Bc:CDf:hm:n:p:Px:qr:R:s:t:T:o:u:O:V"; "a:b:Bc:CDf:hm:n:p:Px:qr:R:s:t:T:o:u:O:V";
static struct work g_work = {{ 0 }}; static struct work g_work __attribute__ ((aligned (64))) = {{ 0 }};
//static struct work tmp_work; //static struct work tmp_work;
time_t g_work_time = 0; time_t g_work_time = 0;
static pthread_mutex_t g_work_lock; static pthread_mutex_t g_work_lock;
@@ -843,11 +843,11 @@ void scale_hash_for_display ( double* hashrate, char* units )
const uint64_t diff2hash = 0x40000000ULL; const uint64_t diff2hash = 0x40000000ULL;
static struct timeval five_min_start; static struct timeval five_min_start;
static double shash_sum = 0.; static double shash_sum = 0.;
static double bhash_sum = 0.; static double bhash_sum = 0.;
static double time_sum = 0.; static double time_sum = 0.;
static double latency_sum = 0.; static double latency_sum = 0.;
static uint64_t submits_sum = 0; static uint64_t submits_sum = 0;
struct share_stats_t struct share_stats_t
@@ -860,18 +860,22 @@ struct share_stats_t
// with more and more parallelism the chances of submitting multiple // with more and more parallelism the chances of submitting multiple
// shares in a very short time grows. // shares in a very short time grows.
#define s_stats_size 4 #define s_stats_size 8
static struct share_stats_t share_stats[ s_stats_size ]; static struct share_stats_t share_stats[ s_stats_size ];
static int s_get_ptr = 0, s_put_ptr = 0; static int s_get_ptr = 0, s_put_ptr = 0;
static struct timeval last_submit_time = {0}; static struct timeval last_submit_time = {0};
// Advance an index into share_stats by one slot.
// Returns p + 1, or 0 when p + 1 would reach s_stats_size (wrap around).
static inline int stats_ptr_incr( int p )
{
   const int next = p + 1;
   if ( next >= s_stats_size )
      return 0;
   return next;
}
static int share_result( int result, struct work *null_work, static int share_result( int result, struct work *null_work,
const char *reason ) const char *reason )
{ {
double share_time, share_hash, block_hash, share_size; double share_time = 0., share_hash = 0., block_hash = 0., share_size = 0.;
double hashcount = 0.; double hashcount = 0., hashrate = 0.;
double hashrate = 0.; uint64_t latency = 0;
uint64_t latency;
struct share_stats_t my_stats = {0}; struct share_stats_t my_stats = {0};
struct timeval ack_time, latency_tv, et; struct timeval ack_time, latency_tv, et;
char hr[32]; char hr[32];
@@ -879,37 +883,29 @@ static int share_result( int result, struct work *null_work,
char shr[32]; char shr[32];
char shr_units[4] = {0}; char shr_units[4] = {0};
char diffstr[32]; char diffstr[32];
const char *sres; const char *sres = NULL;
bool solved; bool solved = false;
// Mutex while accessing global counters. // Mutex while we grab a snapshot of the global counters.
pthread_mutex_lock( &stats_lock ); pthread_mutex_lock( &stats_lock );
// There is a window where a second share could be submitted // When submit_work detects a buffer overflow it discards the stats for
// before receiving the response for this one. When this happens // the new share. When we catch up we may get acks for shares with
// te second share will be processed from [1] on the next pass. // no stats. Leaving the get pointer un-incremented will resync with the
memcpy( &my_stats, &share_stats[ s_get_ptr], sizeof my_stats ); // put pointer.
memset( &share_stats[ s_get_ptr ], 0, sizeof my_stats ); if ( share_stats[ s_get_ptr ].submit_time.tv_sec )
s_get_ptr++;
if ( s_get_ptr >= s_stats_size )
s_get_ptr = 0;
/*
if ( share_stats[0].submit_time.tv_sec )
{ {
memcpy( &my_stats, &share_stats[0], sizeof my_stats ); memcpy( &my_stats, &share_stats[ s_get_ptr], sizeof my_stats );
memset( &share_stats[0], 0, sizeof my_stats ); memset( &share_stats[ s_get_ptr ], 0, sizeof my_stats );
} s_get_ptr = stats_ptr_incr( s_get_ptr );
else if ( share_stats[1].submit_time.tv_sec ) pthread_mutex_unlock( &stats_lock );
{
memcpy( &my_stats, &share_stats[1], sizeof my_stats );
memset( &share_stats[1], 0, sizeof my_stats );
} }
else else
{ {
memcpy( &my_stats, &share_stats[2], sizeof my_stats ); pthread_mutex_unlock( &stats_lock );
memset( &share_stats[2], 0, sizeof my_stats ); applog(LOG_WARNING,"Pending shares overflow, stats for share are lost.");
} }
*/
for ( int i = 0; i < opt_n_threads; i++ ) for ( int i = 0; i < opt_n_threads; i++ )
{ {
hashcount += thr_hashcount[i]; hashcount += thr_hashcount[i];
@@ -919,12 +915,16 @@ static int share_result( int result, struct work *null_work,
global_hashrate = hashrate; global_hashrate = hashrate;
// calculate latency and share time. // calculate latency and share time.
gettimeofday( &ack_time, NULL ); if ( my_stats.submit_time.tv_sec )
timeval_subtract( &latency_tv, &ack_time, &my_stats.submit_time ); {
latency = ( latency_tv.tv_sec * 1000 + latency_tv.tv_usec / 1000 ); gettimeofday( &ack_time, NULL );
timeval_subtract( &et, &my_stats.submit_time, &last_submit_time ); timeval_subtract( &latency_tv, &ack_time, &my_stats.submit_time );
share_time = (double)et.tv_sec + ( (double)et.tv_usec / 1000000. ); latency = ( latency_tv.tv_sec * 1000 + latency_tv.tv_usec / 1000 );
memcpy( &last_submit_time, &my_stats.submit_time, sizeof last_submit_time ); timeval_subtract( &et, &my_stats.submit_time, &last_submit_time );
share_time = (double)et.tv_sec + ( (double)et.tv_usec / 1000000. );
memcpy( &last_submit_time, &my_stats.submit_time,
sizeof last_submit_time );
}
// calculate share hashrate and size // calculate share hashrate and size
share_hash = my_stats.share_diff * diff2hash; share_hash = my_stats.share_diff * diff2hash;
@@ -938,6 +938,8 @@ static int share_result( int result, struct work *null_work,
solved_block_count += solved ? 1 : 0 ; solved_block_count += solved ? 1 : 0 ;
// update counters for 5 minute summary report // update counters for 5 minute summary report
pthread_mutex_lock( &stats_lock );
shash_sum += share_hash; shash_sum += share_hash;
bhash_sum += block_hash; bhash_sum += block_hash;
time_sum += share_time; time_sum += share_time;
@@ -961,32 +963,38 @@ static int share_result( int result, struct work *null_work,
// colour code the share diff to highlight high value. // colour code the share diff to highlight high value.
if ( solved ) if ( solved )
sprintf( diffstr, "%s%.3g%s", CL_MAG, my_stats.share_diff, CL_WHT ); sprintf( diffstr, "%s%.3g%s", CL_MAG, my_stats.share_diff, CL_WHT );
else if ( my_stats.share_diff > (my_stats.net_diff*0.1) ) else if ( my_stats.share_diff > ( my_stats.net_diff * 0.1 ) )
sprintf( diffstr, "%s%.3g%s", CL_GRN, my_stats.share_diff, CL_WHT ); sprintf( diffstr, "%s%.3g%s", CL_GRN, my_stats.share_diff, CL_WHT );
else if ( my_stats.share_diff > (my_stats.net_diff*0.01) ) else if ( my_stats.share_diff > ( my_stats.net_diff * 0.01 ) )
sprintf( diffstr, "%s%.3g%s", CL_CYN, my_stats.share_diff, CL_WHT ); sprintf( diffstr, "%s%.3g%s", CL_CYN, my_stats.share_diff, CL_WHT );
else else
sprintf( diffstr, "%.3g", my_stats.share_diff ); sprintf( diffstr, "%.3g", my_stats.share_diff );
if ( hashrate && share_hash_rate > (768.*hashrate) ) if ( hashrate ) // don't colour share hash rate without reference rate.
sprintf( shr, "%s%.2f %sH/s%s", CL_MAG, scaled_shr, shr_units, {
CL_WHT ); if ( share_hash_rate > 768. * hashrate )
else if ( share_hash_rate > (32.*hashrate) ) sprintf( shr, "%s%.2f %sH/s%s", CL_MAG, scaled_shr, shr_units,
sprintf( shr, "%s%.2f %sH/s%s", CL_GRN, scaled_shr, shr_units, CL_WHT );
CL_WHT ); else if ( share_hash_rate > 32. * hashrate )
else if ( share_hash_rate > 2.0*hashrate ) sprintf( shr, "%s%.2f %sH/s%s", CL_GRN, scaled_shr, shr_units,
sprintf( shr, "%s%.2f %sH/s%s", CL_CYN, scaled_shr, shr_units, CL_WHT );
CL_WHT ); else if ( share_hash_rate > 2.0 * hashrate )
else if ( share_hash_rate > 0.5*hashrate ) sprintf( shr, "%s%.2f %sH/s%s", CL_CYN, scaled_shr, shr_units,
sprintf( shr, "%.2f %sH/s", scaled_shr, shr_units ); CL_WHT );
else else if ( share_hash_rate > 0.5 * hashrate )
sprintf( shr, "%s%.2f %sH/s%s", CL_YLW, scaled_shr, shr_units, sprintf( shr, "%.2f %sH/s", scaled_shr, shr_units );
CL_WHT ); else
sprintf( shr, "%s%.2f %sH/s%s", CL_YLW, scaled_shr, shr_units,
CL_WHT );
}
else
sprintf( shr, "%.2f %sH/s", scaled_shr, shr_units );
} }
else else // monochrome
{ {
sres = ( solved ? "BLOCK SOLVED" : result ? "Accepted" : "Rejected" ); sres = ( solved ? "BLOCK SOLVED" : result ? "Accepted" : "Rejected" );
sprintf( diffstr, "%.3g", my_stats.share_diff ); sprintf( diffstr, "%.3g", my_stats.share_diff );
sprintf( shr, "%.2f %sH/s", scaled_shr, shr_units );
} }
scale_hash_for_display ( &hashrate, hr_units ); scale_hash_for_display ( &hashrate, hr_units );
@@ -1602,37 +1610,16 @@ bool submit_work(struct thr_info *thr, const struct work *work_in)
// collect some share stats // collect some share stats
pthread_mutex_lock( &stats_lock ); pthread_mutex_lock( &stats_lock );
gettimeofday( &share_stats[ s_put_ptr ].submit_time, NULL ); // if buffer full discard stats and don't increment pointer.
share_stats[ s_put_ptr ].share_diff = work_in->sharediff; // We're on the clock so let share_result report it.
share_stats[ s_put_ptr ].net_diff = net_diff; if ( share_stats[ s_put_ptr ].submit_time.tv_sec == 0 )
strcpy( share_stats[ s_put_ptr ].job_id, work_in->job_id ); {
gettimeofday( &share_stats[ s_put_ptr ].submit_time, NULL );
s_put_ptr++; share_stats[ s_put_ptr ].share_diff = work_in->sharediff;
if ( s_put_ptr >= s_stats_size ) share_stats[ s_put_ptr ].net_diff = net_diff;
s_put_ptr = 0; strcpy( share_stats[ s_put_ptr ].job_id, work_in->job_id );
/* s_put_ptr = stats_ptr_incr( s_put_ptr );
if ( share_stats[0].submit_time.tv_sec == 0 )
{
gettimeofday( &share_stats[0].submit_time, NULL );
share_stats[0].share_diff = work_in->sharediff;
share_stats[0].net_diff = net_diff;
strcpy( share_stats[0].job_id, work_in->job_id );
} }
else if ( share_stats[1].submit_time.tv_sec == 0 )
{ // previous share hasn't been confirmed yet.
gettimeofday( &share_stats[1].submit_time, NULL );
share_stats[1].share_diff = work_in->sharediff;
share_stats[1].net_diff = net_diff;
strcpy( share_stats[1].job_id, work_in->job_id );
}
else
{ // previous share hasn't been confirmed yet.
gettimeofday( &share_stats[2].submit_time, NULL );
share_stats[2].share_diff = work_in->sharediff;
share_stats[2].net_diff = net_diff;
strcpy( share_stats[2].job_id, work_in->job_id );
}
*/
pthread_mutex_unlock( &stats_lock ); pthread_mutex_unlock( &stats_lock );
@@ -1811,10 +1798,11 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
// or // or
// || ( !benchmark && strcmp( work->job_id, g_work->job_id ) ) ) ) // || ( !benchmark && strcmp( work->job_id, g_work->job_id ) ) ) )
// For now leave it as is, it seems stable. // For now leave it as is, it seems stable.
// strtoul seems to work.
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size ) if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr ) && ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|| ( work->job_id != g_work->job_id ) ) ) || strtoul( work->job_id, NULL, 16 )
!= strtoul( g_work->job_id, NULL, 16 ) ) )
{ {
work_free( work ); work_free( work );
work_copy( work, g_work ); work_copy( work, g_work );
@@ -1862,9 +1850,9 @@ bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
static void *miner_thread( void *userdata ) static void *miner_thread( void *userdata )
{ {
struct work work __attribute__ ((aligned (64))) ;
struct thr_info *mythr = (struct thr_info *) userdata; struct thr_info *mythr = (struct thr_info *) userdata;
int thr_id = mythr->id; int thr_id = mythr->id;
struct work work;
uint32_t max_nonce; uint32_t max_nonce;
struct timeval et; struct timeval et;
struct timeval time_now; struct timeval time_now;
@@ -2099,9 +2087,6 @@ static void *miner_thread( void *userdata )
break; break;
} }
if ( !opt_quiet ) if ( !opt_quiet )
// applog( LOG_BLUE, "Share %d submitted by thread %d.",
// accepted_share_count + rejected_share_count + 1,
// mythr->id );
applog( LOG_BLUE, "Share %d submitted by thread %d, job %s.", applog( LOG_BLUE, "Share %d submitted by thread %d, job %s.",
accepted_share_count + rejected_share_count + 1, accepted_share_count + rejected_share_count + 1,
mythr->id, work.job_id ); mythr->id, work.job_id );
@@ -2129,7 +2114,7 @@ static void *miner_thread( void *userdata )
pthread_mutex_unlock( &stats_lock ); pthread_mutex_unlock( &stats_lock );
else else
{ {
// collect and reset counters // collect and reset global counters
double hash = shash_sum; shash_sum = 0.; double hash = shash_sum; shash_sum = 0.;
double bhash = bhash_sum; bhash_sum = 0.; double bhash = bhash_sum; bhash_sum = 0.;
double time = time_sum; time_sum = 0.; double time = time_sum; time_sum = 0.;
@@ -2175,7 +2160,10 @@ static void *miner_thread( void *userdata )
else sprintf( timestr, "%d C", temp ); else sprintf( timestr, "%d C", temp );
} }
else else
{
sprintf( shr, "%.2f %sH/s", scaled_shrate, shr_units ); sprintf( shr, "%.2f %sH/s", scaled_shrate, shr_units );
sprintf( timestr, "%d C", temp );
}
applog(LOG_NOTICE,"Submitted %d shares in %dm%02ds, %.5f%% block share.", applog(LOG_NOTICE,"Submitted %d shares in %dm%02ds, %.5f%% block share.",
(uint64_t)submits, et.tv_sec / 60, et.tv_sec % 60, avg_share ); (uint64_t)submits, et.tv_sec / 60, et.tv_sec % 60, avg_share );
@@ -2187,9 +2175,7 @@ static void *miner_thread( void *userdata )
applog(LOG_NOTICE,"Share hashrate %s, latency %d ms, temp %s.", applog(LOG_NOTICE,"Share hashrate %s, latency %d ms, temp %s.",
shr, latency, timestr ); shr, latency, timestr );
#endif #endif
// applog(LOG_NOTICE,"Performance index: %s.", hixstr );
applog(LOG_INFO,"- - - - - - - - - - - - - - - - - - - - - - - - - - -"); applog(LOG_INFO,"- - - - - - - - - - - - - - - - - - - - - - - - - - -");
} }
// display hashrate // display hashrate
@@ -2478,8 +2464,8 @@ static bool stratum_handle_response( char *buf )
val = JSON_LOADS( buf, &err ); val = JSON_LOADS( buf, &err );
if (!val) if (!val)
{ {
applog(LOG_INFO, "JSON decode failed(%d): %s", err.line, err.text); applog(LOG_INFO, "JSON decode failed(%d): %s", err.line, err.text);
goto out; goto out;
} }
res_val = json_object_get( val, "result" ); res_val = json_object_get( val, "result" );
@@ -2488,8 +2474,8 @@ static bool stratum_handle_response( char *buf )
id_val = json_object_get( val, "id" ); id_val = json_object_get( val, "id" );
if ( !id_val || json_is_null(id_val) ) if ( !id_val || json_is_null(id_val) )
goto out; goto out;
if ( !algo_gate.stratum_handle_response( val ) ) if ( !algo_gate.stratum_handle_response( val ) )
goto out; goto out;
ret = true; ret = true;
out: out:
if (val) if (val)

View File

@@ -349,7 +349,7 @@ void cpu_brand_string( char* s );
float cpu_temp( int core ); float cpu_temp( int core );
struct work { struct work {
uint32_t data[48]; uint32_t data[48] __attribute__ ((aligned (64)));
uint32_t target[8]; uint32_t target[8];
double targetdiff; double targetdiff;
@@ -401,7 +401,7 @@ struct stratum_ctx {
unsigned char *xnonce1; unsigned char *xnonce1;
size_t xnonce2_size; size_t xnonce2_size;
struct stratum_job job; struct stratum_job job;
struct work work; struct work work __attribute__ ((aligned (64)));
pthread_mutex_t work_lock; pthread_mutex_t work_lock;
int bloc_height; int bloc_height;