Merge branch 'main' of https://git.d-popov.com/popov/mines
This commit is contained in:
2
rin/miner/cpuminer/.gitattributes
vendored
2
rin/miner/cpuminer/.gitattributes
vendored
@@ -1,2 +0,0 @@
|
|||||||
# Auto detect text files and perform LF normalization
|
|
||||||
* text=auto
|
|
52
rin/miner/cpuminer/.gitignore
vendored
52
rin/miner/cpuminer/.gitignore
vendored
@@ -1,52 +0,0 @@
|
|||||||
minerd*
|
|
||||||
cpuminer
|
|
||||||
*.exe
|
|
||||||
*.o
|
|
||||||
*.d
|
|
||||||
gmon.out
|
|
||||||
|
|
||||||
autom4te.cache
|
|
||||||
.deps
|
|
||||||
|
|
||||||
Makefile
|
|
||||||
Makefile.in
|
|
||||||
INSTALL
|
|
||||||
configure.lineno
|
|
||||||
depcomp
|
|
||||||
missing
|
|
||||||
install-sh
|
|
||||||
stamp-h1
|
|
||||||
cpuminer-config.h*
|
|
||||||
compile
|
|
||||||
config.log
|
|
||||||
config.status
|
|
||||||
config.status.lineno
|
|
||||||
config.guess
|
|
||||||
config.sub
|
|
||||||
|
|
||||||
mingw32-config.cache
|
|
||||||
|
|
||||||
*/.dirstamp
|
|
||||||
*/*/.dirstamp
|
|
||||||
*/*/*/.dirstamp
|
|
||||||
*.iml
|
|
||||||
|
|
||||||
*.vcxproj.user
|
|
||||||
*.opensdf
|
|
||||||
*.sdf
|
|
||||||
*.suo
|
|
||||||
Release/
|
|
||||||
Debug/
|
|
||||||
x64/Release/
|
|
||||||
x64/Debug/
|
|
||||||
*.pdb/
|
|
||||||
|
|
||||||
installer/
|
|
||||||
res/cpuminer.aps
|
|
||||||
res/RC*
|
|
||||||
sign/
|
|
||||||
sign.sh
|
|
||||||
|
|
||||||
compat/curl-for-windows/
|
|
||||||
|
|
||||||
.vscode/
|
|
@@ -1,16 +0,0 @@
|
|||||||
language: c
|
|
||||||
|
|
||||||
compiler:
|
|
||||||
- gcc
|
|
||||||
|
|
||||||
before_install:
|
|
||||||
- sudo apt-get update -qq
|
|
||||||
- sudo apt-get install libcurl4-openssl-dev
|
|
||||||
|
|
||||||
before_script:
|
|
||||||
- ./autogen.sh
|
|
||||||
|
|
||||||
script:
|
|
||||||
- ./configure --with-crypto --with-curl
|
|
||||||
- make
|
|
||||||
- ./cpuminer --cputest
|
|
@@ -1,38 +0,0 @@
|
|||||||
Jeff Garzik <jgarzik@pobox.com>
|
|
||||||
|
|
||||||
ArtForz
|
|
||||||
|
|
||||||
pooler <pooler@litecoinpool.org>
|
|
||||||
|
|
||||||
BlueDragon747
|
|
||||||
|
|
||||||
1gh
|
|
||||||
|
|
||||||
Neisklar
|
|
||||||
|
|
||||||
prettyhatemachine
|
|
||||||
|
|
||||||
LucasJones
|
|
||||||
|
|
||||||
tpruvot@github
|
|
||||||
|
|
||||||
elmad
|
|
||||||
|
|
||||||
djm34
|
|
||||||
|
|
||||||
palmd
|
|
||||||
|
|
||||||
ig0tik3d
|
|
||||||
|
|
||||||
Wolf0
|
|
||||||
|
|
||||||
Optiminer
|
|
||||||
|
|
||||||
Jay D Dee
|
|
||||||
|
|
||||||
xcouiz@gmail.com
|
|
||||||
|
|
||||||
Cryply
|
|
||||||
|
|
||||||
Colin Percival
|
|
||||||
Alexander Peslyak
|
|
@@ -1,340 +0,0 @@
|
|||||||
GNU GENERAL PUBLIC LICENSE
|
|
||||||
Version 2, June 1991
|
|
||||||
|
|
||||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
|
|
||||||
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
Everyone is permitted to copy and distribute verbatim copies
|
|
||||||
of this license document, but changing it is not allowed.
|
|
||||||
|
|
||||||
Preamble
|
|
||||||
|
|
||||||
The licenses for most software are designed to take away your
|
|
||||||
freedom to share and change it. By contrast, the GNU General Public
|
|
||||||
License is intended to guarantee your freedom to share and change free
|
|
||||||
software--to make sure the software is free for all its users. This
|
|
||||||
General Public License applies to most of the Free Software
|
|
||||||
Foundation's software and to any other program whose authors commit to
|
|
||||||
using it. (Some other Free Software Foundation software is covered by
|
|
||||||
the GNU Library General Public License instead.) You can apply it to
|
|
||||||
your programs, too.
|
|
||||||
|
|
||||||
When we speak of free software, we are referring to freedom, not
|
|
||||||
price. Our General Public Licenses are designed to make sure that you
|
|
||||||
have the freedom to distribute copies of free software (and charge for
|
|
||||||
this service if you wish), that you receive source code or can get it
|
|
||||||
if you want it, that you can change the software or use pieces of it
|
|
||||||
in new free programs; and that you know you can do these things.
|
|
||||||
|
|
||||||
To protect your rights, we need to make restrictions that forbid
|
|
||||||
anyone to deny you these rights or to ask you to surrender the rights.
|
|
||||||
These restrictions translate to certain responsibilities for you if you
|
|
||||||
distribute copies of the software, or if you modify it.
|
|
||||||
|
|
||||||
For example, if you distribute copies of such a program, whether
|
|
||||||
gratis or for a fee, you must give the recipients all the rights that
|
|
||||||
you have. You must make sure that they, too, receive or can get the
|
|
||||||
source code. And you must show them these terms so they know their
|
|
||||||
rights.
|
|
||||||
|
|
||||||
We protect your rights with two steps: (1) copyright the software, and
|
|
||||||
(2) offer you this license which gives you legal permission to copy,
|
|
||||||
distribute and/or modify the software.
|
|
||||||
|
|
||||||
Also, for each author's protection and ours, we want to make certain
|
|
||||||
that everyone understands that there is no warranty for this free
|
|
||||||
software. If the software is modified by someone else and passed on, we
|
|
||||||
want its recipients to know that what they have is not the original, so
|
|
||||||
that any problems introduced by others will not reflect on the original
|
|
||||||
authors' reputations.
|
|
||||||
|
|
||||||
Finally, any free program is threatened constantly by software
|
|
||||||
patents. We wish to avoid the danger that redistributors of a free
|
|
||||||
program will individually obtain patent licenses, in effect making the
|
|
||||||
program proprietary. To prevent this, we have made it clear that any
|
|
||||||
patent must be licensed for everyone's free use or not licensed at all.
|
|
||||||
|
|
||||||
The precise terms and conditions for copying, distribution and
|
|
||||||
modification follow.
|
|
||||||
|
|
||||||
GNU GENERAL PUBLIC LICENSE
|
|
||||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
|
||||||
|
|
||||||
0. This License applies to any program or other work which contains
|
|
||||||
a notice placed by the copyright holder saying it may be distributed
|
|
||||||
under the terms of this General Public License. The "Program", below,
|
|
||||||
refers to any such program or work, and a "work based on the Program"
|
|
||||||
means either the Program or any derivative work under copyright law:
|
|
||||||
that is to say, a work containing the Program or a portion of it,
|
|
||||||
either verbatim or with modifications and/or translated into another
|
|
||||||
language. (Hereinafter, translation is included without limitation in
|
|
||||||
the term "modification".) Each licensee is addressed as "you".
|
|
||||||
|
|
||||||
Activities other than copying, distribution and modification are not
|
|
||||||
covered by this License; they are outside its scope. The act of
|
|
||||||
running the Program is not restricted, and the output from the Program
|
|
||||||
is covered only if its contents constitute a work based on the
|
|
||||||
Program (independent of having been made by running the Program).
|
|
||||||
Whether that is true depends on what the Program does.
|
|
||||||
|
|
||||||
1. You may copy and distribute verbatim copies of the Program's
|
|
||||||
source code as you receive it, in any medium, provided that you
|
|
||||||
conspicuously and appropriately publish on each copy an appropriate
|
|
||||||
copyright notice and disclaimer of warranty; keep intact all the
|
|
||||||
notices that refer to this License and to the absence of any warranty;
|
|
||||||
and give any other recipients of the Program a copy of this License
|
|
||||||
along with the Program.
|
|
||||||
|
|
||||||
You may charge a fee for the physical act of transferring a copy, and
|
|
||||||
you may at your option offer warranty protection in exchange for a fee.
|
|
||||||
|
|
||||||
2. You may modify your copy or copies of the Program or any portion
|
|
||||||
of it, thus forming a work based on the Program, and copy and
|
|
||||||
distribute such modifications or work under the terms of Section 1
|
|
||||||
above, provided that you also meet all of these conditions:
|
|
||||||
|
|
||||||
a) You must cause the modified files to carry prominent notices
|
|
||||||
stating that you changed the files and the date of any change.
|
|
||||||
|
|
||||||
b) You must cause any work that you distribute or publish, that in
|
|
||||||
whole or in part contains or is derived from the Program or any
|
|
||||||
part thereof, to be licensed as a whole at no charge to all third
|
|
||||||
parties under the terms of this License.
|
|
||||||
|
|
||||||
c) If the modified program normally reads commands interactively
|
|
||||||
when run, you must cause it, when started running for such
|
|
||||||
interactive use in the most ordinary way, to print or display an
|
|
||||||
announcement including an appropriate copyright notice and a
|
|
||||||
notice that there is no warranty (or else, saying that you provide
|
|
||||||
a warranty) and that users may redistribute the program under
|
|
||||||
these conditions, and telling the user how to view a copy of this
|
|
||||||
License. (Exception: if the Program itself is interactive but
|
|
||||||
does not normally print such an announcement, your work based on
|
|
||||||
the Program is not required to print an announcement.)
|
|
||||||
|
|
||||||
These requirements apply to the modified work as a whole. If
|
|
||||||
identifiable sections of that work are not derived from the Program,
|
|
||||||
and can be reasonably considered independent and separate works in
|
|
||||||
themselves, then this License, and its terms, do not apply to those
|
|
||||||
sections when you distribute them as separate works. But when you
|
|
||||||
distribute the same sections as part of a whole which is a work based
|
|
||||||
on the Program, the distribution of the whole must be on the terms of
|
|
||||||
this License, whose permissions for other licensees extend to the
|
|
||||||
entire whole, and thus to each and every part regardless of who wrote it.
|
|
||||||
|
|
||||||
Thus, it is not the intent of this section to claim rights or contest
|
|
||||||
your rights to work written entirely by you; rather, the intent is to
|
|
||||||
exercise the right to control the distribution of derivative or
|
|
||||||
collective works based on the Program.
|
|
||||||
|
|
||||||
In addition, mere aggregation of another work not based on the Program
|
|
||||||
with the Program (or with a work based on the Program) on a volume of
|
|
||||||
a storage or distribution medium does not bring the other work under
|
|
||||||
the scope of this License.
|
|
||||||
|
|
||||||
3. You may copy and distribute the Program (or a work based on it,
|
|
||||||
under Section 2) in object code or executable form under the terms of
|
|
||||||
Sections 1 and 2 above provided that you also do one of the following:
|
|
||||||
|
|
||||||
a) Accompany it with the complete corresponding machine-readable
|
|
||||||
source code, which must be distributed under the terms of Sections
|
|
||||||
1 and 2 above on a medium customarily used for software interchange; or,
|
|
||||||
|
|
||||||
b) Accompany it with a written offer, valid for at least three
|
|
||||||
years, to give any third party, for a charge no more than your
|
|
||||||
cost of physically performing source distribution, a complete
|
|
||||||
machine-readable copy of the corresponding source code, to be
|
|
||||||
distributed under the terms of Sections 1 and 2 above on a medium
|
|
||||||
customarily used for software interchange; or,
|
|
||||||
|
|
||||||
c) Accompany it with the information you received as to the offer
|
|
||||||
to distribute corresponding source code. (This alternative is
|
|
||||||
allowed only for noncommercial distribution and only if you
|
|
||||||
received the program in object code or executable form with such
|
|
||||||
an offer, in accord with Subsection b above.)
|
|
||||||
|
|
||||||
The source code for a work means the preferred form of the work for
|
|
||||||
making modifications to it. For an executable work, complete source
|
|
||||||
code means all the source code for all modules it contains, plus any
|
|
||||||
associated interface definition files, plus the scripts used to
|
|
||||||
control compilation and installation of the executable. However, as a
|
|
||||||
special exception, the source code distributed need not include
|
|
||||||
anything that is normally distributed (in either source or binary
|
|
||||||
form) with the major components (compiler, kernel, and so on) of the
|
|
||||||
operating system on which the executable runs, unless that component
|
|
||||||
itself accompanies the executable.
|
|
||||||
|
|
||||||
If distribution of executable or object code is made by offering
|
|
||||||
access to copy from a designated place, then offering equivalent
|
|
||||||
access to copy the source code from the same place counts as
|
|
||||||
distribution of the source code, even though third parties are not
|
|
||||||
compelled to copy the source along with the object code.
|
|
||||||
|
|
||||||
4. You may not copy, modify, sublicense, or distribute the Program
|
|
||||||
except as expressly provided under this License. Any attempt
|
|
||||||
otherwise to copy, modify, sublicense or distribute the Program is
|
|
||||||
void, and will automatically terminate your rights under this License.
|
|
||||||
However, parties who have received copies, or rights, from you under
|
|
||||||
this License will not have their licenses terminated so long as such
|
|
||||||
parties remain in full compliance.
|
|
||||||
|
|
||||||
5. You are not required to accept this License, since you have not
|
|
||||||
signed it. However, nothing else grants you permission to modify or
|
|
||||||
distribute the Program or its derivative works. These actions are
|
|
||||||
prohibited by law if you do not accept this License. Therefore, by
|
|
||||||
modifying or distributing the Program (or any work based on the
|
|
||||||
Program), you indicate your acceptance of this License to do so, and
|
|
||||||
all its terms and conditions for copying, distributing or modifying
|
|
||||||
the Program or works based on it.
|
|
||||||
|
|
||||||
6. Each time you redistribute the Program (or any work based on the
|
|
||||||
Program), the recipient automatically receives a license from the
|
|
||||||
original licensor to copy, distribute or modify the Program subject to
|
|
||||||
these terms and conditions. You may not impose any further
|
|
||||||
restrictions on the recipients' exercise of the rights granted herein.
|
|
||||||
You are not responsible for enforcing compliance by third parties to
|
|
||||||
this License.
|
|
||||||
|
|
||||||
7. If, as a consequence of a court judgment or allegation of patent
|
|
||||||
infringement or for any other reason (not limited to patent issues),
|
|
||||||
conditions are imposed on you (whether by court order, agreement or
|
|
||||||
otherwise) that contradict the conditions of this License, they do not
|
|
||||||
excuse you from the conditions of this License. If you cannot
|
|
||||||
distribute so as to satisfy simultaneously your obligations under this
|
|
||||||
License and any other pertinent obligations, then as a consequence you
|
|
||||||
may not distribute the Program at all. For example, if a patent
|
|
||||||
license would not permit royalty-free redistribution of the Program by
|
|
||||||
all those who receive copies directly or indirectly through you, then
|
|
||||||
the only way you could satisfy both it and this License would be to
|
|
||||||
refrain entirely from distribution of the Program.
|
|
||||||
|
|
||||||
If any portion of this section is held invalid or unenforceable under
|
|
||||||
any particular circumstance, the balance of the section is intended to
|
|
||||||
apply and the section as a whole is intended to apply in other
|
|
||||||
circumstances.
|
|
||||||
|
|
||||||
It is not the purpose of this section to induce you to infringe any
|
|
||||||
patents or other property right claims or to contest validity of any
|
|
||||||
such claims; this section has the sole purpose of protecting the
|
|
||||||
integrity of the free software distribution system, which is
|
|
||||||
implemented by public license practices. Many people have made
|
|
||||||
generous contributions to the wide range of software distributed
|
|
||||||
through that system in reliance on consistent application of that
|
|
||||||
system; it is up to the author/donor to decide if he or she is willing
|
|
||||||
to distribute software through any other system and a licensee cannot
|
|
||||||
impose that choice.
|
|
||||||
|
|
||||||
This section is intended to make thoroughly clear what is believed to
|
|
||||||
be a consequence of the rest of this License.
|
|
||||||
|
|
||||||
8. If the distribution and/or use of the Program is restricted in
|
|
||||||
certain countries either by patents or by copyrighted interfaces, the
|
|
||||||
original copyright holder who places the Program under this License
|
|
||||||
may add an explicit geographical distribution limitation excluding
|
|
||||||
those countries, so that distribution is permitted only in or among
|
|
||||||
countries not thus excluded. In such case, this License incorporates
|
|
||||||
the limitation as if written in the body of this License.
|
|
||||||
|
|
||||||
9. The Free Software Foundation may publish revised and/or new versions
|
|
||||||
of the General Public License from time to time. Such new versions will
|
|
||||||
be similar in spirit to the present version, but may differ in detail to
|
|
||||||
address new problems or concerns.
|
|
||||||
|
|
||||||
Each version is given a distinguishing version number. If the Program
|
|
||||||
specifies a version number of this License which applies to it and "any
|
|
||||||
later version", you have the option of following the terms and conditions
|
|
||||||
either of that version or of any later version published by the Free
|
|
||||||
Software Foundation. If the Program does not specify a version number of
|
|
||||||
this License, you may choose any version ever published by the Free Software
|
|
||||||
Foundation.
|
|
||||||
|
|
||||||
10. If you wish to incorporate parts of the Program into other free
|
|
||||||
programs whose distribution conditions are different, write to the author
|
|
||||||
to ask for permission. For software which is copyrighted by the Free
|
|
||||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
|
||||||
make exceptions for this. Our decision will be guided by the two goals
|
|
||||||
of preserving the free status of all derivatives of our free software and
|
|
||||||
of promoting the sharing and reuse of software generally.
|
|
||||||
|
|
||||||
NO WARRANTY
|
|
||||||
|
|
||||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
|
||||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
|
||||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
|
||||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
|
||||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
||||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
|
||||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
|
||||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
|
||||||
REPAIR OR CORRECTION.
|
|
||||||
|
|
||||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
|
||||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
|
||||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
|
||||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
|
||||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
|
||||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
|
||||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
|
||||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGES.
|
|
||||||
|
|
||||||
END OF TERMS AND CONDITIONS
|
|
||||||
|
|
||||||
How to Apply These Terms to Your New Programs
|
|
||||||
|
|
||||||
If you develop a new program, and you want it to be of the greatest
|
|
||||||
possible use to the public, the best way to achieve this is to make it
|
|
||||||
free software which everyone can redistribute and change under these terms.
|
|
||||||
|
|
||||||
To do so, attach the following notices to the program. It is safest
|
|
||||||
to attach them to the start of each source file to most effectively
|
|
||||||
convey the exclusion of warranty; and each file should have at least
|
|
||||||
the "copyright" line and a pointer to where the full notice is found.
|
|
||||||
|
|
||||||
<one line to give the program's name and a brief idea of what it does.>
|
|
||||||
Copyright (C) <year> <name of author>
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program; if not, write to the Free Software
|
|
||||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
|
|
||||||
|
|
||||||
Also add information on how to contact you by electronic and paper mail.
|
|
||||||
|
|
||||||
If the program is interactive, make it output a short notice like this
|
|
||||||
when it starts in an interactive mode:
|
|
||||||
|
|
||||||
Gnomovision version 69, Copyright (C) year name of author
|
|
||||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
|
||||||
This is free software, and you are welcome to redistribute it
|
|
||||||
under certain conditions; type `show c' for details.
|
|
||||||
|
|
||||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
|
||||||
parts of the General Public License. Of course, the commands you use may
|
|
||||||
be called something other than `show w' and `show c'; they could even be
|
|
||||||
mouse-clicks or menu items--whatever suits your program.
|
|
||||||
|
|
||||||
You should also get your employer (if you work as a programmer) or your
|
|
||||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
|
||||||
necessary. Here is a sample; alter the names:
|
|
||||||
|
|
||||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
|
||||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
|
||||||
|
|
||||||
<signature of Ty Coon>, 1 April 1989
|
|
||||||
Ty Coon, President of Vice
|
|
||||||
|
|
||||||
This General Public License does not permit incorporating your program into
|
|
||||||
proprietary programs. If your program is a subroutine library, you may
|
|
||||||
consider it more useful to permit linking proprietary applications with the
|
|
||||||
library. If this is what you want to do, use the GNU Library General
|
|
||||||
Public License instead of this License.
|
|
@@ -1 +0,0 @@
|
|||||||
See git repository ('git log') for full changelog.
|
|
@@ -1,35 +0,0 @@
|
|||||||
#
|
|
||||||
# Dockerfile for cpuminer-opt
|
|
||||||
# usage: docker build -t cpuminer-opt:latest .
|
|
||||||
# run: docker run -it --rm cpuminer-opt:latest [ARGS]
|
|
||||||
# ex: docker run -it --rm cpuminer-opt:latest -a cryptonight -o cryptonight.eu.nicehash.com:3355 -u 1MiningDW2GKzf4VQfmp4q2XoUvR6iy6PD.worker1 -p x -t 3
|
|
||||||
#
|
|
||||||
|
|
||||||
# Build
|
|
||||||
FROM ubuntu:16.04 as builder
|
|
||||||
|
|
||||||
RUN apt-get update \
|
|
||||||
&& apt-get install -y \
|
|
||||||
build-essential \
|
|
||||||
libssl-dev \
|
|
||||||
libgmp-dev \
|
|
||||||
libcurl4-openssl-dev \
|
|
||||||
libjansson-dev \
|
|
||||||
automake \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
COPY . /app/
|
|
||||||
RUN cd /app/ && ./build.sh
|
|
||||||
|
|
||||||
# App
|
|
||||||
FROM ubuntu:16.04
|
|
||||||
|
|
||||||
RUN apt-get update \
|
|
||||||
&& apt-get install -y \
|
|
||||||
libcurl3 \
|
|
||||||
libjansson4 \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
COPY --from=builder /app/cpuminer .
|
|
||||||
ENTRYPOINT ["./cpuminer"]
|
|
||||||
CMD ["-h"]
|
|
@@ -1,164 +0,0 @@
|
|||||||
|
|
||||||
These instructions may be out of date, see the Wiki for the latest...
|
|
||||||
https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source
|
|
||||||
|
|
||||||
1. Requirements:
|
|
||||||
---------------
|
|
||||||
|
|
||||||
Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
|
|
||||||
supported.
|
|
||||||
64 bit Linux operating system. Apple is not supported.
|
|
||||||
|
|
||||||
2. Building on linux prerequisites:
|
|
||||||
-----------------------------------
|
|
||||||
|
|
||||||
It is assumed users know how to install packages on their system and
|
|
||||||
are able to compile standard source packages. This is basic Linux and
|
|
||||||
beyond the scope of cpuminer-opt. Regardless compiling is trivial if you
|
|
||||||
follow the instructions.
|
|
||||||
|
|
||||||
Make sure you have the basic development packages installed.
|
|
||||||
Here is a good start:
|
|
||||||
|
|
||||||
http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu
|
|
||||||
|
|
||||||
Install any additional dependencies needed by cpuminer-opt. The list below
|
|
||||||
are some of the ones that may not be in the default install and need to
|
|
||||||
be installed manually. There may be others, read the compiler error messages,
|
|
||||||
they will give a clue as to the missing package.
|
|
||||||
|
|
||||||
The following command should install everything you need on Debian based
|
|
||||||
distributions such as Ubuntu. Fedora and other distributions may have similar
|
|
||||||
but different package names.
|
|
||||||
|
|
||||||
$ sudo apt-get install build-essential automake libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev zlib1g-dev git
|
|
||||||
|
|
||||||
SHA support on AMD Ryzen CPUs requires gcc version 5 or higher and
|
|
||||||
openssl 1.1.0e or higher.
|
|
||||||
|
|
||||||
znver1 and znver2 should be recognized on most recent versions of GCC and
|
|
||||||
znver3 is available with GCC 11. GCC 11 also includes rocketlake support.
|
|
||||||
In the meantime here are some suggestions to compile with new CPUs:
|
|
||||||
|
|
||||||
"-march=native" is usually the best choice, used by build.sh.
|
|
||||||
|
|
||||||
"-march=znver2 -mvaes" can be used for Ryzen 5000 if znver3 is not recognized.
|
|
||||||
|
|
||||||
"-mcascadelake -msha" or
|
|
||||||
"-mcometlake -mavx512 -msha" can be used for Rocket Lake.
|
|
||||||
|
|
||||||
Features can also be added individually:
|
|
||||||
|
|
||||||
"-msha" adds support for HW accelerated sha256.
|
|
||||||
|
|
||||||
"-mavx512" adds support for 512 bit vectors
|
|
||||||
|
|
||||||
"-mvaes" adds support for parallel AES
|
|
||||||
|
|
||||||
Additional instructions for static compilation can be found here:
|
|
||||||
https://lxadm.com/Static_compilation_of_cpuminer
|
|
||||||
Static builds should only be considered in a homogeneous HW and SW environment.
|
|
||||||
Local builds will always have the best performance and compatibility.
|
|
||||||
|
|
||||||
3. Download cpuminer-opt
|
|
||||||
------------------------
|
|
||||||
|
|
||||||
Download the source code for the latest release from the official repository.
|
|
||||||
|
|
||||||
https://github.com/JayDDee/cpuminer-opt/releases
|
|
||||||
|
|
||||||
Extract the source code.
|
|
||||||
|
|
||||||
$ tar xvzf cpuminer-opt-x.y.z.tar.gz
|
|
||||||
|
|
||||||
|
|
||||||
Alternatively it can be cloned from git.
|
|
||||||
|
|
||||||
$ git clone https://github.com/JayDDee/cpuminer-opt.git
|
|
||||||
|
|
||||||
4. Build cpuminer-opt
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
It is recommended to build with default options; this will usually
|
|
||||||
produce the best results.
|
|
||||||
|
|
||||||
$ ./build.sh to build on Linux or execute the following commands.
|
|
||||||
|
|
||||||
or
|
|
||||||
|
|
||||||
$ ./autogen.sh
|
|
||||||
$ CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
|
||||||
$ make -j n
|
|
||||||
|
|
||||||
n is the number of threads.
|
|
||||||
|
|
||||||
5. Start mining.
|
|
||||||
----------------
|
|
||||||
|
|
||||||
$ ./cpuminer -a algo -o url -u username -p password
|
|
||||||
|
|
||||||
|
|
||||||
Windows
|
|
||||||
-------
|
|
||||||
|
|
||||||
See also INSTALL_WINDOWS
|
|
||||||
|
|
||||||
The following procedure is obsolete and uses an old compiler.
|
|
||||||
|
|
||||||
Precompiled Windows binaries are built on a Linux host using Mingw
|
|
||||||
with a more recent compiler than the following Windows hosted procedure.
|
|
||||||
|
|
||||||
Building on Windows prerequisites:
|
|
||||||
|
|
||||||
msys
|
|
||||||
mingw_w64
|
|
||||||
Visual C++ redistributable 2008 X64
|
|
||||||
openssl
|
|
||||||
|
|
||||||
Install msys and mingw_w64, only needed once.
|
|
||||||
|
|
||||||
Unpack msys into C:\msys or your preferred directory.
|
|
||||||
|
|
||||||
Install mingw_w64 from win-builds.
|
|
||||||
Follow instructions, check "msys or cygwin" and "x86_64" and accept default
|
|
||||||
existing msys installation.
|
|
||||||
|
|
||||||
Open a msys shell by double clicking on msys.bat.
|
|
||||||
Note that msys shell uses linux syntax for file specifications, "C:\" is
|
|
||||||
mounted at "/c/".
|
|
||||||
|
|
||||||
Add mingw bin directory to PATH variable
|
|
||||||
PATH="/c/msys/opt/windows_64/bin/:$PATH"
|
|
||||||
|
|
||||||
Installation complete, compile cpuminer-opt.
|
|
||||||
|
|
||||||
Unpack cpuminer-opt source files using tar from msys shell, or using 7zip
|
|
||||||
or similar Windows program.
|
|
||||||
|
|
||||||
In msys shell cd to miner directory.
|
|
||||||
cd /c/path/to/cpuminer-opt
|
|
||||||
|
|
||||||
Run build.sh to build on Windows or execute the following commands.
|
|
||||||
|
|
||||||
./autogen.sh
|
|
||||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
|
||||||
make
|
|
||||||
|
|
||||||
Start mining
|
|
||||||
|
|
||||||
cpuminer.exe -a algo -o url -u user -p password
|
|
||||||
|
|
||||||
The following tips may be useful for older AMD CPUs.
|
|
||||||
|
|
||||||
AMD CPUs older than Steamroller, including Athlon x2 and Phenom II x4, are
|
|
||||||
not supported by cpuminer-opt due to an incompatible implementation of SSE2
|
|
||||||
on these CPUs. Some algos may crash the miner with an invalid instruction.
|
|
||||||
Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
|
||||||
|
|
||||||
Some users with AMD CPUs without AES_NI have reported problems compiling
|
|
||||||
with build.sh or "-march=native". Problems have included compile errors
|
|
||||||
and poor performance. These users are recommended to compile manually
|
|
||||||
specifying "-march=btver1" on the configure command line.
|
|
||||||
|
|
||||||
Support for even older x86_64 without AES_NI or SSE2 is not available.
|
|
||||||
|
|
@@ -1,4 +0,0 @@
|
|||||||
Please consult the wiki for Windows compile instructions.
|
|
||||||
|
|
||||||
https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source
|
|
||||||
|
|
@@ -1,3 +0,0 @@
|
|||||||
cpuminer is available under the terms of the GNU Public License version 2.
|
|
||||||
|
|
||||||
See COPYING for details.
|
|
@@ -1,343 +0,0 @@
|
|||||||
|
|
||||||
if HAVE_APPLE
|
|
||||||
# MacOS uses Homebrew to install needed packages but they aren't linked for
|
|
||||||
# the jansson test in configure. Ignore the failed test & link them now,
|
|
||||||
# different path for different CPU arch.
|
|
||||||
|
|
||||||
if ARCH_ARM64
|
|
||||||
EXTRA_INCLUDES = -I/opt/homebrew/include
|
|
||||||
EXTRA_LIBS = -L/opt/homebrew/lib
|
|
||||||
else
|
|
||||||
EXTRA_INCLUDES = -I/usr/local/include
|
|
||||||
EXTRA_LIBS = -L/usr/local/lib
|
|
||||||
endif
|
|
||||||
|
|
||||||
else
|
|
||||||
|
|
||||||
if WANT_JANSSON
|
|
||||||
# Can't find jansson libraries, compile the included source code.
|
|
||||||
EXTRA_INCLUDES = -I$(top_srcdir)/compat/jansson
|
|
||||||
EXTRA_LIBS = -L$(top_srcdir)/compat/jansson
|
|
||||||
else
|
|
||||||
EXTRA_INCLUDES =
|
|
||||||
EXTRA_LIBS =
|
|
||||||
endif
|
|
||||||
|
|
||||||
endif
|
|
||||||
|
|
||||||
EXTRA_DIST = example-cfg.json nomacro.pl
|
|
||||||
|
|
||||||
SUBDIRS = compat
|
|
||||||
|
|
||||||
ALL_INCLUDES = @PTHREAD_FLAGS@ -fno-strict-aliasing $(EXTRA_INCLUDES) -I.
|
|
||||||
|
|
||||||
bin_PROGRAMS = cpuminer
|
|
||||||
|
|
||||||
dist_man_MANS = cpuminer.1
|
|
||||||
|
|
||||||
cpuminer_SOURCES = \
|
|
||||||
dummy.cpp \
|
|
||||||
cpu-miner.c \
|
|
||||||
util.c \
|
|
||||||
api.c \
|
|
||||||
sysinfos.c \
|
|
||||||
algo-gate-api.c\
|
|
||||||
malloc-huge.c \
|
|
||||||
algo/argon2d/argon2d-gate.c \
|
|
||||||
algo/argon2d/blake2/blake2b.c \
|
|
||||||
algo/argon2d/argon2d/argon2.c \
|
|
||||||
algo/argon2d/argon2d/core.c \
|
|
||||||
algo/argon2d/argon2d/opt.c \
|
|
||||||
algo/argon2d/argon2d/argon2d_thread.c \
|
|
||||||
algo/argon2d/argon2d/encoding.c \
|
|
||||||
algo/blake/sph_blake.c \
|
|
||||||
algo/blake/blake256-hash.c \
|
|
||||||
algo/blake/blake512-hash.c \
|
|
||||||
algo/blake/blake-gate.c \
|
|
||||||
algo/blake/blake.c \
|
|
||||||
algo/blake/blake-4way.c \
|
|
||||||
algo/blake/sph_blake2b.c \
|
|
||||||
algo/blake/sph-blake2s.c \
|
|
||||||
algo/blake/blake2s-hash.c \
|
|
||||||
algo/blake/blake2s.c \
|
|
||||||
algo/blake/blake2b-hash.c \
|
|
||||||
algo/blake/blake2b.c \
|
|
||||||
algo/blake/blakecoin-gate.c \
|
|
||||||
algo/blake/mod_blakecoin.c \
|
|
||||||
algo/blake/blakecoin.c \
|
|
||||||
algo/blake/blakecoin-4way.c \
|
|
||||||
algo/blake/pentablake-gate.c \
|
|
||||||
algo/blake/pentablake-4way.c \
|
|
||||||
algo/blake/pentablake.c \
|
|
||||||
algo/bmw/sph_bmw.c \
|
|
||||||
algo/bmw/bmw256-hash-4way.c \
|
|
||||||
algo/bmw/bmw512-hash-4way.c \
|
|
||||||
algo/bmw/bmw256.c \
|
|
||||||
algo/bmw/bmw512-gate.c \
|
|
||||||
algo/bmw/bmw512.c \
|
|
||||||
algo/bmw/bmw512-4way.c \
|
|
||||||
algo/cubehash/cubehash_sse2.c\
|
|
||||||
algo/cubehash/cube-hash-2way.c \
|
|
||||||
algo/cubehash/sph_cubehash.c \
|
|
||||||
algo/echo/sph_echo.c \
|
|
||||||
algo/echo/echo-hash-4way.c \
|
|
||||||
algo/echo/aes_ni/hash.c\
|
|
||||||
algo/gost/sph_gost.c \
|
|
||||||
algo/groestl/groestl-gate.c \
|
|
||||||
algo/groestl/groestl512-hash-4way.c \
|
|
||||||
algo/groestl/groestl256-hash-4way.c \
|
|
||||||
algo/groestl/sph_groestl.c \
|
|
||||||
algo/groestl/groestl.c \
|
|
||||||
algo/groestl/groestl-4way.c \
|
|
||||||
algo/groestl/myrgr-gate.c \
|
|
||||||
algo/groestl/myrgr-4way.c \
|
|
||||||
algo/groestl/myr-groestl.c \
|
|
||||||
algo/groestl/aes_ni/hash-groestl.c \
|
|
||||||
algo/groestl/aes_ni/hash-groestl256.c \
|
|
||||||
algo/fugue/sph_fugue.c \
|
|
||||||
algo/fugue/fugue-aesni.c \
|
|
||||||
algo/hamsi/sph_hamsi.c \
|
|
||||||
algo/hamsi/hamsi-hash-4way.c \
|
|
||||||
algo/haval/haval.c \
|
|
||||||
algo/haval/haval-hash-4way.c \
|
|
||||||
algo/jh/sph_jh.c \
|
|
||||||
algo/jh/jh-hash-4way.c \
|
|
||||||
algo/jh/jha-gate.c \
|
|
||||||
algo/jh/jha-4way.c \
|
|
||||||
algo/jh/jha.c \
|
|
||||||
algo/keccak/sph_keccak.c \
|
|
||||||
algo/keccak/keccak.c\
|
|
||||||
algo/keccak/keccak-hash-4way.c \
|
|
||||||
algo/keccak/keccak-4way.c\
|
|
||||||
algo/keccak/keccak-gate.c \
|
|
||||||
algo/keccak/sha3d-4way.c \
|
|
||||||
algo/keccak/sha3d.c \
|
|
||||||
algo/lanehash/lane.c \
|
|
||||||
algo/luffa/luffa_for_sse2.c \
|
|
||||||
algo/luffa/luffa-hash-2way.c \
|
|
||||||
algo/luffa/sph_luffa.c \
|
|
||||||
algo/lyra2/lyra2.c \
|
|
||||||
algo/lyra2/sponge.c \
|
|
||||||
algo/lyra2/sponge-2way.c \
|
|
||||||
algo/lyra2/lyra2-hash-2way.c \
|
|
||||||
algo/lyra2/lyra2-gate.c \
|
|
||||||
algo/lyra2/lyra2rev2.c \
|
|
||||||
algo/lyra2/lyra2rev2-4way.c \
|
|
||||||
algo/lyra2/lyra2rev3.c \
|
|
||||||
algo/lyra2/lyra2rev3-4way.c \
|
|
||||||
algo/lyra2/lyra2re.c \
|
|
||||||
algo/lyra2/lyra2z-4way.c \
|
|
||||||
algo/lyra2/lyra2z330.c \
|
|
||||||
algo/lyra2/lyra2h.c \
|
|
||||||
algo/lyra2/lyra2h-4way.c \
|
|
||||||
algo/lyra2/allium-4way.c \
|
|
||||||
algo/lyra2/phi2-4way.c \
|
|
||||||
algo/lyra2/phi2.c \
|
|
||||||
algo/m7m/m7m.c \
|
|
||||||
algo/nist5/nist5-gate.c \
|
|
||||||
algo/nist5/nist5-4way.c \
|
|
||||||
algo/nist5/nist5.c \
|
|
||||||
algo/nist5/zr5.c \
|
|
||||||
algo/panama/panama-hash-4way.c \
|
|
||||||
algo/panama/sph_panama.c \
|
|
||||||
algo/quark/quark-gate.c \
|
|
||||||
algo/quark/quark.c \
|
|
||||||
algo/quark/quark-4way.c \
|
|
||||||
algo/quark/anime-gate.c \
|
|
||||||
algo/quark/anime.c \
|
|
||||||
algo/quark/anime-4way.c \
|
|
||||||
algo/quark/hmq1725-gate.c \
|
|
||||||
algo/quark/hmq1725-4way.c \
|
|
||||||
algo/quark/hmq1725.c \
|
|
||||||
algo/qubit/qubit-gate.c \
|
|
||||||
algo/qubit/qubit.c \
|
|
||||||
algo/qubit/qubit-2way.c \
|
|
||||||
algo/qubit/deep-gate.c \
|
|
||||||
algo/qubit/deep-2way.c \
|
|
||||||
algo/qubit/deep.c \
|
|
||||||
algo/ripemd/sph_ripemd.c \
|
|
||||||
algo/ripemd/ripemd-hash-4way.c \
|
|
||||||
algo/ripemd/lbry-gate.c \
|
|
||||||
algo/ripemd/lbry.c \
|
|
||||||
algo/ripemd/lbry-4way.c \
|
|
||||||
algo/scrypt/scrypt.c \
|
|
||||||
algo/scrypt/scrypt-core-4way.c \
|
|
||||||
algo/scrypt/neoscrypt.c \
|
|
||||||
algo/sha/sha1.c \
|
|
||||||
algo/sha/sha1-hash.c \
|
|
||||||
algo/sha/sha256-hash.c \
|
|
||||||
algo/sha/sph_sha2.c \
|
|
||||||
algo/sha/sph_sha2big.c \
|
|
||||||
algo/sha/sha256-hash-4way.c \
|
|
||||||
algo/sha/sha512-hash-4way.c \
|
|
||||||
algo/sha/hmac-sha256-hash.c \
|
|
||||||
algo/sha/hmac-sha256-hash-4way.c \
|
|
||||||
algo/sha/sha256d.c \
|
|
||||||
algo/sha/sha256d-4way.c \
|
|
||||||
algo/sha/sha256t-gate.c \
|
|
||||||
algo/sha/sha256t-4way.c \
|
|
||||||
algo/sha/sha256q-4way.c \
|
|
||||||
algo/sha/sha256q.c \
|
|
||||||
algo/sha/sha512256d-4way.c \
|
|
||||||
algo/sha/sha256dt.c \
|
|
||||||
algo/shabal/sph_shabal.c \
|
|
||||||
algo/shabal/shabal-hash-4way.c \
|
|
||||||
algo/shavite/sph_shavite.c \
|
|
||||||
algo/shavite/sph-shavite-aesni.c \
|
|
||||||
algo/shavite/shavite-hash-2way.c \
|
|
||||||
algo/shavite/shavite-hash-4way.c \
|
|
||||||
algo/simd/sph_simd.c \
|
|
||||||
algo/simd/simd-hash-2way.c \
|
|
||||||
algo/skein/sph_skein.c \
|
|
||||||
algo/skein/skein-hash-4way.c \
|
|
||||||
algo/skein/skein.c \
|
|
||||||
algo/skein/skein-4way.c \
|
|
||||||
algo/skein/skein-gate.c \
|
|
||||||
algo/skein/skein2.c \
|
|
||||||
algo/skein/skein2-4way.c \
|
|
||||||
algo/sm3/sm3.c \
|
|
||||||
algo/sm3/sm3-hash-4way.c \
|
|
||||||
algo/swifftx/swifftx.c \
|
|
||||||
algo/tiger/sph_tiger.c \
|
|
||||||
algo/verthash/verthash-gate.c \
|
|
||||||
algo/verthash/Verthash.c \
|
|
||||||
algo/verthash/fopen_utf8.c \
|
|
||||||
algo/verthash/tiny_sha3/sha3.c \
|
|
||||||
algo/verthash/tiny_sha3/sha3-4way.c \
|
|
||||||
algo/whirlpool/sph_whirlpool.c \
|
|
||||||
algo/whirlpool/whirlpool-gate.c \
|
|
||||||
algo/whirlpool/whirlpool.c \
|
|
||||||
algo/whirlpool/whirlpoolx.c \
|
|
||||||
algo/x11/x11-gate.c \
|
|
||||||
algo/x11/x11.c \
|
|
||||||
algo/x11/x11-4way.c \
|
|
||||||
algo/x11/x11gost-gate.c \
|
|
||||||
algo/x11/x11gost.c \
|
|
||||||
algo/x11/x11gost-4way.c \
|
|
||||||
algo/x11/c11-gate.c \
|
|
||||||
algo/x11/c11.c \
|
|
||||||
algo/x11/c11-4way.c \
|
|
||||||
algo/x11/tribus-gate.c \
|
|
||||||
algo/x11/tribus.c \
|
|
||||||
algo/x11/tribus-4way.c \
|
|
||||||
algo/x11/timetravel-gate.c \
|
|
||||||
algo/x11/timetravel.c \
|
|
||||||
algo/x11/timetravel-4way.c \
|
|
||||||
algo/x11/timetravel10-gate.c \
|
|
||||||
algo/x11/timetravel10.c \
|
|
||||||
algo/x11/timetravel10-4way.c \
|
|
||||||
algo/x11/x11evo.c \
|
|
||||||
algo/x11/x11evo-4way.c \
|
|
||||||
algo/x11/x11evo-gate.c \
|
|
||||||
algo/x12/x12-gate.c \
|
|
||||||
algo/x12/x12.c \
|
|
||||||
algo/x12/x12-4way.c \
|
|
||||||
algo/x13/x13-gate.c \
|
|
||||||
algo/x13/x13.c \
|
|
||||||
algo/x13/x13-4way.c \
|
|
||||||
algo/x13/x13sm3-gate.c \
|
|
||||||
algo/x13/x13sm3.c \
|
|
||||||
algo/x13/x13sm3-4way.c \
|
|
||||||
algo/x13/phi1612-gate.c \
|
|
||||||
algo/x13/phi1612.c \
|
|
||||||
algo/x13/phi1612-4way.c \
|
|
||||||
algo/x13/skunk-gate.c \
|
|
||||||
algo/x13/skunk-4way.c \
|
|
||||||
algo/x13/skunk.c \
|
|
||||||
algo/x13/x13bcd-4way.c \
|
|
||||||
algo/x13/x13bcd.c \
|
|
||||||
algo/x14/x14-gate.c \
|
|
||||||
algo/x14/x14.c \
|
|
||||||
algo/x14/x14-4way.c \
|
|
||||||
algo/x14/veltor-gate.c \
|
|
||||||
algo/x14/veltor.c \
|
|
||||||
algo/x14/veltor-4way.c \
|
|
||||||
algo/x14/polytimos-gate.c \
|
|
||||||
algo/x14/polytimos.c \
|
|
||||||
algo/x14/polytimos-4way.c \
|
|
||||||
algo/x14/axiom.c \
|
|
||||||
algo/x15/x15-gate.c \
|
|
||||||
algo/x15/x15.c \
|
|
||||||
algo/x15/x15-4way.c \
|
|
||||||
algo/x16/x16r-gate.c \
|
|
||||||
algo/x16/x16r.c \
|
|
||||||
algo/x16/x16r-4way.c \
|
|
||||||
algo/x16/x16rv2.c \
|
|
||||||
algo/x16/x16rv2-4way.c \
|
|
||||||
algo/x16/x16rt.c \
|
|
||||||
algo/x16/x16rt-4way.c \
|
|
||||||
algo/x16/hex.c \
|
|
||||||
algo/x16/x20r.c \
|
|
||||||
algo/x16/x21s-4way.c \
|
|
||||||
algo/x16/x21s.c \
|
|
||||||
algo/x16/minotaur.c \
|
|
||||||
algo/x17/x17-gate.c \
|
|
||||||
algo/x17/x17.c \
|
|
||||||
algo/x17/x17-4way.c \
|
|
||||||
algo/x17/xevan-gate.c \
|
|
||||||
algo/x17/xevan.c \
|
|
||||||
algo/x17/xevan-4way.c \
|
|
||||||
algo/x17/sonoa-gate.c \
|
|
||||||
algo/x17/sonoa-4way.c \
|
|
||||||
algo/x17/sonoa.c \
|
|
||||||
algo/x22/x22i-4way.c \
|
|
||||||
algo/x22/x22i.c \
|
|
||||||
algo/x22/x22i-gate.c \
|
|
||||||
algo/x22/x25x.c \
|
|
||||||
algo/x22/x25x-4way.c \
|
|
||||||
algo/yespower/yespower-gate.c \
|
|
||||||
algo/yespower/yespower-blake2b.c \
|
|
||||||
algo/yespower/crypto/hmac-blake2b.c \
|
|
||||||
algo/yespower/yescrypt-r8g.c \
|
|
||||||
algo/yespower/yespower-opt.c \
|
|
||||||
algo/yespower/yespower-ref.c \
|
|
||||||
algo/yespower/yespower-blake2b-ref.c \
|
|
||||||
algo/rinhash/rinhash.c \
|
|
||||||
algo/rinhash/blake3/blake3.c \
|
|
||||||
algo/rinhash/blake3/blake3_dispatch.c \
|
|
||||||
algo/rinhash/blake3/blake3_portable.c \
|
|
||||||
algo/rinhash/blake3/blake3_sse2_x86-64_unix.S \
|
|
||||||
algo/rinhash/blake3/blake3_sse41_x86-64_unix.S \
|
|
||||||
algo/rinhash/blake3/blake3_avx2_x86-64_unix.S \
|
|
||||||
algo/rinhash/blake3/blake3_avx512_x86-64_unix.S \
|
|
||||||
algo/rinhash/sha3/SimpleFIPS202.c \
|
|
||||||
algo/rinhash/sha3/KeccakSponge.c \
|
|
||||||
algo/rinhash/sha3/KeccakP-1600-reference.c
|
|
||||||
|
|
||||||
|
|
||||||
if HAVE_WINDOWS
|
|
||||||
cpuminer_SOURCES += compat/winansi.c
|
|
||||||
endif
|
|
||||||
|
|
||||||
if USE_ASM
|
|
||||||
disable_flags =
|
|
||||||
cpuminer_SOURCES += asm/neoscrypt_asm.S
|
|
||||||
else
|
|
||||||
disable_flags = -DNOASM
|
|
||||||
endif
|
|
||||||
|
|
||||||
cpuminer_LDFLAGS = @LDFLAGS@
|
|
||||||
cpuminer_LDADD = $(EXTRA_LIBS) @LIBCURL@ -ljansson @PTHREAD_LIBS@ @WS2_LIBS@ -lgmp
|
|
||||||
cpuminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ $(ALL_INCLUDES) -DXKCP_has_KeccakP1600
|
|
||||||
cpuminer_CFLAGS = -Wno-pointer-sign -Wno-pointer-to-int-cast -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 $(disable_flags)
|
|
||||||
|
|
||||||
if ARCH_ARM64
|
|
||||||
cpuminer_CFLAGS += -flax-vector-conversions
|
|
||||||
endif
|
|
||||||
|
|
||||||
if HAVE_WINDOWS
|
|
||||||
|
|
||||||
# use to profile an object
|
|
||||||
# gprof_cflags = -pg -g3
|
|
||||||
# cpuminer_LDFLAGS += -pg
|
|
||||||
# cpuminer_CFLAGS += -fno-inline-functions -static
|
|
||||||
|
|
||||||
# copy/paste from generated Makefile
|
|
||||||
common_ccflags = $(DEFS) $(ALL_INCLUDES) $(cpuminer_CPPFLAGS) $(CPPFLAGS) $(cpuminer_CFLAGS) $(CFLAGS)
|
|
||||||
|
|
||||||
# special CFLAGS (if you find a simpler way to do that tell me ;)
|
|
||||||
cpuminer-neoscrypt.o: neoscrypt.c
|
|
||||||
@echo "CUSTOM ${@}: ${filter %.o,${^}} ${filter %.c,${^}}"
|
|
||||||
$(CC) $(common_ccflags) -g -O3 $(gprof_cflags) -MT $@ -MD -MP -c -o $@ $<
|
|
||||||
|
|
||||||
endif
|
|
@@ -1,213 +0,0 @@
|
|||||||
cpuminer-opt is a fork of cpuminer-multi by TPruvot with optimizations
|
|
||||||
imported from other miners developed by Lucas Jones, djm34, Wolf0, pooler,
|
|
||||||
Jeff Garzik, ig0tik3d, elmad, palmd, and Optiminer, with additional
|
|
||||||
optimizations by Jay D Dee.
|
|
||||||
|
|
||||||
All of the code is believed to be open and free. If anyone has a
|
|
||||||
claim to any of it post your case in the cpuminer-opt Bitcoin Talk forum
|
|
||||||
or by email.
|
|
||||||
|
|
||||||
Miner programs are often flagged as malware by antivirus programs. This is
|
|
||||||
a false positive, they are flagged simply because they are cryptocurrency
|
|
||||||
miners. The source code is open for anyone to inspect. If you don't trust
|
|
||||||
the software, don't use it.
|
|
||||||
|
|
||||||
|
|
||||||
New thread:
|
|
||||||
|
|
||||||
https://bitcointalk.org/index.php?topic=5226770.msg53865575#msg53865575
|
|
||||||
|
|
||||||
Old thread:
|
|
||||||
|
|
||||||
https://bitcointalk.org/index.php?topic=1326803.0
|
|
||||||
|
|
||||||
mailto://jayddee246@gmail.com
|
|
||||||
|
|
||||||
This note is to confirm that bitcointalk users JayDDee and joblo are the
|
|
||||||
same person.
|
|
||||||
|
|
||||||
I created a new BCT user JayDDee to match my github user id.
|
|
||||||
The old thread has been locked but still contains useful information for
|
|
||||||
reading.
|
|
||||||
|
|
||||||
See file RELEASE_NOTES for change log and INSTALL_LINUX or INSTALL_WINDOWS
|
|
||||||
for compile instructions.
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
1. A 64 bit CPU supporting x86_64 (Intel or AMD) or aarch64 (ARM).
|
|
||||||
x86_64 requires SSE2, aarch64 requires armv8 & NEON.
|
|
||||||
|
|
||||||
Mobile CPUs like those in laptop computers are not recommended because they aren't
|
|
||||||
designed for extreme heat of operating at full load for extended periods of
|
|
||||||
time.
|
|
||||||
|
|
||||||
2. 64 bit operating system including Linux, Windows, MacOS, or BSD.
|
|
||||||
Android, IOS and alt OSs like Haiku & ReactOS are not supported.
|
|
||||||
|
|
||||||
3. Stratum pool supporting stratum+tcp:// or stratum+ssl:// protocols or
|
|
||||||
RPC getblocktemplate using http:// or https://.
|
|
||||||
|
|
||||||
Supported Algorithms
|
|
||||||
--------------------
|
|
||||||
|
|
||||||
allium Garlicoin
|
|
||||||
anime Animecoin
|
|
||||||
argon2 Argon2 coin (AR2)
|
|
||||||
argon2d250
|
|
||||||
argon2d500
|
|
||||||
argon2d4096
|
|
||||||
blake Blake-256
|
|
||||||
blake2b Blake2-512
|
|
||||||
blake2s Blake2-256
|
|
||||||
blakecoin blake256r8
|
|
||||||
bmw BMW 256
|
|
||||||
bmw512 BMW 512
|
|
||||||
c11
|
|
||||||
decred
|
|
||||||
deep Deepcoin (DCN)
|
|
||||||
dmd-gr Diamond-Groestl
|
|
||||||
groestl Groestl coin
|
|
||||||
hex x16r-hex
|
|
||||||
hmq1725
|
|
||||||
jha Jackpotcoin
|
|
||||||
keccak Maxcoin
|
|
||||||
keccakc Creative coin
|
|
||||||
lbry LBC, LBRY Credits
|
|
||||||
lyra2h
|
|
||||||
lyra2re lyra2
|
|
||||||
lyra2rev2 lyra2v2
|
|
||||||
lyra2rev3 lyrav2v3
|
|
||||||
lyra2z
|
|
||||||
lyra2z330
|
|
||||||
m7m
|
|
||||||
minotaur
|
|
||||||
minotaurx
|
|
||||||
myr-gr Myriad-Groestl
|
|
||||||
neoscrypt NeoScrypt(128, 2, 1)
|
|
||||||
nist5 Nist5
|
|
||||||
pentablake Pentablake
|
|
||||||
phi1612 phi
|
|
||||||
phi2
|
|
||||||
polytimos Ninja
|
|
||||||
power2b MicroBitcoin (MBC)
|
|
||||||
quark Quark
|
|
||||||
qubit Qubit
|
|
||||||
scrypt scrypt(1024, 1, 1) (default)
|
|
||||||
scrypt:N scrypt(N, 1, 1)
|
|
||||||
scryptn2 scrypt(1048576, 1, 1)
|
|
||||||
sha256d Double SHA-256
|
|
||||||
sha256dt
|
|
||||||
sha256q Quad SHA-256
|
|
||||||
sha256t Triple SHA-256
|
|
||||||
sha3d Double keccak256 (BSHA3)
|
|
||||||
sha512256d
|
|
||||||
skein Skein+Sha (Skeincoin)
|
|
||||||
skein2 Double Skein (Woodcoin)
|
|
||||||
skunk Signatum (SIGT)
|
|
||||||
sonoa Sono
|
|
||||||
timetravel Machinecoin (MAC)
|
|
||||||
timetravel10 Bitcore
|
|
||||||
tribus Denarius (DNR)
|
|
||||||
vanilla blake256r8vnl (VCash)
|
|
||||||
veltor (VLT)
|
|
||||||
verthash Vertcoin
|
|
||||||
whirlpool
|
|
||||||
whirlpoolx
|
|
||||||
x11 Dash
|
|
||||||
x11evo Revolvercoin
|
|
||||||
x11gost sib (SibCoin)
|
|
||||||
x12
|
|
||||||
x13
|
|
||||||
x13bcd bcd
|
|
||||||
x13sm3 hsr (Hshare)
|
|
||||||
x14
|
|
||||||
x15
|
|
||||||
x16r
|
|
||||||
x16rv2
|
|
||||||
x16rt
|
|
||||||
x16rt-veil veil
|
|
||||||
x16s
|
|
||||||
x17
|
|
||||||
x20r
|
|
||||||
x21s
|
|
||||||
x22i
|
|
||||||
x25x
|
|
||||||
xevan Bitsend (BSD)
|
|
||||||
yescrypt Globalboost-Y (BSTY)
|
|
||||||
yescryptr8 BitZeny (ZNY)
|
|
||||||
yescryptr8g Koto (KOTO)
|
|
||||||
yescryptr16 Eli
|
|
||||||
yescryptr32 WAVI
|
|
||||||
yespower Cryply
|
|
||||||
yespowerr16 Yenten (YTN)
|
|
||||||
yespower-b2b generic yespower + blake2b
|
|
||||||
zr5 Ziftr
|
|
||||||
rinhash RinHash
|
|
||||||
|
|
||||||
Many variations of scrypt based algos can be mined by specifying their
|
|
||||||
parameters:
|
|
||||||
|
|
||||||
scryptn2: --algo scrypt --param-n 1048576
|
|
||||||
|
|
||||||
cpupower: --algo yespower --param-key "CPUpower: The number of CPU working or available for proof-of-work mining"
|
|
||||||
|
|
||||||
power2b: --algo yespower-b2b --param-n 2048 --param-r 32 --param-key "Now I am become Death, the destroyer of worlds"
|
|
||||||
|
|
||||||
sugarchain: --algo yespower --param-n 2048 --param-r 32 --param-key "Satoshi Nakamoto 31/Oct/2008 Proof-of-work is essentially one-CPU-one-vote"
|
|
||||||
|
|
||||||
yespoweriots: --algo yespower --param-n 2048 --param-key "Iots is committed to the development of IOT"
|
|
||||||
|
|
||||||
yespowerlitb: --algo yespower --param-n 2048 --param-r 32 --param-key "LITBpower: The number of LITB working or available for proof-of-work mini"
|
|
||||||
|
|
||||||
yespoweric: --algo yespower --param-n 2048 --param-r 32 --param-key "IsotopeC"
|
|
||||||
|
|
||||||
yespowerurx: --algo yespower --param-n 2048 --param-r 32 --param-key "UraniumX"
|
|
||||||
|
|
||||||
yespowerltncg: --algo yespower --param-n 2048 --param-r 32 --param-key "LTNCGYES"
|
|
||||||
|
|
||||||
Errata
|
|
||||||
------
|
|
||||||
|
|
||||||
Old algorithms that are no longer used frequently will not have the latest
|
|
||||||
optimizations.
|
|
||||||
|
|
||||||
Cryptonight and variants are no longer supported, use another miner.
|
|
||||||
|
|
||||||
Neoscrypt crashes on Windows, use legacy version.
|
|
||||||
|
|
||||||
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
|
||||||
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
|
||||||
these CPUs. Some algos may crash the miner with an invalid instruction.
|
|
||||||
Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
|
||||||
|
|
||||||
cpuminer-opt does not work mining Decred algo at Nicehash and produces
|
|
||||||
only "invalid extranonce2 size" rejects.
|
|
||||||
|
|
||||||
Benchmark testing does not work for x11evo.
|
|
||||||
|
|
||||||
Bugs
|
|
||||||
----
|
|
||||||
|
|
||||||
Users are encouraged to post their bug reports using git issues or on the
|
|
||||||
Bitcoin Talk forum or opening an issue in git:
|
|
||||||
|
|
||||||
https://bitcointalk.org/index.php?topic=1326803.0
|
|
||||||
|
|
||||||
https://github.com/JayDDee/cpuminer-opt/issues
|
|
||||||
|
|
||||||
All problem reports must be accompanied by a proper problem definition.
|
|
||||||
This should include how the problem occurred, the command line and
|
|
||||||
output from the miner showing the startup messages and any errors.
|
|
||||||
A history is also useful, i.e. did it work before.
|
|
||||||
|
|
||||||
Donations
|
|
||||||
---------
|
|
||||||
|
|
||||||
cpuminer-opt has no fees of any kind but donations are accepted.
|
|
||||||
|
|
||||||
BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
|
|
||||||
|
|
||||||
Happy mining!
|
|
||||||
|
|
@@ -1,80 +0,0 @@
|
|||||||
This file is included in the Windows binary package. Compile instructions
|
|
||||||
for Linux and Windows can be found in RELEASE_NOTES.
|
|
||||||
|
|
||||||
cpuminer-opt is open source and free of any fees. Many forks exist that are
|
|
||||||
closed source and contain usage fees. Support open source free software.
|
|
||||||
|
|
||||||
This package is officially available only from:
|
|
||||||
|
|
||||||
https://github.com/JayDDee/cpuminer-opt
|
|
||||||
|
|
||||||
No other sources should be trusted.
|
|
||||||
|
|
||||||
cpuminer is a console program that is executed from a DOS or Powershell
|
|
||||||
command prompt. There is no GUI and no mouse support.
|
|
||||||
|
|
||||||
New users are encouraged to consult the cpuminer-opt Wiki for detailed
|
|
||||||
information on usage:
|
|
||||||
|
|
||||||
https://github.com/JayDDee/cpuminer-opt/wiki
|
|
||||||
|
|
||||||
Miner programs are often flagged as malware by antivirus programs. This is
|
|
||||||
a false positive, they are flagged simply because they are cryptocurrency
|
|
||||||
miners. The source code is open for anyone to inspect. If you don't trust
|
|
||||||
the software, don't use it.
|
|
||||||
|
|
||||||
Choose the exe that best matches your CPU's features or use trial and
|
|
||||||
error to find the fastest one that works. Pay attention to
|
|
||||||
the features listed at cpuminer startup to ensure you are mining at
|
|
||||||
optimum speed using the best available features.
|
|
||||||
|
|
||||||
Architecture names and compile options used are only provided for
|
|
||||||
mainstream desktop CPUs. Budget CPUs like Pentium and Celeron are often
|
|
||||||
missing some features. Check your CPU.
|
|
||||||
|
|
||||||
Support for AMD CPUs older than Ryzen is incomplete and without specific
|
|
||||||
recommendations. Find the best fit. CPUs older than Piledriver, including
|
|
||||||
Athlon x2 and Phenom II x4, are not supported by cpuminer-opt due to an
|
|
||||||
incompatible implementation of SSE2 on these CPUs.
|
|
||||||
|
|
||||||
More information for Intel and AMD CPU architectures and their features
|
|
||||||
can be found on Wikipedia.
|
|
||||||
|
|
||||||
https://en.wikipedia.org/wiki/List_of_Intel_CPU_microarchitectures
|
|
||||||
|
|
||||||
https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures
|
|
||||||
|
|
||||||
File name Architecture name
|
|
||||||
|
|
||||||
cpuminer-sse2.exe Core2, Nehalem, generic x86_64 with SSE2
|
|
||||||
cpuminer-aes-sse42.exe Westmere
|
|
||||||
cpuminer-avx.exe Sandybridge, Ivybridge
|
|
||||||
cpuminer-avx2.exe Haswell, Skylake, Kabylake, Coffeelake, Cometlake
|
|
||||||
cpuminer-avx2-sha.exe AMD Zen1, Zen2
|
|
||||||
cpuminer-avx2-sha-vaes.exe Intel Alderlake*, AMD Zen3
|
|
||||||
cpuminer-avx512.exe Intel HEDT Skylake-X, Cascadelake
|
|
||||||
cpuminer-avx512-sha-vaes.exe AMD Zen4, Intel Rocketlake, Icelake
|
|
||||||
|
|
||||||
* Alderlake is a hybrid architecture with a mix of E-cores & P-cores. Although
|
|
||||||
the P-cores can support AVX512 the E-cores can't so Intel decided to disable
|
|
||||||
AVX512 on the P-cores.
|
|
||||||
|
|
||||||
Notes about included DLL files:
|
|
||||||
|
|
||||||
Downloading DLL files from alternative sources presents an inherent
|
|
||||||
security risk if their source is unknown. All DLL files included have
|
|
||||||
been copied from the Ubuntu-20.04 installation or compiled by me from
|
|
||||||
source code obtained from the author's official repository. The exact
|
|
||||||
procedure is documented in the build instructions for Windows:
|
|
||||||
https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source
|
|
||||||
|
|
||||||
Some included DLL files may already be installed on the system by Windows or
|
|
||||||
third party packages. They often will work and may be used instead of the
|
|
||||||
included version of the files.
|
|
||||||
|
|
||||||
|
|
||||||
If you like this software feel free to donate:
|
|
||||||
|
|
||||||
BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
1206
rin/miner/cpuminer/aclocal.m4
vendored
1206
rin/miner/cpuminer/aclocal.m4
vendored
File diff suppressed because it is too large
Load Diff
@@ -1,465 +0,0 @@
|
|||||||
/////////////////////////////
|
|
||||||
////
|
|
||||||
//// NEW FEATURE: algo_gate
|
|
||||||
////
|
|
||||||
//// algos define targets for their common functions
|
|
||||||
//// and define a function for miner-thread to call to register
|
|
||||||
//// their targets. miner thread builds the gate, an array of structs
|
|
||||||
//// of function pointers, by calling each algo's register function.
|
|
||||||
// Functions in this file are used simultaneously by multiple
|
|
||||||
// threads and must therefore be re-entrant.
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <memory.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include "algo-gate-api.h"
|
|
||||||
|
|
||||||
// Define null and standard functions.
|
|
||||||
//
|
|
||||||
// Generic null functions do nothing except satisfy the syntax and
|
|
||||||
// can be used for optional safe gate functions.
|
|
||||||
//
|
|
||||||
// null gate functions are generally used for mandatory and unsafe functions
|
|
||||||
// and will usually display an error message and/or return a fail code.
|
|
||||||
// They are registered by default and are expected to be overwritten.
|
|
||||||
//
|
|
||||||
// std functions are non-null functions used by the most number of algos
|
|
||||||
// and are the default.
|
|
||||||
//
|
|
||||||
// aux functions are functions used by many, but not most, algos and must
|
|
||||||
// be registered by each algo using them. They usually have descriptive
|
|
||||||
// names.
|
|
||||||
//
|
|
||||||
// custom functions are algo specific and are defined and registered in the
|
|
||||||
// algo's source file and are usually named [algo]_[function].
|
|
||||||
//
|
|
||||||
// In most cases the default is a null or std function. However in some
|
|
||||||
// cases, for convenience when the null function is not the most popular,
|
|
||||||
// the std function will be defined as default and the algo must register
|
|
||||||
// an appropriate null function.
|
|
||||||
//
|
|
||||||
// similar algos may share a gate function that may be defined here or
|
|
||||||
// in a source file common to the similar algos.
|
|
||||||
//
|
|
||||||
// gate functions may call other gate functions under the following
|
|
||||||
// restrictions. Any gate function defined here or used by more than one
|
|
||||||
// algo must call other functions using the gate: algo_gate.[function].
|
|
||||||
// custom functions may call other custom functions directly using
|
|
||||||
// [algo]_[function], however it is recommended to always use the gate.
|
|
||||||
//
|
|
||||||
// If, under rare circumstances, an algo with a custom gate function
|
|
||||||
// needs to call a function of another algo it must define and register
|
|
||||||
// a private gate from its registration function and use it to call
|
|
||||||
// foreign functions: [private_gate].[function]. If the algo needs to call
|
|
||||||
// a utility function defined here it may do so directly.
|
|
||||||
//
|
|
||||||
// The algo's gate registration function is called once from the main thread
|
|
||||||
// and can do other initialization in addition such as setting options or
|
|
||||||
// other global or local (to the algo) variables.
|
|
||||||
|
|
||||||
// A set of predefined generic null functions that can be used as any null
|
|
||||||
// gate function with the same signature.
|
|
||||||
|
|
||||||
void do_nothing () {} // Generic null gate function: takes no action and returns nothing.
|
|
||||||
bool return_true () { return true; } // Generic null gate function: always returns true.
|
|
||||||
bool return_false () { return false; } // Generic null gate function: always returns false.
|
|
||||||
void *return_null () { return NULL; } // Generic null gate function: always returns NULL.
|
|
||||||
|
|
||||||
// Logs a two-line LOG_WARNING saying that the algo selected by opt_algo
// (printed as algo_names[opt_algo]) has not been tested live.
void algo_not_tested()
|
|
||||||
{
|
|
||||||
applog( LOG_WARNING,"Algo %s has not been tested live. It may not work",
|
|
||||||
algo_names[opt_algo] );
|
|
||||||
applog(LOG_WARNING,"and bad things may happen. Use at your own risk.");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Logs a two-line LOG_WARNING saying that the algo selected by opt_algo
// (printed as algo_names[opt_algo]) has not been tested using 4way and asks
// the user to report results.
void four_way_not_tested()
|
|
||||||
{
|
|
||||||
applog( LOG_WARNING,"Algo %s has not been tested using 4way. It may not", algo_names[opt_algo] );
|
|
||||||
applog( LOG_WARNING,"work or may be slower. Please report your results.");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Logs a LOG_ERR message saying that the algo selected by opt_algo
// (printed as algo_names[opt_algo]) has not been implemented.
void algo_not_implemented()
|
|
||||||
{
|
|
||||||
applog(LOG_ERR,"Algo %s has not been Implemented.",algo_names[opt_algo]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// default null functions
|
|
||||||
// deprecated, use generic as default
|
|
||||||
// Placeholder scanhash: logs a LOG_WARNING that no scanhash function is
// defined in algo_gate and returns 0 without hashing anything.
int null_scanhash()
|
|
||||||
{
|
|
||||||
applog(LOG_WARNING,"SWERR: undefined scanhash function in algo_gate");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Default generic scanhash can be used in many cases. Not to be used when
|
|
||||||
// prehashing can be done or when byte swapping the data can be avoided.
|
|
||||||
// Scans nonces one at a time, starting from work->data[19].
// For each nonce n it writes n into word 19 of a local 20-word copy of the
// work data (edata), calls algo_gate.hash() on it and, when that call
// succeeds, the hash passes valid_hash() against work->target and
// benchmarking is off, stores bswap_32(n) in work->data[19] and calls
// submit_solution().
// The loop ends once n reaches max_nonce - 1 or work_restart[thr_id].restart
// is set. On exit *hashes_done holds the number of nonces tried and
// work->data[19] holds the first nonce not tried. Always returns 0.
int scanhash_generic( struct work *work, uint32_t max_nonce,
|
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
|
||||||
{
|
|
||||||
uint32_t edata[20] __attribute__((aligned(64))); // local copy of the work data that is hashed
|
|
||||||
uint32_t hash[8] __attribute__((aligned(64))); // output of algo_gate.hash()
|
|
||||||
uint32_t *pdata = work->data;
|
|
||||||
uint32_t *ptarget = work->target;
|
|
||||||
const uint32_t first_nonce = pdata[19]; // word 19 carries the starting nonce
|
|
||||||
const uint32_t last_nonce = max_nonce - 1;
|
|
||||||
uint32_t n = first_nonce;
|
|
||||||
const int thr_id = mythr->id;
|
|
||||||
const bool bench = opt_benchmark; // when set, no solutions are submitted
|
|
||||||
|
|
||||||
v128_bswap32_80( edata, pdata ); // presumably byte-swaps the 80-byte work data into edata -- confirm in helper
|
|
||||||
do
|
|
||||||
{
|
|
||||||
edata[19] = n; // nonce is written in its plain, incrementing form
|
|
||||||
if ( likely( algo_gate.hash( hash, edata, thr_id ) ) )
|
|
||||||
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
|
|
||||||
{
|
|
||||||
pdata[19] = bswap_32( n ); // the work data gets the byte-swapped nonce before submission
|
|
||||||
submit_solution( work, hash, mythr );
|
|
||||||
}
|
|
||||||
n++;
|
|
||||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
|
||||||
*hashes_done = n - first_nonce;
|
|
||||||
pdata[19] = n; // hand the next untried nonce back to the caller
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
|
||||||
|
|
||||||
//int scanhash_4way_64_64( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr )
|
|
||||||
|
|
||||||
//int scanhash_4way_64_640( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr )
|
|
||||||
|
|
||||||
// Scans nonces four at a time, starting from work->data[19].
// The work data is expanded into vdata by mm256_bswap32_intrlv80_4x64() and
// the four lanes are given nonces n, n+1, n+2 and n+3. Each pass calls
// algo_gate.hash() on vdata; for every lane whose word at hash32_d7[lane]
// is <= work->target[7] (and benchmarking is off) the lane's hash is
// extracted with extr_lane_4x32() and checked with valid_hash(). A valid
// lane stores bswap_32(n + lane) in work->data[19] and calls
// submit_solution().
// The loop ends once n exceeds max_nonce - 4 or work_restart[thr_id].restart
// is set. On exit work->data[19] holds the first nonce not tried and
// *hashes_done the number of nonces tried. Always returns 0.
int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce,
|
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
|
||||||
{
|
|
||||||
uint32_t hash32[8*4] __attribute__ ((aligned (64))); // hash output for all 4 lanes
|
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64))); // 4-lane expansion of the work data
|
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64))); // hash of a single lane, filled by extr_lane_4x32()
|
|
||||||
uint32_t *hash32_d7 = &(hash32[ 7*4 ]); // the 4 words compared against target word 7, one per lane
|
|
||||||
uint32_t *pdata = work->data;
|
|
||||||
const uint32_t *ptarget = work->target;
|
|
||||||
const uint32_t first_nonce = pdata[19]; // word 19 carries the starting nonce
|
|
||||||
const uint32_t last_nonce = max_nonce - 4;
|
|
||||||
__m256i *noncev = (__m256i*)vdata + 9; // last 256-bit vector of vdata; holds the nonces
|
|
||||||
uint32_t n = first_nonce;
|
|
||||||
const int thr_id = mythr->id;
|
|
||||||
const uint32_t targ32_d7 = ptarget[7];
|
|
||||||
const bool bench = opt_benchmark; // when set, no solutions are submitted
|
|
||||||
|
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
|
||||||
// Overwrite byte swapped nonce with original byte order for proper
|
|
||||||
// incrementing. The nonce only needs to be byte swapped if it is to be
|
|
||||||
// submitted.
|
|
||||||
*noncev = mm256_intrlv_blend_32(
|
|
||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
|
||||||
do
|
|
||||||
{
|
|
||||||
if ( likely( algo_gate.hash( hash32, vdata, thr_id ) ) )
|
|
||||||
for ( int lane = 0; lane < 4; lane++ )
|
|
||||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 && !bench ) ) // cheap one-word pre-check before the full valid_hash() test
|
|
||||||
{
|
|
||||||
extr_lane_4x32( lane_hash, hash32, lane, 256 );
|
|
||||||
if ( valid_hash( lane_hash, ptarget ) )
|
|
||||||
{
|
|
||||||
pdata[19] = bswap_32( n + lane ); // nonce of the winning lane, byte swapped for submission
|
|
||||||
submit_solution( work, lane_hash, mythr );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*noncev = _mm256_add_epi32( *noncev,
|
|
||||||
_mm256_set1_epi64x( 0x0000000400000000 ) ); // adds 4 to the upper 32 bits of each 64-bit element, i.e. to every lane's nonce
|
|
||||||
n += 4;
|
|
||||||
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
|
|
||||||
pdata[19] = n; // hand the next untried nonce back to the caller
|
|
||||||
*hashes_done = n - first_nonce;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
//int scanhash_8way_32_32( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr )
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(SIMD512)
|
|
||||||
|
|
||||||
//int scanhash_8way_64_64( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr )
|
|
||||||
|
|
||||||
//int scanhash_8way_64_640( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr )
|
|
||||||
|
|
||||||
int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce,
|
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
|
||||||
{
|
|
||||||
uint32_t hash32[8*8] __attribute__ ((aligned (128)));
|
|
||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
|
||||||
uint32_t *hash32_d7 = &(hash32[7*8]);
|
|
||||||
uint32_t *pdata = work->data;
|
|
||||||
const uint32_t *ptarget = work->target;
|
|
||||||
const uint32_t first_nonce = pdata[19];
|
|
||||||
const uint32_t last_nonce = max_nonce - 8;
|
|
||||||
__m512i *noncev = (__m512i*)vdata + 9;
|
|
||||||
uint32_t n = first_nonce;
|
|
||||||
const int thr_id = mythr->id;
|
|
||||||
const uint32_t targ32_d7 = ptarget[7];
|
|
||||||
const bool bench = opt_benchmark;
|
|
||||||
|
|
||||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
|
||||||
*noncev = mm512_intrlv_blend_32(
|
|
||||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
|
||||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
|
||||||
do
|
|
||||||
{
|
|
||||||
if ( likely( algo_gate.hash( hash32, vdata, thr_id ) ) )
|
|
||||||
for ( int lane = 0; lane < 8; lane++ )
|
|
||||||
if ( unlikely( ( hash32_d7[ lane ] <= targ32_d7 ) && !bench ) )
|
|
||||||
{
|
|
||||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
|
||||||
if ( likely( valid_hash( lane_hash, ptarget ) ) )
|
|
||||||
{
|
|
||||||
pdata[19] = bswap_32( n + lane );
|
|
||||||
submit_solution( work, lane_hash, mythr );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*noncev = _mm512_add_epi32( *noncev,
|
|
||||||
_mm512_set1_epi64( 0x0000000800000000 ) );
|
|
||||||
n += 8;
|
|
||||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
|
||||||
pdata[19] = n;
|
|
||||||
*hashes_done = n - first_nonce;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
//int scanhash_16way_32_32( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr )
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int null_hash()
|
|
||||||
{
|
|
||||||
applog(LOG_WARNING,"SWERR: null_hash unsafe null function");
|
|
||||||
return 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Fill every member of the gate with its default target or value.
// Called by register_algo_gate before the algo specific register function,
// which then overrides only the members it needs to customize.
static void init_algo_gate( algo_gate_t* gate )
{
   // Hashing: generic scalar scanner, and a hash stub that only warns.
   gate->scanhash              = (void*)scanhash_generic;
   gate->hash                  = (void*)null_hash;

   // Per thread initialization.
   gate->miner_thread_init     = (void*)return_true;

   // Work acquisition and decoding.
   gate->get_new_work          = (void*)std_get_new_work;
   gate->work_decode           = (void*)std_le_work_decode;
   gate->decode_extra_data     = (void*)do_nothing;
   gate->set_work_data_endian  = (void*)do_nothing;
   gate->get_work_data_size    = (void*)std_get_work_data_size;
   gate->longpoll_rpc_call     = (void*)std_longpoll_rpc_call;

   // Block header construction.
   gate->gen_merkle_root       = (void*)sha256d_gen_merkle_root;
   gate->build_block_header    = (void*)std_build_block_header;
   gate->build_extraheader     = (void*)std_build_extraheader;

   // Result submission.
   gate->build_stratum_request = (void*)std_le_build_stratum_request;
   gate->malloc_txs_request    = (void*)std_malloc_txs_request;
   gate->submit_getwork_result = (void*)std_le_submit_getwork_result;

   // Disabled gate members, kept for reference.
//   gate->resync_threads        = (void*)&do_nothing;
//   gate->do_this_thread        = (void*)&return_true;

   // Plain data members.
   gate->optimizations         = EMPTY_SET;
   gate->ntime_index           = STD_NTIME_INDEX;
   gate->nbits_index           = STD_NBITS_INDEX;
   gate->nonce_index           = STD_NONCE_INDEX;
   gate->work_cmp_size         = STD_WORK_CMP_SIZE;
}
|
|
||||||
|
|
||||||
// Ignore warnings for not yet defined register functions
|
|
||||||
#pragma GCC diagnostic push
|
|
||||||
#pragma GCC diagnostic ignored "-Wimplicit-function-declaration"
|
|
||||||
|
|
||||||
// Called once by main
|
|
||||||
// Initialize the gate with defaults and then let the selected algo override
// the members it customizes.
//
//   algo: ALGO_* identifier of the algorithm to register.
//   gate: gate to populate, must not be NULL.
//
// Returns true on success. Returns false, after logging an error, when gate
// is NULL, when algo has no case below, or when the algo's register function
// reports failure.
bool register_algo_gate( int algo, algo_gate_t *gate )
{
   bool rc = false;

   if ( NULL == gate )
   {
      applog(LOG_ERR,"FAIL: algo_gate registration failed, NULL gate\n");
      return false;
   }

   init_algo_gate( gate );

   switch ( algo )
   {
     case ALGO_ALLIUM:       rc = register_allium_algo       ( gate ); break;
     case ALGO_ANIME:        rc = register_anime_algo        ( gate ); break;
     case ALGO_ARGON2D250:   rc = register_argon2d250_algo   ( gate ); break;
     case ALGO_ARGON2D500:   rc = register_argon2d500_algo   ( gate ); break;
     case ALGO_ARGON2D4096:  rc = register_argon2d4096_algo  ( gate ); break;
     case ALGO_AXIOM:        rc = register_axiom_algo        ( gate ); break;
     case ALGO_BLAKE:        rc = register_blake_algo        ( gate ); break;
     case ALGO_BLAKE2B:      rc = register_blake2b_algo      ( gate ); break;
     case ALGO_BLAKE2S:      rc = register_blake2s_algo      ( gate ); break;
     case ALGO_BLAKECOIN:    rc = register_blakecoin_algo    ( gate ); break;
     case ALGO_BMW512:       rc = register_bmw512_algo       ( gate ); break;
     case ALGO_C11:          rc = register_c11_algo          ( gate ); break;
     case ALGO_DEEP:         rc = register_deep_algo         ( gate ); break;
     case ALGO_DMD_GR:       rc = register_dmd_gr_algo       ( gate ); break;
     case ALGO_GROESTL:      rc = register_groestl_algo      ( gate ); break;
     case ALGO_HEX:          rc = register_hex_algo          ( gate ); break;
     case ALGO_HMQ1725:      rc = register_hmq1725_algo      ( gate ); break;
     case ALGO_JHA:          rc = register_jha_algo          ( gate ); break;
     case ALGO_KECCAK:       rc = register_keccak_algo       ( gate ); break;
     case ALGO_KECCAKC:      rc = register_keccakc_algo      ( gate ); break;
     case ALGO_LBRY:         rc = register_lbry_algo         ( gate ); break;
     case ALGO_LYRA2H:       rc = register_lyra2h_algo       ( gate ); break;
     case ALGO_LYRA2RE:      rc = register_lyra2re_algo      ( gate ); break;
     case ALGO_LYRA2REV2:    rc = register_lyra2rev2_algo    ( gate ); break;
     case ALGO_LYRA2REV3:    rc = register_lyra2rev3_algo    ( gate ); break;
     case ALGO_LYRA2Z:       rc = register_lyra2z_algo       ( gate ); break;
     case ALGO_LYRA2Z330:    rc = register_lyra2z330_algo    ( gate ); break;
     case ALGO_M7M:          rc = register_m7m_algo          ( gate ); break;
     case ALGO_MINOTAUR:     rc = register_minotaur_algo     ( gate ); break;
     // NOTE(review): minotaurx shares the minotaur register function here;
     // presumably that function distinguishes the two variants itself.
     case ALGO_MINOTAURX:    rc = register_minotaur_algo     ( gate ); break;
     case ALGO_MYR_GR:       rc = register_myriad_algo       ( gate ); break;
     case ALGO_NEOSCRYPT:    rc = register_neoscrypt_algo    ( gate ); break;
     case ALGO_NIST5:        rc = register_nist5_algo        ( gate ); break;
     case ALGO_PENTABLAKE:   rc = register_pentablake_algo   ( gate ); break;
     case ALGO_PHI1612:      rc = register_phi1612_algo      ( gate ); break;
     case ALGO_PHI2:         rc = register_phi2_algo         ( gate ); break;
     case ALGO_POLYTIMOS:    rc = register_polytimos_algo    ( gate ); break;
     case ALGO_POWER2B:      rc = register_power2b_algo      ( gate ); break;
     case ALGO_QUARK:        rc = register_quark_algo        ( gate ); break;
     case ALGO_QUBIT:        rc = register_qubit_algo        ( gate ); break;
     case ALGO_SCRYPT:       rc = register_scrypt_algo       ( gate ); break;
     case ALGO_SHA256D:      rc = register_sha256d_algo      ( gate ); break;
     case ALGO_SHA256DT:     rc = register_sha256dt_algo     ( gate ); break;
     case ALGO_SHA256Q:      rc = register_sha256q_algo      ( gate ); break;
     case ALGO_SHA256T:      rc = register_sha256t_algo      ( gate ); break;
     case ALGO_SHA3D:        rc = register_sha3d_algo        ( gate ); break;
     case ALGO_SHA512256D:   rc = register_sha512256d_algo   ( gate ); break;
     case ALGO_SKEIN:        rc = register_skein_algo        ( gate ); break;
     case ALGO_SKEIN2:       rc = register_skein2_algo       ( gate ); break;
     case ALGO_SKUNK:        rc = register_skunk_algo        ( gate ); break;
     case ALGO_SONOA:        rc = register_sonoa_algo        ( gate ); break;
     case ALGO_TIMETRAVEL:   rc = register_timetravel_algo   ( gate ); break;
     case ALGO_TIMETRAVEL10: rc = register_timetravel10_algo ( gate ); break;
     case ALGO_TRIBUS:       rc = register_tribus_algo       ( gate ); break;
     case ALGO_VANILLA:      rc = register_vanilla_algo      ( gate ); break;
     case ALGO_VELTOR:       rc = register_veltor_algo       ( gate ); break;
     case ALGO_VERTHASH:     rc = register_verthash_algo     ( gate ); break;
     case ALGO_WHIRLPOOL:    rc = register_whirlpool_algo    ( gate ); break;
     case ALGO_WHIRLPOOLX:   rc = register_whirlpoolx_algo   ( gate ); break;
     case ALGO_X11:          rc = register_x11_algo          ( gate ); break;
     case ALGO_X11EVO:       rc = register_x11evo_algo       ( gate ); break;
     case ALGO_X11GOST:      rc = register_x11gost_algo      ( gate ); break;
     case ALGO_X12:          rc = register_x12_algo          ( gate ); break;
     case ALGO_X13:          rc = register_x13_algo          ( gate ); break;
     case ALGO_X13BCD:       rc = register_x13bcd_algo       ( gate ); break;
     case ALGO_X13SM3:       rc = register_x13sm3_algo       ( gate ); break;
     case ALGO_X14:          rc = register_x14_algo          ( gate ); break;
     case ALGO_X15:          rc = register_x15_algo          ( gate ); break;
     case ALGO_X16R:         rc = register_x16r_algo         ( gate ); break;
     case ALGO_X16RV2:       rc = register_x16rv2_algo       ( gate ); break;
     case ALGO_X16RT:        rc = register_x16rt_algo        ( gate ); break;
     case ALGO_X16RT_VEIL:   rc = register_x16rt_veil_algo   ( gate ); break;
     case ALGO_X16S:         rc = register_x16s_algo         ( gate ); break;
     case ALGO_X17:          rc = register_x17_algo          ( gate ); break;
     case ALGO_X20R:         rc = register_x20r_algo         ( gate ); break;
     case ALGO_X21S:         rc = register_x21s_algo         ( gate ); break;
     case ALGO_X22I:         rc = register_x22i_algo         ( gate ); break;
     case ALGO_X25X:         rc = register_x25x_algo         ( gate ); break;
     case ALGO_XEVAN:        rc = register_xevan_algo        ( gate ); break;
     case ALGO_YESCRYPT:     rc = register_yescrypt_algo     ( gate ); break;
     case ALGO_YESCRYPTR8:   rc = register_yescryptr8_algo   ( gate ); break;
     case ALGO_YESCRYPTR8G:  rc = register_yescryptr8g_algo  ( gate ); break;
     case ALGO_YESCRYPTR16:  rc = register_yescryptr16_algo  ( gate ); break;
     case ALGO_YESCRYPTR32:  rc = register_yescryptr32_algo  ( gate ); break;
     case ALGO_YESPOWER:     rc = register_yespower_algo     ( gate ); break;
     case ALGO_YESPOWERR16:  rc = register_yespowerr16_algo  ( gate ); break;
     case ALGO_YESPOWER_B2B: rc = register_yespower_b2b_algo ( gate ); break;
     case ALGO_ZR5:          rc = register_zr5_algo          ( gate ); break;
     case ALGO_RINHASH:      rc = register_rin_algo          ( gate ); break;
     default:
        // algo matched no case, so it is not used as an index into
        // algo_names here; the globally selected algo is reported instead.
        applog(LOG_ERR,"BUG: unregistered algorithm %s.\n", algo_names[opt_algo] );
        return false;
   } // switch

   if ( !rc )
   {
      // algo matched a case above, so report the algorithm that was actually
      // requested rather than the globally selected one; they can differ when
      // this function is reached through exec_hash_function.
      // NOTE(review): assumes algo_names is indexed by the ALGO_* values,
      // as its use with opt_algo suggests - confirm.
      applog(LOG_ERR, "FAIL: %s algorithm failed to initialize\n", algo_names[algo] );
      return false;
   }
   return true;
}
|
|
||||||
|
|
||||||
// restore warnings
|
|
||||||
#pragma GCC diagnostic pop
|
|
||||||
|
|
||||||
void exec_hash_function( int algo, void *output, const void *pdata )
|
|
||||||
{
|
|
||||||
algo_gate_t gate;
|
|
||||||
gate.hash = (void*)&null_hash;
|
|
||||||
register_algo_gate( algo, &gate );
|
|
||||||
gate.hash( output, pdata, 0 );
|
|
||||||
}
|
|
||||||
|
|
||||||
#define PROPER (1)
|
|
||||||
#define ALIAS (0)
|
|
||||||
|
|
||||||
// The only difference between the alias and the proper algo name is the
|
|
||||||
// proper name is the one that is defined in ALGO_NAMES. There may be
|
|
||||||
// multiple aliases that map to the same proper name.
|
|
||||||
// New aliases can be added anywhere in the array as long as NULL is last.
|
|
||||||
// Alphabetic order of alias is recommended.
|
|
||||||
// Table of { alias, proper name } pairs. The final { NULL, NULL } entry is
// the end marker that get_algo_alias stops on and must remain the last entry.
const char* const algo_alias_map[][2] =
{
//   alias                proper
  { "bcd",               "x13bcd"         },
  { "bitcore",           "timetravel10"   },
  { "bitzeny",           "yescryptr8"     },
  { "blake256r8",        "blakecoin"      },
  { "blake256r8vnl",     "vanilla"        },
  { "blake256r14",       "blake"          },
  { "diamond",           "dmd-gr"         },
  { "espers",            "hmq1725"        },
  { "flax",              "c11"            },
  { "hsr",               "x13sm3"         },
  { "jackpot",           "jha"            },
  { "lyra2",             "lyra2re"        },
  { "lyra2v2",           "lyra2rev2"      },
  { "lyra2v3",           "lyra2rev3"      },
  { "myrgr",             "myr-gr"         },
  { "myriad",            "myr-gr"         },
  { "neo",               "neoscrypt"      },
  { "phi",               "phi1612"        },
  { "scryptn2",          "scrypt:1048576" },
  { "sib",               "x11gost"        },
  { "timetravel8",       "timetravel"     },
  { "veil",              "x16rt-veil"     },
  { "x16r-hex",          "hex"            },
  { "yenten",            "yescryptr16"    },
  { "ziftr",             "zr5"            },
  { "rinhash",           "rinhash"        },
  { NULL,                NULL             }
};
|
|
||||||
|
|
||||||
// if arg is a valid alias for a known algo it is updated with the proper
|
|
||||||
// name. No validation of the algo or alias is done; it is the responsibility
|
|
||||||
// of the calling function to validate the algo after return.
|
|
||||||
void get_algo_alias( char** algo_or_alias )
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for ( i=0; algo_alias_map[i][ALIAS]; i++ )
|
|
||||||
if ( !strcasecmp( *algo_or_alias, algo_alias_map[i][ ALIAS ] ) )
|
|
||||||
{
|
|
||||||
// found valid alias, return proper name
|
|
||||||
*algo_or_alias = (char*)( algo_alias_map[i][ PROPER ] );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef ALIAS
|
|
||||||
#undef PROPER
|
|
||||||
|
|
@@ -1,324 +0,0 @@
|
|||||||
#ifndef __ALGO_GATE_API_H__
|
|
||||||
#define __ALGO_GATE_API_H__ 1
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include "miner.h"
|
|
||||||
#include "simd-utils.h"
|
|
||||||
|
|
||||||
/////////////////////////////
|
|
||||||
////
|
|
||||||
//// NEW FEATURE: algo_gate
|
|
||||||
////
|
|
||||||
//// algos define targets for their common functions
|
|
||||||
//// and define a function for miner-thread to call to register
|
|
||||||
//// their targets. miner thread builds the gate, and array of structs
|
|
||||||
//// of function pointers, by calling each algo's register function.
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// So you want to add an algo. Well it is a little easier now.
|
|
||||||
// Look at existing algos for guidance.
|
|
||||||
//
|
|
||||||
// 1. Define the algo, miner.h, previously in cpu-miner.c
|
|
||||||
//
|
|
||||||
// 2.Define custom versions of the mandatory function for the new algo.
|
|
||||||
//
|
|
||||||
// 3. Next look through the list of unsafe functions to determine
|
|
||||||
// if any apply to the new algo. If so they must also be defined.
|
|
||||||
//
|
|
||||||
// 4. Look through the list of safe functions to see if any apply
|
|
||||||
// to the new algo. If so look at the null instance of the function
|
|
||||||
// to see if it satisfies its needs.
|
|
||||||
//
|
|
||||||
// 5. If any of the default safe functions are not fit for the new algo
|
|
||||||
// a custom function will have to be defined.
|
|
||||||
//
|
|
||||||
// 6. Determine if other nonexistent functions are required.
|
|
||||||
// That is determined by the need to add code in cpu-miner.c
|
|
||||||
// that applies only to the new algo. That is forbidden. All
|
|
||||||
// algo specific code must be in the algo's file.
|
|
||||||
//
|
|
||||||
// 7. If new functions need to be added to the gate add the type
|
|
||||||
// to the structure, declare a null instance in this file and define
|
|
||||||
// it in algo-gate-api.c. It must be a safe optional function so the null
|
|
||||||
// instance must return a success code and otherwise do nothing.
|
|
||||||
//
|
|
||||||
// 8. When all the custom functions are defined write a registration
|
|
||||||
// function to initialize the gate's function pointers with the custom
|
|
||||||
// functions. It is not necessary to initialize safe optional null
|
|
||||||
// instances as they are defined by default, or unsafe functions that
|
|
||||||
// are not needed by the algo.
|
|
||||||
//
|
|
||||||
// 9. Add a case entry to the switch/case in function register_algo_gate
|
|
||||||
// in file algo-gate-api.c for the new algo.
|
|
||||||
//
|
|
||||||
// 10 If a new function type was defined add an entry to init algo_gate
|
|
||||||
// to initialize the new function to its null instance described in step 7.
|
|
||||||
//
|
|
||||||
// 11. If the new algo has aliases add them to the alias array in
|
|
||||||
// algo-gate-api.c
|
|
||||||
//
|
|
||||||
// 12. Include algo-gate-api.h and miner.h in the algo's source file.
|
|
||||||
//
|
|
||||||
// 13. Include any other algo source files required by the new algo.
|
|
||||||
//
|
|
||||||
// 14. Done, compile and run.
|
|
||||||
|
|
||||||
|
|
||||||
// declare some function pointers
|
|
||||||
// mandatory functions require a custom function specific to the algo
|
|
||||||
// be defined.
|
|
||||||
// otherwise the null instance will return a fail code.
|
|
||||||
// Optional functions may not be required for certain algos or the null
|
|
||||||
// instance provides a safe default. If the default is suitable for
|
|
||||||
// an algo it is not necessary to define a custom function.
|
|
||||||
//
|
|
||||||
|
|
||||||
// my hack at creating a set data type using bit masks. Set inclusion,
|
|
||||||
// exclusion, union and intersection operations are provided for convenience.
// In some cases it may be desirable to use boolean algebra directly on the
|
|
||||||
// data to perform set operations. Sets can be represented as single
|
|
||||||
// elements, a bitwise OR of multiple elements, a bitwise OR of multiple
|
|
||||||
// set variables or constants, or combinations of the above.
|
|
||||||
// Examples:
|
|
||||||
//
|
|
||||||
// my_set = set_element;
|
|
||||||
// another_set = my_set | another_set_element;
|
|
||||||
|
|
||||||
// Bit mask based set type. Every element below is a single bit, a set is
// the bitwise OR of its elements.
typedef uint32_t set_t;

// Every shifted value is parenthesized so that the macro expands as a single
// operand in any expression, e.g. AVX2_OPT * 2 or ~AVX2_OPT.
#define EMPTY_SET       0
#define SSE2_OPT        1            // parity with NEON
#define SSSE3_OPT       (1 <<  1)    // Intel Core2
#define SSE41_OPT       (1 <<  2)
#define SSE42_OPT       (1 <<  3)
#define AVX_OPT         (1 <<  4)    // Intel Sandybridge
#define AVX2_OPT        (1 <<  5)    // Intel Haswell, AMD Zen1
#define AVX512_OPT      (1 <<  6)    // Skylake-X, Zen4 (AVX512[F,VL,DQ,BW])
#define AES_OPT         (1 <<  7)    // Intel Westmere, AArch64
#define VAES_OPT        (1 <<  8)    // Icelake, Zen3
#define SHA256_OPT      (1 <<  9)    // Zen1, Icelake, AArch64
#define SHA512_OPT      (1 << 10)    // Intel Arrow Lake, AArch64
#define NEON_OPT        (1 << 11)    // AArch64
#define AVX10_256       (1 << 12)
#define AVX10_512       (1 << 13)

// AVX10 does not have explicit algo features:
// AVX10_512 is compatible with AVX512 + VAES
// AVX10_256 is compatible with AVX2 + VAES

// Returns the set containing every element that is in a or in b.
static inline set_t set_union ( set_t a, set_t b ) { return a | b; }

// Returns the set containing only the elements common to a and b.
static inline set_t set_intsec ( set_t a, set_t b) { return a & b; }

// Returns true if all elements of set a are also in set b.
static inline bool set_incl ( set_t a, set_t b ) { return (a & b) == a; }

// Returns true if no element of set a is in set b.
static inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
// Mandatory functions, one of these is mandatory. If a generic scanhash
|
|
||||||
// is used a custom target hash function must be registered, with a custom
|
|
||||||
// scanhash the target hash function can be called directly and doesn't need
|
|
||||||
// to be registered with the gate.
|
|
||||||
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
|
|
||||||
|
|
||||||
int ( *hash ) ( void*, const void*, int );
|
|
||||||
|
|
||||||
//optional, safe to use default in most cases
|
|
||||||
|
|
||||||
// Called once by each miner thread to allocate thread local buffers and
|
|
||||||
// other initialization specific to miner threads.
|
|
||||||
bool ( *miner_thread_init ) ( int );
|
|
||||||
|
|
||||||
// Get thread local copy of blockheader with unique nonce.
|
|
||||||
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t* );
|
|
||||||
|
|
||||||
// Decode getwork blockheader
|
|
||||||
bool ( *work_decode ) ( struct work* );
|
|
||||||
|
|
||||||
// Extra getwork data
|
|
||||||
void ( *decode_extra_data ) ( struct work*, uint64_t* );
|
|
||||||
|
|
||||||
bool ( *submit_getwork_result ) ( CURL*, struct work* );
|
|
||||||
|
|
||||||
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
|
|
||||||
|
|
||||||
// Increment extranonce
|
|
||||||
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
|
|
||||||
|
|
||||||
void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
|
|
||||||
uint32_t*, uint32_t, uint32_t,
|
|
||||||
unsigned char* );
|
|
||||||
|
|
||||||
// Build mining.submit message
|
|
||||||
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
|
|
||||||
|
|
||||||
char* ( *malloc_txs_request ) ( struct work* );
|
|
||||||
|
|
||||||
// Big endian or little endian
|
|
||||||
void ( *set_work_data_endian ) ( struct work* );
|
|
||||||
|
|
||||||
// Diverge mining threads
|
|
||||||
//bool ( *do_this_thread ) ( int );
|
|
||||||
|
|
||||||
// After do_this_thread
|
|
||||||
//void ( *resync_threads ) ( int, struct work* );
|
|
||||||
|
|
||||||
json_t* ( *longpoll_rpc_call ) ( CURL*, int*, char* );
|
|
||||||
|
|
||||||
set_t optimizations;
|
|
||||||
int ( *get_work_data_size ) ();
|
|
||||||
int ntime_index;
|
|
||||||
int nbits_index;
|
|
||||||
int nonce_index; // use with caution, see warning below
|
|
||||||
int work_cmp_size;
|
|
||||||
} algo_gate_t;
|
|
||||||
|
|
||||||
extern algo_gate_t algo_gate;
|
|
||||||
|
|
||||||
// Declare generic null targets, default for many gate functions
|
|
||||||
// Functions that use one of these generic targets do not have
|
|
||||||
// a default defined below. Some algos may override a defined default
|
|
||||||
// with a generic.
|
|
||||||
void do_nothing();
|
|
||||||
bool return_true();
|
|
||||||
bool return_false();
|
|
||||||
void *return_null();
|
|
||||||
void algo_not_tested();
|
|
||||||
void algo_not_implemented();
|
|
||||||
void four_way_not_tested();
|
|
||||||
|
|
||||||
// Warning: algo_gate.nonce_index should only be used in targetted code
|
|
||||||
// due to different behaviours by different targets. The JR2 index uses an
|
|
||||||
// 8 bit offset while all others use 32 bit offset. c/c++ pointer arithmetic
|
|
||||||
// conventions results in different behaviour for pointers with different
|
|
||||||
// target sizes requiring customized casting to make it work consistently.
|
|
||||||
// Rant mode: yet another thing I hate about c/c++. Array indexes should
|
|
||||||
// be scaled, pointer offsets should always be bytes. No confusion and no
|
|
||||||
// hidden math.
|
|
||||||
|
|
||||||
#define STD_NTIME_INDEX 17
|
|
||||||
#define STD_NBITS_INDEX 18
|
|
||||||
#define STD_NONCE_INDEX 19 // 32 bit offset
|
|
||||||
#define STD_WORK_DATA_SIZE 128
|
|
||||||
#define STD_WORK_CMP_SIZE 76
|
|
||||||
|
|
||||||
//#define JR2_NONCE_INDEX 39 // 8 bit offset
|
|
||||||
|
|
||||||
// These indexes are only used with JSON RPC2 and are not gated.
|
|
||||||
//#define JR2_WORK_CMP_INDEX_2 43
|
|
||||||
//#define JR2_WORK_CMP_SIZE_2 33
|
|
||||||
|
|
||||||
// deprecated, use generic instead
|
|
||||||
int null_scanhash();
|
|
||||||
|
|
||||||
// Default generic, may be used in many cases.
|
|
||||||
// N-way is more complicated, requires many different implementations
|
|
||||||
// depending on architecture, input format, and output format.
|
|
||||||
// Naming convention is scanhash_[N]way_[input format]in_[output format]out
|
|
||||||
// N = number of lanes
|
|
||||||
// input/output format:
|
|
||||||
// 32: 32 bit interleaved parallel lanes
|
|
||||||
// 64: 64 bit interleaved parallel lanes
|
|
||||||
// 640: input only, not interleaved, contiguous serial 640 bit lanes.
|
|
||||||
// 256: output only, not interleaved, contiguous serial 256 bit lanes.
|
|
||||||
|
|
||||||
int scanhash_generic( struct work *work, uint32_t max_nonce,
|
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
|
||||||
|
|
||||||
//int scanhash_4way_64in_64out( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
|
|
||||||
//int scanhash_4way_64in_256out( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
|
|
||||||
int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce,
|
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
|
|
||||||
//int scanhash_8way_32in_32out( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(SIMD512)
|
|
||||||
|
|
||||||
//int scanhash_8way_64in_64out( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
|
|
||||||
//int scanhash_8way_64in_256out( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
|
|
||||||
int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce,
|
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
|
|
||||||
//int scanhash_16way_32in_32out( struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// displays warning
|
|
||||||
int null_hash();
|
|
||||||
|
|
||||||
// optional safe targets, default listed first unless noted.
|
|
||||||
|
|
||||||
void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
|
|
||||||
uint32_t* end_nonce_ptr );
|
|
||||||
|
|
||||||
void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
|
|
||||||
void sha256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
|
|
||||||
// OpenSSL sha256 deprecated
|
|
||||||
//void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
|
|
||||||
|
|
||||||
bool std_le_work_decode( struct work *work );
|
|
||||||
bool std_be_work_decode( struct work *work );
|
|
||||||
|
|
||||||
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
|
|
||||||
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
|
|
||||||
|
|
||||||
void std_le_build_stratum_request( char *req, struct work *work );
|
|
||||||
void std_be_build_stratum_request( char *req, struct work *work );
|
|
||||||
|
|
||||||
char* std_malloc_txs_request( struct work *work );
|
|
||||||
|
|
||||||
// Default is do_nothing, little endian is assumed
|
|
||||||
void set_work_data_big_endian( struct work *work );
|
|
||||||
|
|
||||||
void std_build_block_header( struct work* g_work, uint32_t version,
|
|
||||||
uint32_t *prevhash, uint32_t *merkle_root,
|
|
||||||
uint32_t ntime, uint32_t nbits,
|
|
||||||
unsigned char *final_sapling_hash );
|
|
||||||
|
|
||||||
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
|
|
||||||
|
|
||||||
json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
|
|
||||||
|
|
||||||
int std_get_work_data_size();
|
|
||||||
|
|
||||||
// Gate admin functions
|
|
||||||
|
|
||||||
// Called from main to initialize all gate functions and algo-specific data
|
|
||||||
// by calling the algo's register function.
|
|
||||||
bool register_algo_gate( int algo, algo_gate_t *gate );
|
|
||||||
|
|
||||||
// Called by algos to override any default gate functions that are applicable
|
|
||||||
// and do any other algo-specific initialization.
|
|
||||||
// The register functions for all the algos can be declared here to reduce
|
|
||||||
// compiler warnings but that's just more work for devs adding new algos.
|
|
||||||
bool register_algo( algo_gate_t *gate );
|
|
||||||
|
|
||||||
// use this to call the hash function of an algo directly, ie util.c test.
|
|
||||||
void exec_hash_function( int algo, void *output, const void *pdata );
|
|
||||||
|
|
||||||
// Validate a string as a known algo and alias, updates arg to proper
|
|
||||||
// algo name if valid alias; otherwise the argument is left unchanged.
|
|
||||||
void get_algo_alias( char **algo_or_alias );
|
|
||||||
|
|
||||||
#endif
|
|
@@ -1,732 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2014 ccminer team
|
|
||||||
*
|
|
||||||
* Implementation by tpruvot (based on cgminer)
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
|
||||||
* under the terms of the GNU General Public License as published by the Free
|
|
||||||
* Software Foundation; either version 2 of the License, or (at your option)
|
|
||||||
* any later version. See COPYING for more details.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define APIVERSION "1.0"
|
|
||||||
|
|
||||||
#ifdef WIN32
|
|
||||||
# define _WINSOCK_DEPRECATED_NO_WARNINGS
|
|
||||||
# include <winsock2.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <sys/time.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <stdarg.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include "algo/sha/sha1-hash.h"
|
|
||||||
|
|
||||||
#include "miner.h"
|
|
||||||
#include "sysinfos.c"
|
|
||||||
#ifndef WIN32
|
|
||||||
# include <errno.h>
|
|
||||||
# include <sys/socket.h>
|
|
||||||
# include <netinet/in.h>
|
|
||||||
# include <arpa/inet.h>
|
|
||||||
# include <netdb.h>
|
|
||||||
# define SOCKETTYPE long
|
|
||||||
# define SOCKETFAIL(a) ((a) < 0)
|
|
||||||
# define INVSOCK -1 /* INVALID_SOCKET */
|
|
||||||
# define INVINETADDR -1 /* INADDR_NONE */
|
|
||||||
# define CLOSESOCKET close
|
|
||||||
# define SOCKETINIT {}
|
|
||||||
# define SOCKERRMSG strerror(errno)
|
|
||||||
#else
|
|
||||||
# define SOCKETTYPE SOCKET
|
|
||||||
# define SOCKETFAIL(a) ((a) == SOCKET_ERROR)
|
|
||||||
# define INVSOCK INVALID_SOCKET
|
|
||||||
# define INVINETADDR INADDR_NONE
|
|
||||||
# define CLOSESOCKET closesocket
|
|
||||||
# define in_addr_t uint32_t
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define GROUP(g) (toupper(g))
|
|
||||||
#define PRIVGROUP GROUP('W')
|
|
||||||
#define NOPRIVGROUP GROUP('R')
|
|
||||||
#define ISPRIVGROUP(g) (GROUP(g) == PRIVGROUP)
|
|
||||||
#define GROUPOFFSET(g) (GROUP(g) - GROUP('A'))
|
|
||||||
#define VALIDGROUP(g) (GROUP(g) >= GROUP('A') && GROUP(g) <= GROUP('Z'))
|
|
||||||
#define COMMANDS(g) (apigroups[GROUPOFFSET(g)].commands)
|
|
||||||
#define DEFINEDGROUP(g) (ISPRIVGROUP(g) || COMMANDS(g) != NULL)
|
|
||||||
struct APIGROUPS {
|
|
||||||
// This becomes a string like: "|cmd1|cmd2|cmd3|" so it's quick to search
|
|
||||||
char *commands;
|
|
||||||
} apigroups['Z' - 'A' + 1]; // only A=0 to Z=25 (R: noprivs, W: allprivs)
|
|
||||||
|
|
||||||
struct IP4ACCESS {
|
|
||||||
in_addr_t ip;
|
|
||||||
in_addr_t mask;
|
|
||||||
char group;
|
|
||||||
};
|
|
||||||
|
|
||||||
static int ips = 1;
|
|
||||||
static struct IP4ACCESS *ipaccess = NULL;
|
|
||||||
|
|
||||||
// Socket data buffers
|
|
||||||
#define MYBUFSIZ 16384
|
|
||||||
#define SOCK_REC_BUFSZ 1024
|
|
||||||
|
|
||||||
// Socket is on 127.0.0.1
|
|
||||||
#define QUEUE 10
|
|
||||||
|
|
||||||
#define ALLIP4 "0.0.0.0"
|
|
||||||
|
|
||||||
static const char *localaddr = "127.0.0.1";
|
|
||||||
static const char *UNAVAILABLE = " - API will not be available";
|
|
||||||
static char *buffer = NULL;
|
|
||||||
static time_t startup = 0;
|
|
||||||
static int bye = 0;
|
|
||||||
|
|
||||||
extern char *opt_api_allow;
|
|
||||||
extern int opt_api_listen; /* port */
|
|
||||||
extern int opt_api_remote;
|
|
||||||
extern double global_hashrate;
|
|
||||||
//extern uint32_t accepted_count;
|
|
||||||
//extern uint32_t rejected_count;
|
|
||||||
//extern uint32_t solved_count;
|
|
||||||
|
|
||||||
#define cpu_threads opt_n_threads
|
|
||||||
|
|
||||||
#define USE_MONITORING
|
|
||||||
extern float cpu_temp(int);
|
|
||||||
extern uint32_t cpu_clock(int);
|
|
||||||
//extern int cpu_fanpercent(void);
|
|
||||||
|
|
||||||
/***************************************************************/
|
|
||||||
|
|
||||||
static void cpustatus(int thr_id)
|
|
||||||
{
|
|
||||||
if ( thr_id >= 0 && thr_id < opt_n_threads )
|
|
||||||
{
|
|
||||||
// struct cpu_info *cpu = &thr_info[thr_id].cpu;
|
|
||||||
char buf[512]; *buf = '\0';
|
|
||||||
char units[4] = {0};
|
|
||||||
double hashrate = thr_hashrates[thr_id];
|
|
||||||
|
|
||||||
scale_hash_for_display ( &hashrate, units );
|
|
||||||
snprintf( buf, sizeof(buf), "CPU=%d;%sH/s=%.2f|", thr_id, units,
|
|
||||||
hashrate );
|
|
||||||
// append to buffer
|
|
||||||
strcat( buffer, buf );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*****************************************************************************/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns miner global infos
|
|
||||||
*/
|
|
||||||
static char *getsummary( char *params )
|
|
||||||
{
|
|
||||||
char algo[64]; *algo = '\0';
|
|
||||||
time_t ts = time(NULL);
|
|
||||||
double uptime = difftime(ts, startup);
|
|
||||||
double accps = (60.0 * accepted_share_count) / (uptime ? uptime : 1.0);
|
|
||||||
double diff = net_diff > 0. ? net_diff : stratum_diff;
|
|
||||||
char diff_str[16];
|
|
||||||
double hrate = (double)global_hashrate;
|
|
||||||
struct cpu_info cpu = { 0 };
|
|
||||||
#ifdef USE_MONITORING
|
|
||||||
cpu.has_monitoring = true;
|
|
||||||
cpu.cpu_temp = cpu_temp(0);
|
|
||||||
cpu.cpu_fan = cpu_fanpercent();
|
|
||||||
cpu.cpu_clock = cpu_clock(0);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
get_currentalgo(algo, sizeof(algo));
|
|
||||||
|
|
||||||
// if diff is integer don't display decimals
|
|
||||||
if ( diff == trunc( diff ) )
|
|
||||||
sprintf( diff_str, "%.0f", diff);
|
|
||||||
else
|
|
||||||
sprintf( diff_str, "%.6f", diff);
|
|
||||||
|
|
||||||
*buffer = '\0';
|
|
||||||
sprintf( buffer,
|
|
||||||
"NAME=%s;VER=%s;API=%s;"
|
|
||||||
"ALGO=%s;CPUS=%d;URL=%s;"
|
|
||||||
"HS=%.2f;KHS=%.2f;ACC=%d;REJ=%d;SOL=%d;"
|
|
||||||
"ACCMN=%.3f;DIFF=%s;TEMP=%.1f;FAN=%d;FREQ=%d;"
|
|
||||||
"UPTIME=%.0f;TS=%u|",
|
|
||||||
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
|
|
||||||
algo, opt_n_threads, short_url,
|
|
||||||
hrate, hrate/1000.0, accepted_share_count, rejected_share_count,
|
|
||||||
solved_block_count,
|
|
||||||
accps, diff_str, cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
|
|
||||||
uptime, (uint32_t) ts);
|
|
||||||
return buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns cpu/thread specific stats
|
|
||||||
*/
|
|
||||||
static char *getthreads(char *params)
|
|
||||||
{
|
|
||||||
*buffer = '\0';
|
|
||||||
for (int i = 0; i < opt_n_threads; i++)
|
|
||||||
cpustatus(i);
|
|
||||||
return buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Is remote control allowed ?
|
|
||||||
*/
|
|
||||||
static bool check_remote_access(void)
|
|
||||||
{
|
|
||||||
return (opt_api_remote > 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Change pool url (see --url parameter)
|
|
||||||
* seturl|stratum+tcp://XeVrkPrWB7pDbdFLfKhF1Z3xpqhsx6wkH3:X@stratum+tcp://mine.xpool.ca:1131|
|
|
||||||
* seturl|stratum+tcp://Danila.1:X@pool.ipominer.com:3335|
|
|
||||||
*/
|
|
||||||
extern bool stratum_need_reset;
|
|
||||||
static char *remote_seturl(char *params)
|
|
||||||
{
|
|
||||||
*buffer = '\0';
|
|
||||||
if (!check_remote_access())
|
|
||||||
return buffer;
|
|
||||||
parse_arg('o', params);
|
|
||||||
stratum_need_reset = true;
|
|
||||||
sprintf(buffer, "%s", "ok|");
|
|
||||||
return buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*-hash*
|
|
||||||
* Ask the miner to quit
|
|
||||||
*/
|
|
||||||
static char *remote_quit(char *params)
|
|
||||||
{
|
|
||||||
*buffer = '\0';
|
|
||||||
if (!check_remote_access())
|
|
||||||
return buffer;
|
|
||||||
bye = 1;
|
|
||||||
sprintf(buffer, "%s", "bye|");
|
|
||||||
return buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
static char *gethelp(char *params);
|
|
||||||
struct CMDS {
|
|
||||||
const char *name;
|
|
||||||
char *(*func)(char *);
|
|
||||||
} cmds[] = {
|
|
||||||
{ "summary", getsummary },
|
|
||||||
{ "threads", getthreads },
|
|
||||||
/* remote functions */
|
|
||||||
{ "seturl", remote_seturl },
|
|
||||||
{ "quit", remote_quit },
|
|
||||||
/* keep it the last */
|
|
||||||
{ "help", gethelp },
|
|
||||||
};
|
|
||||||
#define CMDMAX ARRAY_SIZE(cmds)
|
|
||||||
|
|
||||||
static char *gethelp(char *params)
|
|
||||||
{
|
|
||||||
*buffer = '\0';
|
|
||||||
char * p = buffer;
|
|
||||||
for (int i = 0; i < CMDMAX-1; i++)
|
|
||||||
p += sprintf(p, "%s\n", cmds[i].name);
|
|
||||||
sprintf(p, "|");
|
|
||||||
return buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Send a command result, including its terminating NUL byte, to the client.
 * A NULL result is answered with a single NUL byte.
 *
 * @return the value returned by send()
 */
static int send_result(SOCKETTYPE c, char *result)
{
    const char *payload = "";
    int nbytes = 1;

    if (result) {
        payload = result;
        nbytes = (int) strlen(result) + 1;
    }

    // ignore failure - it's closed immediately anyway
    return (int) send(c, payload, nbytes, 0);
}
|
|
||||||
|
|
||||||
/* ---- Base64 Encoding/Decoding Table --- */
static const char table64[]=
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/**
 * Base64-encode a byte buffer into a NUL-terminated string.
 *
 * The output buffer is zeroed first and the encoding is written directly
 * into it, four characters per group of up to three input bytes, with '='
 * padding on the final group. If the output buffer is too small, encoding
 * stops after the last complete group that still leaves room for the
 * terminator, so the result is always a valid C string.
 *
 * The input is declared as unsigned char (rather than the project's uchar
 * alias) so the function does not depend on any project header.
 *
 * @param indata  bytes to encode (may be NULL only if insize is 0)
 * @param insize  number of bytes in indata
 * @param outptr  destination buffer
 * @param outlen  size of outptr in bytes, including room for the terminator
 * @return number of characters written, not counting the terminator
 */
static size_t base64_encode(const unsigned char *indata, size_t insize, char *outptr, size_t outlen)
{
    size_t len = 0;

    if (outptr == NULL || outlen == 0)
        return 0;

    memset(outptr, 0, outlen);

    if (indata == NULL)
        return 0;

    // each round needs 4 output characters plus space for the final NUL
    while (insize > 0 && outlen - len > 4) {
        unsigned char ibuf[3] = { 0, 0, 0 };
        size_t inputparts = insize < 3 ? insize : 3;

        for (size_t i = 0; i < inputparts; i++)
            ibuf[i] = indata[i];
        indata += inputparts;
        insize -= inputparts;

        outptr[len]     = table64[ibuf[0] >> 2];
        outptr[len + 1] = table64[((ibuf[0] & 0x03) << 4) | (ibuf[1] >> 4)];
        outptr[len + 2] = (inputparts > 1)
                        ? table64[((ibuf[1] & 0x0F) << 2) | (ibuf[2] >> 6)]
                        : '=';
        outptr[len + 3] = (inputparts > 2)
                        ? table64[ibuf[2] & 0x3F]
                        : '=';
        len += 4;
    }

    outptr[len] = '\0';
    return len;
}
|
|
||||||
|
|
||||||
//#include "compat/curl-for-windows/openssl/openssl/crypto/sha/sha.h"
|
|
||||||
|
|
||||||
/* websocket handshake (tested in Chrome) */
|
|
||||||
static int websocket_handshake(SOCKETTYPE c, char *result, char *clientkey)
|
|
||||||
{
|
|
||||||
char answer[256];
|
|
||||||
char inpkey[128] = { 0 };
|
|
||||||
char seckey[64];
|
|
||||||
uchar sha1[20];
|
|
||||||
|
|
||||||
if (opt_protocol)
|
|
||||||
applog(LOG_DEBUG, "clientkey: %s", clientkey);
|
|
||||||
|
|
||||||
sprintf(inpkey, "%s258EAFA5-E914-47DA-95CA-C5AB0DC85B11", clientkey);
|
|
||||||
|
|
||||||
// SHA-1 test from rfc, returns in base64 "s3pPLMBiTxaQ9kYGzzhZRbK+xOo="
|
|
||||||
//sprintf(inpkey, "dGhlIHNhbXBsZSBub25jZQ==258EAFA5-E914-47DA-95CA-C5AB0DC85B11");
|
|
||||||
|
|
||||||
sph_sha1_full( sha1, inpkey, strlen(inpkey) );
|
|
||||||
|
|
||||||
base64_encode(sha1, 20, seckey, sizeof(seckey));
|
|
||||||
|
|
||||||
sprintf(answer,
|
|
||||||
"HTTP/1.1 101 Switching Protocol\r\n"
|
|
||||||
"Upgrade: WebSocket\r\nConnection: Upgrade\r\n"
|
|
||||||
"Sec-WebSocket-Accept: %s\r\n"
|
|
||||||
"Sec-WebSocket-Protocol: text\r\n"
|
|
||||||
"\r\n", seckey);
|
|
||||||
|
|
||||||
// data result as tcp frame
|
|
||||||
|
|
||||||
uchar hd[10] = { 0 };
|
|
||||||
hd[0] = 129; // 0x1 text frame (FIN + opcode)
|
|
||||||
uint64_t datalen = (uint64_t) strlen(result);
|
|
||||||
uint8_t frames = 2;
|
|
||||||
if (datalen <= 125) {
|
|
||||||
hd[1] = (uchar) (datalen);
|
|
||||||
} else if (datalen <= 65535) {
|
|
||||||
hd[1] = (uchar) 126;
|
|
||||||
hd[2] = (uchar) (datalen >> 8);
|
|
||||||
hd[3] = (uchar) (datalen);
|
|
||||||
frames = 4;
|
|
||||||
} else {
|
|
||||||
hd[1] = (uchar) 127;
|
|
||||||
hd[2] = (uchar) (datalen >> 56);
|
|
||||||
hd[3] = (uchar) (datalen >> 48);
|
|
||||||
hd[4] = (uchar) (datalen >> 40);
|
|
||||||
hd[5] = (uchar) (datalen >> 32);
|
|
||||||
hd[6] = (uchar) (datalen >> 24);
|
|
||||||
hd[7] = (uchar) (datalen >> 16);
|
|
||||||
hd[8] = (uchar) (datalen >> 8);
|
|
||||||
hd[9] = (uchar) (datalen);
|
|
||||||
frames = 10;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t handlen = strlen(answer);
|
|
||||||
uchar *data = (uchar*) calloc(1, handlen + frames + (size_t) datalen + 1);
|
|
||||||
if (data == NULL)
|
|
||||||
return -1;
|
|
||||||
else {
|
|
||||||
uchar *p = data;
|
|
||||||
// HTTP header 101
|
|
||||||
memcpy(p, answer, handlen);
|
|
||||||
p += handlen;
|
|
||||||
// WebSocket Frame - Header + Data
|
|
||||||
memcpy(p, hd, frames);
|
|
||||||
memcpy(p + frames, result, (size_t)datalen);
|
|
||||||
send(c, (const char*)data, (int) (strlen(answer) + frames + (size_t)datalen + 1), 0);
|
|
||||||
free(data);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* N.B. IP4 addresses are by Definition 32bit big endian on all platforms
|
|
||||||
*/
|
|
||||||
static void setup_ipaccess()
|
|
||||||
{
|
|
||||||
char *buf = NULL, *ptr, *comma, *slash, *dot;
|
|
||||||
int ipcount, mask, octet, i;
|
|
||||||
char group;
|
|
||||||
|
|
||||||
buf = (char*) calloc(1, strlen(opt_api_allow) + 1);
|
|
||||||
if (unlikely(!buf))
|
|
||||||
proper_exit(1);//, "Failed to malloc ipaccess buf");
|
|
||||||
|
|
||||||
strcpy(buf, opt_api_allow);
|
|
||||||
ipcount = 1;
|
|
||||||
ptr = buf;
|
|
||||||
while (*ptr) if (*(ptr++) == ',')
|
|
||||||
ipcount++;
|
|
||||||
|
|
||||||
// possibly more than needed, but never less
|
|
||||||
ipaccess = (struct IP4ACCESS *) calloc(ipcount, sizeof(struct IP4ACCESS));
|
|
||||||
if (unlikely(!ipaccess))
|
|
||||||
proper_exit(1);//, "Failed to calloc ipaccess");
|
|
||||||
|
|
||||||
ips = 0;
|
|
||||||
ptr = buf;
|
|
||||||
while (ptr && *ptr) {
|
|
||||||
while (*ptr == ' ' || *ptr == '\t')
|
|
||||||
ptr++;
|
|
||||||
|
|
||||||
if (*ptr == ',') {
|
|
||||||
ptr++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
comma = strchr(ptr, ',');
|
|
||||||
if (comma)
|
|
||||||
*(comma++) = '\0';
|
|
||||||
|
|
||||||
group = NOPRIVGROUP;
|
|
||||||
|
|
||||||
if (isalpha(*ptr) && *(ptr+1) == ':') {
|
|
||||||
if (DEFINEDGROUP(*ptr))
|
|
||||||
group = GROUP(*ptr);
|
|
||||||
ptr += 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
ipaccess[ips].group = group;
|
|
||||||
|
|
||||||
if (strcmp(ptr, ALLIP4) == 0)
|
|
||||||
ipaccess[ips].ip = ipaccess[ips].mask = 0;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
slash = strchr(ptr, '/');
|
|
||||||
if (!slash)
|
|
||||||
ipaccess[ips].mask = 0xffffffff;
|
|
||||||
else {
|
|
||||||
*(slash++) = '\0';
|
|
||||||
mask = atoi(slash);
|
|
||||||
if (mask < 1 || mask > 32)
|
|
||||||
goto popipo; // skip invalid/zero
|
|
||||||
|
|
||||||
ipaccess[ips].mask = 0;
|
|
||||||
while (mask-- >= 0) {
|
|
||||||
octet = 1 << (mask % 8);
|
|
||||||
ipaccess[ips].mask |= (octet << (24 - (8 * (mask >> 3))));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ipaccess[ips].ip = 0; // missing default to '.0'
|
|
||||||
for (i = 0; ptr && (i < 4); i++) {
|
|
||||||
dot = strchr(ptr, '.');
|
|
||||||
if (dot)
|
|
||||||
*(dot++) = '\0';
|
|
||||||
octet = atoi(ptr);
|
|
||||||
|
|
||||||
if (octet < 0 || octet > 0xff)
|
|
||||||
goto popipo; // skip invalid
|
|
||||||
|
|
||||||
ipaccess[ips].ip |= (octet << (24 - (i * 8)));
|
|
||||||
|
|
||||||
ptr = dot;
|
|
||||||
}
|
|
||||||
|
|
||||||
ipaccess[ips].ip &= ipaccess[ips].mask;
|
|
||||||
}
|
|
||||||
|
|
||||||
ips++;
|
|
||||||
popipo:
|
|
||||||
ptr = comma;
|
|
||||||
}
|
|
||||||
|
|
||||||
free(buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool check_connect(struct sockaddr_in *cli, char **connectaddr, char *group)
|
|
||||||
{
|
|
||||||
bool addrok = false;
|
|
||||||
|
|
||||||
*connectaddr = inet_ntoa(cli->sin_addr);
|
|
||||||
|
|
||||||
*group = NOPRIVGROUP;
|
|
||||||
if (opt_api_allow) {
|
|
||||||
int client_ip = htonl(cli->sin_addr.s_addr);
|
|
||||||
for (int i = 0; i < ips; i++) {
|
|
||||||
if ((client_ip & ipaccess[i].mask) == ipaccess[i].ip) {
|
|
||||||
addrok = true;
|
|
||||||
*group = ipaccess[i].group;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
addrok = (strcmp(*connectaddr, localaddr) == 0);
|
|
||||||
|
|
||||||
return addrok;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void api()
|
|
||||||
{
|
|
||||||
const char *addr = opt_api_allow;
|
|
||||||
unsigned short port = (unsigned short) opt_api_listen; // 4048
|
|
||||||
char buf[MYBUFSIZ];
|
|
||||||
int c, n, bound;
|
|
||||||
char *connectaddr;
|
|
||||||
char *binderror;
|
|
||||||
char group;
|
|
||||||
time_t bindstart;
|
|
||||||
struct sockaddr_in serv;
|
|
||||||
struct sockaddr_in cli;
|
|
||||||
uint32_t clisiz;
|
|
||||||
bool addrok = false;
|
|
||||||
long long counter;
|
|
||||||
char *result;
|
|
||||||
char *params;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
SOCKETTYPE *apisock;
|
|
||||||
if (!opt_api_listen && opt_debug) {
|
|
||||||
applog(LOG_DEBUG, "API disabled");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (opt_api_allow) {
|
|
||||||
setup_ipaccess();
|
|
||||||
if (ips == 0) {
|
|
||||||
applog(LOG_WARNING, "API not running (no valid IPs specified)%s", UNAVAILABLE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
apisock = (SOCKETTYPE*) calloc(1, sizeof(*apisock));
|
|
||||||
*apisock = INVSOCK;
|
|
||||||
|
|
||||||
sleep(1);
|
|
||||||
|
|
||||||
*apisock = socket(AF_INET, SOCK_STREAM, 0);
|
|
||||||
if (*apisock == INVSOCK) {
|
|
||||||
applog(LOG_ERR, "API initialisation failed (%s)%s", strerror(errno), UNAVAILABLE);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
memset(&serv, 0, sizeof(serv));
|
|
||||||
serv.sin_family = AF_INET;
|
|
||||||
serv.sin_addr.s_addr = inet_addr(addr);
|
|
||||||
if (serv.sin_addr.s_addr == (in_addr_t)INVINETADDR) {
|
|
||||||
applog(LOG_ERR, "API initialisation 2 failed (%s)%s", strerror(errno), UNAVAILABLE);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
serv.sin_port = htons(port);
|
|
||||||
|
|
||||||
#ifndef WIN32
|
|
||||||
// On linux with SO_REUSEADDR, bind will get the port if the previous
|
|
||||||
// socket is closed (even if it is still in TIME_WAIT) but fail if
|
|
||||||
// another program has it open - which is what we want
|
|
||||||
int optval = 1;
|
|
||||||
// If it doesn't work, we don't really care - just show a debug message
|
|
||||||
if (SOCKETFAIL(setsockopt(*apisock, SOL_SOCKET, SO_REUSEADDR, (void *)(&optval), sizeof(optval))))
|
|
||||||
applog(LOG_DEBUG, "API setsockopt SO_REUSEADDR failed (ignored): %s", SOCKERRMSG);
|
|
||||||
#else
|
|
||||||
// On windows a 2nd program can bind to a port>1024 already in use unless
|
|
||||||
// SO_EXCLUSIVEADDRUSE is used - however then the bind to a closed port
|
|
||||||
// in TIME_WAIT will fail until the timeout - so we leave the options alone
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// try for 1 minute ... in case the old one hasn't completely gone yet
|
|
||||||
bound = 0;
|
|
||||||
bindstart = time(NULL);
|
|
||||||
while (bound == 0) {
|
|
||||||
if (bind(*apisock, (struct sockaddr *)(&serv), sizeof(serv)) < 0) {
|
|
||||||
binderror = strerror(errno);
|
|
||||||
if ((time(NULL) - bindstart) > 61)
|
|
||||||
break;
|
|
||||||
else {
|
|
||||||
if (!opt_quiet || opt_debug)
|
|
||||||
applog(LOG_WARNING, "API bind to port %d failed - trying again in 20sec", port);
|
|
||||||
sleep(20);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
bound = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bound == 0) {
|
|
||||||
applog(LOG_WARNING, "API bind to port %d failed (%s)%s", port, binderror, UNAVAILABLE);
|
|
||||||
free(apisock);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (SOCKETFAIL(listen(*apisock, QUEUE))) {
|
|
||||||
applog(LOG_ERR, "API initialisation 3 failed (%s)%s", strerror(errno), UNAVAILABLE);
|
|
||||||
CLOSESOCKET(*apisock);
|
|
||||||
free(apisock);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
buffer = (char *) calloc(1, MYBUFSIZ + 1);
|
|
||||||
|
|
||||||
counter = 0;
|
|
||||||
while (bye == 0) {
|
|
||||||
counter++;
|
|
||||||
|
|
||||||
clisiz = sizeof(cli);
|
|
||||||
if (SOCKETFAIL(c = accept((SOCKETTYPE)*apisock, (struct sockaddr *)(&cli), &clisiz))) {
|
|
||||||
applog(LOG_ERR, "API failed (%s)%s", strerror(errno), UNAVAILABLE);
|
|
||||||
CLOSESOCKET(*apisock);
|
|
||||||
free(apisock);
|
|
||||||
free(buffer);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
addrok = check_connect(&cli, &connectaddr, &group);
|
|
||||||
if (opt_debug && opt_protocol)
|
|
||||||
applog(LOG_DEBUG, "API: connection from %s - %s",
|
|
||||||
connectaddr, addrok ? "Accepted" : "Ignored");
|
|
||||||
|
|
||||||
if (addrok) {
|
|
||||||
bool fail;
|
|
||||||
char *wskey = NULL;
|
|
||||||
n = recv(c, &buf[0], SOCK_REC_BUFSZ, 0);
|
|
||||||
|
|
||||||
fail = SOCKETFAIL(n);
|
|
||||||
if (fail)
|
|
||||||
buf[0] = '\0';
|
|
||||||
else if (n > 0 && buf[n-1] == '\n') {
|
|
||||||
/* telnet compat \r\n */
|
|
||||||
buf[n-1] = '\0'; n--;
|
|
||||||
if (n > 0 && buf[n-1] == '\r')
|
|
||||||
buf[n-1] = '\0';
|
|
||||||
}
|
|
||||||
if (n >= 0)
|
|
||||||
buf[n] = '\0';
|
|
||||||
|
|
||||||
//if (opt_debug && opt_protocol && n > 0)
|
|
||||||
// applog(LOG_DEBUG, "API: recv command: (%d) '%s'+char(%x)", n, buf, buf[n-1]);
|
|
||||||
|
|
||||||
if (!fail) {
|
|
||||||
char *msg = NULL;
|
|
||||||
/* Websocket requests compat. */
|
|
||||||
if ((msg = strstr(buf, "GET /")) && strlen(msg) > 5) {
|
|
||||||
char cmd[256] = { 0 };
|
|
||||||
sscanf(&msg[5], "%s\n", cmd);
|
|
||||||
params = strchr(cmd, '/');
|
|
||||||
if (params)
|
|
||||||
*(params++) = '|';
|
|
||||||
params = strchr(cmd, '/');
|
|
||||||
if (params)
|
|
||||||
*(params++) = '\0';
|
|
||||||
wskey = strstr(msg, "Sec-WebSocket-Key");
|
|
||||||
if (wskey) {
|
|
||||||
char *eol = strchr(wskey, '\r');
|
|
||||||
if (eol) *eol = '\0';
|
|
||||||
wskey = strchr(wskey, ':');
|
|
||||||
wskey++;
|
|
||||||
while ((*wskey) == ' ') wskey++; // ltrim
|
|
||||||
}
|
|
||||||
n = sprintf(buf, "%s", cmd);
|
|
||||||
}
|
|
||||||
|
|
||||||
params = strchr(buf, '|');
|
|
||||||
if (params != NULL)
|
|
||||||
*(params++) = '\0';
|
|
||||||
|
|
||||||
if (opt_debug && opt_protocol && n > 0)
|
|
||||||
applog(LOG_DEBUG, "API: exec command %s(%s)", buf, params);
|
|
||||||
|
|
||||||
for (i = 0; i < CMDMAX; i++) {
|
|
||||||
if (strcmp(buf, cmds[i].name) == 0) {
|
|
||||||
if (params && strlen(params)) {
|
|
||||||
// remove possible trailing |
|
|
||||||
if (params[strlen(params) - 1] == '|')
|
|
||||||
params[strlen(params) - 1] = '\0';
|
|
||||||
}
|
|
||||||
result = (cmds[i].func)(params);
|
|
||||||
if (wskey) {
|
|
||||||
websocket_handshake(c, result, wskey);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
send_result(c, result);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CLOSESOCKET(c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
CLOSESOCKET(*apisock);
|
|
||||||
free(apisock);
|
|
||||||
free(buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* external access */
|
|
||||||
void *api_thread(void *userdata)
|
|
||||||
{
|
|
||||||
struct thr_info *mythr = (struct thr_info*)userdata;
|
|
||||||
|
|
||||||
startup = time(NULL);
|
|
||||||
api();
|
|
||||||
tq_freeze(mythr->q);
|
|
||||||
|
|
||||||
if (bye) {
|
|
||||||
// quit command
|
|
||||||
proper_exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
@@ -1,57 +0,0 @@
|
|||||||
#!/bin/bash
#
# This script is not intended for users, it is only used for compile testing
# during development. However the information contained may provide compilation
# tips to users.

# Remove binaries from earlier runs; -f keeps rm quiet about files that
# do not exist.
rm -f cpuminer cpuminer-armv9-crypto-sha3 cpuminer-armv9-crypto cpuminer-armv9 cpuminer-armv8.5-crypto-sha3-sve2 cpuminer-armv8.4-crypto-sha3 cpuminer-armv8 cpuminer-armv8-crypto cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2-sha cpuminer-avx2-sha-vaes cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 cpuminer-zen4 cpuminer-alderlake cpuminer-x64

# armv9 needs gcc-13
# -march=armv9-a includes SVE2 but no crypto
# -march=armv9-a+crypto adds AES & SHA2 but not SHA512

make distclean || echo clean
rm -f config.status
./autogen.sh || echo done
CFLAGS="-O3 -march=armv9-a+crypto+sha3 -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
mv cpuminer cpuminer-armv9-crypto-sha3

make clean || echo clean
CFLAGS="-O3 -march=armv9-a+crypto -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
mv cpuminer cpuminer-armv9-crypto

make clean || echo clean
CFLAGS="-O3 -march=armv9-a -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
mv cpuminer cpuminer-armv9

# SVE2 available in armv8.5
make clean || echo clean
CFLAGS="-O3 -march=armv8.5-a+crypto+sha3+sve2 -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
mv cpuminer cpuminer-armv8.5-crypto-sha3-sve2

# SHA3 available in armv8.4
make clean || echo clean
CFLAGS="-O3 -march=armv8.4-a+crypto+sha3 -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
mv cpuminer cpuminer-armv8.4-crypto-sha3

make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=armv8-a+crypto -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
mv cpuminer cpuminer-armv8-crypto

make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=armv8-a -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
mv cpuminer cpuminer-armv8

# Native build for the host CPU; the result stays named "cpuminer".
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=native -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
|
|
@@ -1,13 +0,0 @@
|
|||||||
#!/bin/sh
#
# Regenerates the autotools build files.
#
# Requirements:
#   autoconf 2.5x, preferably 2.57 or later
#   automake 1.7 or later (1.6 might work)

# stop at the first step that fails
set -e

aclocal
autoheader
automake \
    --foreign \
    --add-missing \
    --force-missing \
    --copy
# alternative: automake --gnu --add-missing --copy
autoconf
|
|
||||||
|
|
@@ -1,375 +0,0 @@
|
|||||||
/* src/config/bitcoin-config.h. Generated from bitcoin-config.h.in by configure. */
|
|
||||||
/* src/config/bitcoin-config.h.in. Generated from configure.ac by autoheader. */
|
|
||||||
|
|
||||||
#ifndef BITCOIN_CONFIG_H
|
|
||||||
|
|
||||||
#define BITCOIN_CONFIG_H
|
|
||||||
|
|
||||||
/* Define if building universal (internal helper macro) */
|
|
||||||
/* #undef AC_APPLE_UNIVERSAL_BUILD */
|
|
||||||
|
|
||||||
/* Version Build */
|
|
||||||
#define CLIENT_VERSION_BUILD 0
|
|
||||||
|
|
||||||
/* Version is release */
|
|
||||||
#define CLIENT_VERSION_IS_RELEASE true
|
|
||||||
|
|
||||||
/* Major version */
|
|
||||||
#define CLIENT_VERSION_MAJOR 0
|
|
||||||
|
|
||||||
/* Minor version */
|
|
||||||
#define CLIENT_VERSION_MINOR 11
|
|
||||||
|
|
||||||
/* Build revision */
|
|
||||||
#define CLIENT_VERSION_REVISION 2
|
|
||||||
|
|
||||||
/* Copyright year */
|
|
||||||
#define COPYRIGHT_YEAR 2015
|
|
||||||
|
|
||||||
/* Define to 1 to enable wallet functions */
|
|
||||||
#define ENABLE_WALLET 1
|
|
||||||
|
|
||||||
/* parameter and return value type for __fdelt_chk */
|
|
||||||
/* #undef FDELT_TYPE */
|
|
||||||
|
|
||||||
/* define if the Boost library is available */
|
|
||||||
#define HAVE_BOOST /**/
|
|
||||||
|
|
||||||
/* define if the Boost::Chrono library is available */
|
|
||||||
#define HAVE_BOOST_CHRONO /**/
|
|
||||||
|
|
||||||
/* define if the Boost::Filesystem library is available */
|
|
||||||
#define HAVE_BOOST_FILESYSTEM /**/
|
|
||||||
|
|
||||||
/* define if the Boost::PROGRAM_OPTIONS library is available */
|
|
||||||
#define HAVE_BOOST_PROGRAM_OPTIONS /**/
|
|
||||||
|
|
||||||
/* define if the Boost::System library is available */
|
|
||||||
#define HAVE_BOOST_SYSTEM /**/
|
|
||||||
|
|
||||||
/* define if the Boost::Thread library is available */
|
|
||||||
#define HAVE_BOOST_THREAD /**/
|
|
||||||
|
|
||||||
/* define if the Boost::Unit_Test_Framework library is available */
|
|
||||||
#define HAVE_BOOST_UNIT_TEST_FRAMEWORK /**/
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <byteswap.h> header file. */
|
|
||||||
#define HAVE_BYTESWAP_H 1
|
|
||||||
|
|
||||||
/* Define this symbol if the consensus lib has been built */
|
|
||||||
#define HAVE_CONSENSUS_LIB 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `be16toh', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_BE16TOH 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `be32toh', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_BE32TOH 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_BE64TOH 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `bswap_16', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_BSWAP_16 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `bswap_32', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_BSWAP_32 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `bswap_64', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_BSWAP_64 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `htobe16', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_HTOBE16 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `htobe32', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_HTOBE32 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `htobe64', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_HTOBE64 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `htole16', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_HTOLE16 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `htole32', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_HTOLE32 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `htole64', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_HTOLE64 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `le16toh', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_LE16TOH 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `le32toh', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_LE32TOH 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `le64toh', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_LE64TOH 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_STRERROR_R 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `strnlen', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_STRNLEN 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
|
||||||
#define HAVE_DLFCN_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <endian.h> header file. */
|
|
||||||
#define HAVE_ENDIAN_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if the system has the `dllexport' function attribute */
|
|
||||||
/* #undef HAVE_FUNC_ATTRIBUTE_DLLEXPORT */
|
|
||||||
|
|
||||||
/* Define to 1 if the system has the `dllimport' function attribute */
|
|
||||||
/* #undef HAVE_FUNC_ATTRIBUTE_DLLIMPORT */
|
|
||||||
|
|
||||||
/* Define to 1 if the system has the `visibility' function attribute */
|
|
||||||
#define HAVE_FUNC_ATTRIBUTE_VISIBILITY 1
|
|
||||||
|
|
||||||
/* Define this symbol if you have getaddrinfo_a */
|
|
||||||
#define HAVE_GETADDRINFO_A 1
|
|
||||||
|
|
||||||
/* Define this symbol if you have inet_pton */
|
|
||||||
#define HAVE_INET_PTON 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
|
||||||
#define HAVE_INTTYPES_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `advapi32' library (-ladvapi32). */
|
|
||||||
/* #undef HAVE_LIBADVAPI32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `comctl32' library (-lcomctl32). */
|
|
||||||
/* #undef HAVE_LIBCOMCTL32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `comdlg32' library (-lcomdlg32). */
|
|
||||||
/* #undef HAVE_LIBCOMDLG32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `crypt32' library (-lcrypt32). */
|
|
||||||
/* #undef HAVE_LIBCRYPT32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `crypto' library (-lcrypto). */
|
|
||||||
#define HAVE_LIBCRYPTO 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `gdi32' library (-lgdi32). */
|
|
||||||
/* #undef HAVE_LIBGDI32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `imm32' library (-limm32). */
|
|
||||||
/* #undef HAVE_LIBIMM32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `iphlpapi' library (-liphlpapi). */
|
|
||||||
/* #undef HAVE_LIBIPHLPAPI */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `jpeg ' library (-ljpeg ). */
|
|
||||||
/* #undef HAVE_LIBJPEG_ */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `kernel32' library (-lkernel32). */
|
|
||||||
/* #undef HAVE_LIBKERNEL32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `mingwthrd' library (-lmingwthrd). */
|
|
||||||
/* #undef HAVE_LIBMINGWTHRD */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `mswsock' library (-lmswsock). */
|
|
||||||
/* #undef HAVE_LIBMSWSOCK */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `ole32' library (-lole32). */
|
|
||||||
/* #undef HAVE_LIBOLE32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `oleaut32' library (-loleaut32). */
|
|
||||||
/* #undef HAVE_LIBOLEAUT32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `png ' library (-lpng ). */
|
|
||||||
/* #undef HAVE_LIBPNG_ */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `rpcrt4' library (-lrpcrt4). */
|
|
||||||
/* #undef HAVE_LIBRPCRT4 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `shell32' library (-lshell32). */
|
|
||||||
/* #undef HAVE_LIBSHELL32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `shlwapi' library (-lshlwapi). */
|
|
||||||
/* #undef HAVE_LIBSHLWAPI */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `ssp' library (-lssp). */
|
|
||||||
/* #undef HAVE_LIBSSP */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `user32' library (-luser32). */
|
|
||||||
/* #undef HAVE_LIBUSER32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `uuid' library (-luuid). */
|
|
||||||
/* #undef HAVE_LIBUUID */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `winmm' library (-lwinmm). */
|
|
||||||
/* #undef HAVE_LIBWINMM */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `winspool' library (-lwinspool). */
|
|
||||||
/* #undef HAVE_LIBWINSPOOL */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `ws2_32' library (-lws2_32). */
|
|
||||||
/* #undef HAVE_LIBWS2_32 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `z ' library (-lz ). */
|
|
||||||
/* #undef HAVE_LIBZ_ */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <memory.h> header file. */
|
|
||||||
#define HAVE_MEMORY_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <miniupnpc/miniupnpc.h> header file. */
|
|
||||||
#define HAVE_MINIUPNPC_MINIUPNPC_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <miniupnpc/miniwget.h> header file. */
|
|
||||||
#define HAVE_MINIUPNPC_MINIWGET_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <miniupnpc/upnpcommands.h> header file. */
|
|
||||||
#define HAVE_MINIUPNPC_UPNPCOMMANDS_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <miniupnpc/upnperrors.h> header file. */
|
|
||||||
#define HAVE_MINIUPNPC_UPNPERRORS_H 1
|
|
||||||
|
|
||||||
/* Define this symbol if you have MSG_NOSIGNAL */
|
|
||||||
#define HAVE_MSG_NOSIGNAL 1
|
|
||||||
|
|
||||||
/* Define if you have POSIX threads libraries and header files. */
|
|
||||||
#define HAVE_PTHREAD 1
|
|
||||||
|
|
||||||
/* Have PTHREAD_PRIO_INHERIT. */
|
|
||||||
#define HAVE_PTHREAD_PRIO_INHERIT 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdint.h> header file. */
|
|
||||||
#define HAVE_STDINT_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdio.h> header file. */
|
|
||||||
#define HAVE_STDIO_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
|
||||||
#define HAVE_STDLIB_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `strerror_r' function. */
|
|
||||||
#define HAVE_STRERROR_R 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <strings.h> header file. */
|
|
||||||
#define HAVE_STRINGS_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <string.h> header file. */
|
|
||||||
#define HAVE_STRING_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/endian.h> header file. */
|
|
||||||
/* #undef HAVE_SYS_ENDIAN_H */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/prctl.h> header file. */
|
|
||||||
#define HAVE_SYS_PRCTL_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/select.h> header file. */
|
|
||||||
#define HAVE_SYS_SELECT_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
|
||||||
#define HAVE_SYS_STAT_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
|
||||||
#define HAVE_SYS_TYPES_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <unistd.h> header file. */
|
|
||||||
#define HAVE_UNISTD_H 1
|
|
||||||
|
|
||||||
/* Define if the visibility attribute is supported. */
|
|
||||||
#define HAVE_VISIBILITY_ATTRIBUTE 1
|
|
||||||
|
|
||||||
/* Define this symbol if boost sleep works */
|
|
||||||
/* #undef HAVE_WORKING_BOOST_SLEEP */
|
|
||||||
|
|
||||||
/* Define this symbol if boost sleep_for works */
|
|
||||||
#define HAVE_WORKING_BOOST_SLEEP_FOR 1
|
|
||||||
|
|
||||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
|
||||||
*/
|
|
||||||
#define LT_OBJDIR ".libs/"
|
|
||||||
|
|
||||||
/* Define to the address where bug reports for this package should be sent. */
|
|
||||||
//#define PACKAGE_BUGREPORT "https://github.com/FreeTrade/HOdlcoin/issues"
|
|
||||||
|
|
||||||
/* Define to the full name of this package. */
|
|
||||||
//#define PACKAGE_NAME "HOdlcoin Core"
|
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
|
||||||
//#define PACKAGE_STRING "HOdlcoin Core 0.11.2"
|
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
|
||||||
//#define PACKAGE_TARNAME "hodlcoin"
|
|
||||||
|
|
||||||
/* Define to the home page for this package. */
|
|
||||||
//#define PACKAGE_URL ""
|
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
|
||||||
//#define PACKAGE_VERSION "0.11.2"
|
|
||||||
|
|
||||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
|
||||||
your system. */
|
|
||||||
/* #undef PTHREAD_CREATE_JOINABLE */
|
|
||||||
|
|
||||||
/* Define this symbol if the qt platform is cocoa */
|
|
||||||
/* #undef QT_QPA_PLATFORM_COCOA */
|
|
||||||
|
|
||||||
/* Define this symbol if the qt platform is windows */
|
|
||||||
/* #undef QT_QPA_PLATFORM_WINDOWS */
|
|
||||||
|
|
||||||
/* Define this symbol if the qt platform is xcb */
|
|
||||||
/* #undef QT_QPA_PLATFORM_XCB */
|
|
||||||
|
|
||||||
/* Define this symbol if qt plugins are static */
|
|
||||||
/* #undef QT_STATICPLUGIN */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the ANSI C header files. */
|
|
||||||
#define STDC_HEADERS 1
|
|
||||||
|
|
||||||
/* Define to 1 if strerror_r returns char *. */
|
|
||||||
#define STRERROR_R_CHAR_P 1
|
|
||||||
|
|
||||||
/* Define if dbus support should be compiled in */
|
|
||||||
/* #undef USE_DBUS */
|
|
||||||
|
|
||||||
/* Define if QR support should be compiled in */
|
|
||||||
/* #undef USE_QRCODE */
|
|
||||||
|
|
||||||
/* UPnP support not compiled if undefined, otherwise value (0 or 1) determines
|
|
||||||
default state */
|
|
||||||
#define USE_UPNP 0
|
|
||||||
|
|
||||||
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
|
|
||||||
significant byte first (like Motorola and SPARC, unlike Intel). */
|
|
||||||
#if defined AC_APPLE_UNIVERSAL_BUILD
|
|
||||||
# if defined __BIG_ENDIAN__
|
|
||||||
# define WORDS_BIGENDIAN 1
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
# ifndef WORDS_BIGENDIAN
|
|
||||||
/* # undef WORDS_BIGENDIAN */
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Enable large inode numbers on Mac OS X 10.5. */
|
|
||||||
#ifndef _DARWIN_USE_64_BIT_INODE
|
|
||||||
# define _DARWIN_USE_64_BIT_INODE 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Number of bits in a file offset, on hosts where this is settable. */
|
|
||||||
/* #undef _FILE_OFFSET_BITS */
|
|
||||||
|
|
||||||
/* Define for large files, on AIX-style hosts. */
|
|
||||||
/* #undef _LARGE_FILES */
|
|
||||||
|
|
||||||
#endif //BITCOIN_CONFIG_H
|
|
@@ -1,181 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
#
|
|
||||||
# This script is not intended for users, it is only used for compile testing
|
|
||||||
# during development. However the information contained may provide compilation
|
|
||||||
# tips to users.
|
|
||||||
|
|
||||||
rm -f cpuminer-arrowlake* cpuminer-graniterapids* cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 cpuminer-zen4 cpuminer-zen5 cpuminer-alderlake cpuminer-x64 cpuminer-armv8 cpuminer-armv8-aes cpuminer-armv8-sha2 cpuminer-armv8-aes-sha2 > /dev/null 2>&1  # -f and 2>&1: rm reports missing files on stderr, so stdout-only redirection did not silence it
|
|
||||||
|
|
||||||
# AVX512 SHA VAES: Intel Core Icelake, Rocketlake
|
|
||||||
make distclean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
./autogen.sh || echo done
|
|
||||||
CFLAGS="-O3 -march=icelake-client -Wall" ./configure --with-curl
|
|
||||||
# Rocketlake needs gcc-11
|
|
||||||
#CFLAGS="-O3 -march=rocketlake -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-avx512-sha-vaes
|
|
||||||
|
|
||||||
# Intel Core Alderlake: AVX2 SHA VAES, needs gcc-12
|
|
||||||
#make clean || echo clean
|
|
||||||
#rm -f config.status
|
|
||||||
#CFLAGS="-O3 -march=alderlake -Wall" ./configure --with-curl
|
|
||||||
#make -j 8
|
|
||||||
#strip -s cpuminer
|
|
||||||
#mv cpuminer cpuminer-alderlake
|
|
||||||
|
|
||||||
# Intel Core Arrowlake-s: AVX2 SHA512 VAES, needs gcc-14
|
|
||||||
# Arrowlake-s includes SHA512, Arrowlake does not?
|
|
||||||
#make clean || echo clean
|
|
||||||
#rm -f config.status
|
|
||||||
#CFLAGS="-O3 -march=arrowlake-s -Wall" ./configure --with-curl
|
|
||||||
#make -j 8
|
|
||||||
#strip -s cpuminer
|
|
||||||
#mv cpuminer cpuminer-arrowlake-s
|
|
||||||
|
|
||||||
# Intel Core Graniterapids: AVX512, SHA256, VAES, needs gcc-14
|
|
||||||
# Apparently Graniterapids will not include AVX10, SHA512 or APX,
|
|
||||||
# wait for Diamondrapids & gcc-15.
|
|
||||||
#make clean || echo clean
|
|
||||||
#rm -f config.status
|
|
||||||
#CFLAGS="-O3 -march=graniterapids -Wall" ./configure --with-curl
|
|
||||||
#make -j 8
|
|
||||||
#strip -s cpuminer
|
|
||||||
#mv cpuminer cpuminer-graniterapids
|
|
||||||
|
|
||||||
# Force AVX10-256
|
|
||||||
#make clean || echo clean
|
|
||||||
#rm -f config.status
|
|
||||||
#CFLAGS="-O3 -march=arrowlake-s -mavx10.1-256 -Wall" ./configure --with-curl
|
|
||||||
#make -j 8
|
|
||||||
#strip -s cpuminer
|
|
||||||
#mv cpuminer cpuminer-avx10-256
|
|
||||||
|
|
||||||
# Force SHA512 AVX10-512
|
|
||||||
#make clean || echo clean
|
|
||||||
#rm -f config.status
|
|
||||||
#CFLAGS="-O3 -march=graniterapids -msha512 -mavx10.1-512 -Wall" ./configure --with-curl
|
|
||||||
#make -j 8
|
|
||||||
#strip -s cpuminer
|
|
||||||
#mv cpuminer cpuminer-avx10-512
|
|
||||||
|
|
||||||
# Zen5: AVX512 SHA VAES, requires gcc-14.
|
|
||||||
#make clean || echo clean
|
|
||||||
#rm -f config.status
|
|
||||||
#CFLAGS="-O3 -march=znver5 -Wall" ./configure --with-curl
|
|
||||||
#make -j $(nproc)
|
|
||||||
#strip -s cpuminer
|
|
||||||
#mv cpuminer cpuminer-zen5
|
|
||||||
|
|
||||||
# Zen4: AVX512 SHA VAES
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
# znver3 needs gcc-11, znver4 needs gcc-12.3.
|
|
||||||
#CFLAGS="-O3 -march=znver4 -Wall" ./configure --with-curl
|
|
||||||
# Incomplete list of Zen4 AVX512 extensions but includes all extensions used by cpuminer.
|
|
||||||
CFLAGS="-O3 -march=znver3 -mavx512f -mavx512cd -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq -Wall" ./configure --with-curl
|
|
||||||
#CFLAGS="-O3 -march=znver2 -mvaes -mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-zen4
|
|
||||||
|
|
||||||
# Zen3 AVX2 SHA VAES
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
#CFLAGS="-O3 -march=znver2 -mvaes" ./configure --with-curl
|
|
||||||
CFLAGS="-O3 -march=znver3 -fno-common " ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-zen3
|
|
||||||
|
|
||||||
# AVX512 AES: Intel Core HEDT Skylake-X, Cascadelake
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=skylake-avx512 -maes -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-avx512
|
|
||||||
|
|
||||||
# AVX2 SHA VAES: generic, zen3, alderlake...arrowlake
|
|
||||||
make clean || echo done
|
|
||||||
rm -f config.status
|
|
||||||
# vaes doesn't include aes
|
|
||||||
CFLAGS="-O3 -maes -mavx2 -msha -mvaes -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-avx2-sha-vaes
|
|
||||||
|
|
||||||
# AVX2 SHA AES: AMD Zen1
|
|
||||||
make clean || echo done
|
|
||||||
rm -f config.status
|
|
||||||
#CFLAGS="-O3 -march=znver1 -maes -Wall" ./configure --with-curl
|
|
||||||
CFLAGS="-O3 -maes -mavx2 -msha -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-avx2-sha
|
|
||||||
|
|
||||||
# AVX2 AES: Intel Haswell..Cometlake
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
# GCC 9 doesn't include AES with core-avx2
|
|
||||||
CFLAGS="-O3 -march=core-avx2 -maes -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-avx2
|
|
||||||
|
|
||||||
# AVX AES: Intel Sandybridge, Ivybridge
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=corei7-avx -maes -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-avx
|
|
||||||
|
|
||||||
# SSE4.2 AES: Intel Westmere, most Pentium & Celeron
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=westmere -maes -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-aes-sse42
|
|
||||||
|
|
||||||
# SSE4.2: Intel Nehalem
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=corei7 -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-sse42
|
|
||||||
|
|
||||||
# SSSE3: Intel Core2
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=core2 -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-ssse3
|
|
||||||
|
|
||||||
# SSE2
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -msse2 -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-sse2
|
|
||||||
|
|
||||||
# X86_64
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=x86-64 -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-x64
|
|
||||||
|
|
||||||
# Native to host CPU
|
|
||||||
make clean || echo done
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
strip -s cpuminer
|
|
||||||
|
|
@@ -1,15 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Linux build
|
|
||||||
|
|
||||||
make distclean || echo clean
|
|
||||||
|
|
||||||
rm -f config.status
|
|
||||||
./autogen.sh || echo done
|
|
||||||
|
|
||||||
CFLAGS="-O2 -march=armv8-a+crypto+sha2+aes -Wall -flax-vector-conversions" ./configure --with-curl --host=aarch64-cortexa76-elf --build=x86_64-pc-linux-gnu --target=aarch64-cortexa76-elf
|
|
||||||
#CFLAGS="-O2 -march=armv8-a+crypto+sha2+aes -Wall -flax-vector-conversions" ./configure --with-curl
|
|
||||||
|
|
||||||
make -j $(nproc)
|
|
||||||
|
|
||||||
strip -s cpuminer
|
|
@@ -1,9 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
|
|
||||||
# Linux build
|
|
||||||
|
|
||||||
make distclean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
./autogen.sh || echo done
|
|
||||||
CFLAGS="-O3 -march=haswell -maes -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
@@ -1,9 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
|
|
||||||
make distclean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
./autogen.sh || echo done
|
|
||||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
|
||||||
make -j $(nproc)
|
|
||||||
|
|
||||||
#strip -s cpuminer
|
|
@@ -1,9 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# make clean and rm all the targeted executables.
|
|
||||||
|
|
||||||
rm -f cpuminer-avx10* cpuminer-arrowlake* cpuminer-graniterapids* cpuminer-avx512-sha-vaes cpuminer-alderlake cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-avx2-sha cpuminer-sse42 cpuminer-ssse3 cpuminer-avx2-sha-vaes cpuminer-zen3 cpuminer-zen4 cpuminer-x64 cpuminer-armv9 cpuminer-armv9-crypto cpuminer-armv9-crypto-sha3 cpuminer-armv8.4-crypto-sha3 cpuminer-armv8.5-crypto-sha3-sve2 cpuminer-armv8-crypto cpuminer-armv8 > /dev/null 2>&1  # -f and 2>&1: rm reports missing files on stderr, so stdout-only redirection did not silence it
|
|
||||||
|
|
||||||
rm -f cpuminer-avx512-sha-vaes.exe cpuminer-avx512-sha.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-avx2-sha.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-avx2-sha-vaes.exe cpuminer-zen3.exe cpuminer-zen4.exe cpuminer-x64.exe > /dev/null 2>&1  # -f and 2>&1: rm reports missing files on stderr, so stdout-only redirection did not silence it
|
|
||||||
|
|
||||||
make distclean > /dev/null
|
|
@@ -1,96 +0,0 @@
|
|||||||
#ifndef __COMPAT_H__
|
|
||||||
#define __COMPAT_H__
|
|
||||||
|
|
||||||
#ifdef WIN32
|
|
||||||
|
|
||||||
#if _WIN32_WINNT>=0x0601 // Windows 7
|
|
||||||
#define WINDOWS_CPU_GROUPS_ENABLED 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <windows.h>
|
|
||||||
#include <time.h>
|
|
||||||
|
|
||||||
#ifndef localtime_r
|
|
||||||
#define localtime_r(src, dst) localtime_s(dst, src)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define sleep(secs) Sleep((secs) * 1000)
|
|
||||||
|
|
||||||
enum {
|
|
||||||
PRIO_PROCESS = 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
extern int opt_priority;
|
|
||||||
static __inline int setpriority(int which, int who, int prio)
|
|
||||||
{
|
|
||||||
switch (opt_priority) {
|
|
||||||
case 5:
|
|
||||||
prio = THREAD_PRIORITY_TIME_CRITICAL;
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
prio = THREAD_PRIORITY_HIGHEST;
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
prio = THREAD_PRIORITY_ABOVE_NORMAL;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
prio = THREAD_PRIORITY_NORMAL;
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
prio = THREAD_PRIORITY_BELOW_NORMAL;
|
|
||||||
break;
|
|
||||||
case 0:
|
|
||||||
default:
|
|
||||||
prio = THREAD_PRIORITY_IDLE;
|
|
||||||
}
|
|
||||||
return -!SetThreadPriority(GetCurrentThread(), prio);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
#define snprintf(...) _snprintf(__VA_ARGS__)
|
|
||||||
#define strdup(...) _strdup(__VA_ARGS__)
|
|
||||||
#define strncasecmp(x,y,z) _strnicmp(x,y,z)
|
|
||||||
#define strcasecmp(x,y) _stricmp(x,y)
|
|
||||||
#define __func__ __FUNCTION__
|
|
||||||
#define __thread __declspec(thread)
|
|
||||||
#define _ALIGN(x) __declspec(align(x))
|
|
||||||
typedef int ssize_t;
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
// This static var is made to be compatible with linux/mingw (no free on string result)
|
|
||||||
// This is not thread safe but we only use that once on process start
|
|
||||||
static char dirname_buffer[_MAX_PATH] = { 0 };
|
|
||||||
static __inline char * dirname(char *file) {
|
|
||||||
char drive[_MAX_DRIVE] = { 0 };
|
|
||||||
char dir[_MAX_DIR] = { 0 };
|
|
||||||
char fname[_MAX_FNAME], ext[_MAX_EXT];
|
|
||||||
_splitpath_s(file, drive, _MAX_DRIVE, dir, _MAX_DIR, fname, _MAX_FNAME, ext, _MAX_EXT);
|
|
||||||
if (dir && strlen(dir) && dir[strlen(dir)-1] == '\\') {
|
|
||||||
dir[strlen(dir) - 1] = '\0';
|
|
||||||
}
|
|
||||||
sprintf(dirname_buffer, "%s%s", drive, dir);
|
|
||||||
return &dirname_buffer[0];
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* WIN32 */
|
|
||||||
|
|
||||||
#ifndef _MSC_VER
|
|
||||||
#define _ALIGN(x) __attribute__ ((aligned(x)))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef unlikely
|
|
||||||
#undef likely
|
|
||||||
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
|
|
||||||
#define unlikely(expr) (__builtin_expect(!!(expr), 0))
|
|
||||||
#define likely(expr) (__builtin_expect(!!(expr), 1))
|
|
||||||
#else
|
|
||||||
#define unlikely(expr) (expr)
|
|
||||||
#define likely(expr) (expr)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef WIN32
|
|
||||||
#define MAX_PATH PATH_MAX
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __COMPAT_H__ */
|
|
@@ -1,22 +0,0 @@
|
|||||||
{
|
|
||||||
"_comment" : "Any long-format command line argument ",
|
|
||||||
"_comment" : "may be used in this JSON configuration file",
|
|
||||||
"_comment" : "Additional arguments may be added to the command line.",
|
|
||||||
"_comment" : "Usage: cpuminer -c myconfig.json [additional arguments]",
|
|
||||||
|
|
||||||
"_comment" : "Required arguments, replace dummy values",
|
|
||||||
|
|
||||||
"url" : "stratum+tcp://example.com:3333",
|
|
||||||
"user" : "read.pool.instructions",
|
|
||||||
"pass" : "x.often.works",
|
|
||||||
"algo" : "algo",
|
|
||||||
|
|
||||||
"_comment" : "Often used optional arguments with default values selected.",
|
|
||||||
"_comment" : "Change values, add or delete arguments as desired.",
|
|
||||||
|
|
||||||
"threads" : 0,
|
|
||||||
"cpu-affinity" : -1,
|
|
||||||
"api-bind" : "127.0.0.1:4048",
|
|
||||||
"benchmark" : false,
|
|
||||||
"quiet" : false
|
|
||||||
}
|
|
8398
rin/miner/cpuminer/configure
vendored
8398
rin/miner/cpuminer/configure
vendored
File diff suppressed because it is too large
Load Diff
@@ -1,139 +0,0 @@
|
|||||||
AC_INIT([cpuminer-opt], [25.3])
|
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
|
||||||
AC_CANONICAL_SYSTEM
|
|
||||||
AC_CONFIG_SRCDIR([cpu-miner.c])
|
|
||||||
AC_CONFIG_HEADERS([cpuminer-config.h])
|
|
||||||
AC_CONFIG_MACRO_DIR([m4])
|
|
||||||
|
|
||||||
AM_INIT_AUTOMAKE([foreign subdir-objects])
|
|
||||||
|
|
||||||
dnl Make sure anyone changing configure.ac/Makefile.am has a clue
|
|
||||||
AM_MAINTAINER_MODE
|
|
||||||
|
|
||||||
dnl Checks for programs
|
|
||||||
AC_PROG_CC_C99
|
|
||||||
AC_PROG_GCC_TRADITIONAL
|
|
||||||
AM_PROG_CC_C_O
|
|
||||||
AM_PROG_AS
|
|
||||||
AC_PROG_RANLIB
|
|
||||||
AC_PROG_CXX
|
|
||||||
|
|
||||||
dnl Checks for header files
|
|
||||||
AC_HEADER_STDC
|
|
||||||
AC_CHECK_HEADERS([sys/endian.h sys/param.h syslog.h])
|
|
||||||
# sys/sysctl.h requires sys/types.h on FreeBSD
|
|
||||||
# sys/sysctl.h requires sys/param.h on OpenBSD
|
|
||||||
AC_CHECK_HEADERS([sys/sysctl.h], [], [],
|
|
||||||
[#include <sys/types.h>
|
|
||||||
#ifdef HAVE_SYS_PARAM_H
|
|
||||||
#include <sys/param.h>
|
|
||||||
#endif
|
|
||||||
])
|
|
||||||
|
|
||||||
AC_CHECK_DECLS([be32dec, le32dec, be32enc, le32enc, le16dec, le16enc], [], [],
|
|
||||||
[AC_INCLUDES_DEFAULT
|
|
||||||
#ifdef HAVE_SYS_ENDIAN_H
|
|
||||||
#include <sys/endian.h>
|
|
||||||
#endif
|
|
||||||
])
|
|
||||||
|
|
||||||
AC_FUNC_ALLOCA
|
|
||||||
AC_CHECK_FUNCS([getopt_long])
|
|
||||||
|
|
||||||
case $target in
|
|
||||||
x86_64-*-*|amd64-*-*)
|
|
||||||
have_x86_64=true
|
|
||||||
;;
|
|
||||||
aarch64*-*-*|arm64*-*-*)
|
|
||||||
have_arm64=true
|
|
||||||
;;
|
|
||||||
powerpc*-*-*)
|
|
||||||
have_ppc=true
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
PTHREAD_FLAGS="-pthread"
|
|
||||||
WS2_LIBS=""
|
|
||||||
|
|
||||||
case $target in
|
|
||||||
*-*-mingw*)
|
|
||||||
have_win32=true
|
|
||||||
PTHREAD_FLAGS=""
|
|
||||||
WS2_LIBS="-lws2_32"
|
|
||||||
;;
|
|
||||||
*-apple-*)
|
|
||||||
have_apple=true
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
AC_ARG_ENABLE([assembly],
|
|
||||||
AS_HELP_STRING([--disable-assembly], [disable assembly-language routines]))
|
|
||||||
if test x$enable_assembly != xno; then
|
|
||||||
AC_DEFINE([USE_ASM], [1], [Define to 1 if assembly routines are wanted.])
|
|
||||||
fi
|
|
||||||
|
|
||||||
# jansson test fails on Linux/Mingw, handled in Makefile.am.
|
|
||||||
AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
|
|
||||||
|
|
||||||
AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS="-lpthread",
|
|
||||||
AC_CHECK_LIB([pthreadGC2], [pthread_create], PTHREAD_LIBS="-lpthreadGC2",
|
|
||||||
AC_CHECK_LIB([pthreadGC1], [pthread_create], PTHREAD_LIBS="-lpthreadGC1",
|
|
||||||
AC_CHECK_LIB([pthreadGC], [pthread_create], PTHREAD_LIBS="-lpthreadGC"
|
|
||||||
))))
|
|
||||||
|
|
||||||
AC_MSG_CHECKING(whether __uint128_t is supported)
|
|
||||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([static __uint128_t i = 100;])],
|
|
||||||
AC_DEFINE(USE_INT128, 1, [Define if __uint128_t is available])
|
|
||||||
AC_MSG_RESULT(yes)
|
|
||||||
,
|
|
||||||
AC_MSG_RESULT(no)
|
|
||||||
)
|
|
||||||
|
|
||||||
# allow if <var> in Makefile.am
|
|
||||||
AM_CONDITIONAL([WANT_JANSSON], [test x$request_jansson = xtrue])
|
|
||||||
AM_CONDITIONAL([HAVE_WINDOWS], [test x$have_win32 = xtrue])
|
|
||||||
AM_CONDITIONAL([USE_ASM], [test x$enable_assembly != xno])
|
|
||||||
AM_CONDITIONAL([ARCH_x86_64], [test x$have_x86_64 = xtrue])
|
|
||||||
AM_CONDITIONAL([ARCH_ARM64], [test x$have_arm64 = xtrue])
|
|
||||||
AM_CONDITIONAL([MINGW], [test "x$OS" = "xWindows_NT"])
|
|
||||||
AM_CONDITIONAL([HAVE_APPLE], [test x$have_apple = xtrue])
|
|
||||||
|
|
||||||
# libcurl install path (for mingw : --with-curl=/usr/local)
|
|
||||||
AC_ARG_WITH([curl],
|
|
||||||
[ --with-curl=PATH prefix where curl is installed [default=/usr]])
|
|
||||||
|
|
||||||
if test -n "$with_curl" ; then
|
|
||||||
LIBCURL_CFLAGS="$LIBCURL_CFLAGS -I$with_curl/include"
|
|
||||||
LIBCURL_CPPFLAGS="$LIBCURL_CPPFLAGS -I$with_curl/include"
|
|
||||||
LIBCURL_LDFLAGS="-L$with_curl/lib $LIBCURL_LDFLAGS"
|
|
||||||
LIBCURL="-lcurl -lz"
|
|
||||||
fi
|
|
||||||
|
|
||||||
CFLAGS="$CFLAGS $LIBCURL_CFLAGS"
|
|
||||||
CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS"
|
|
||||||
LDFLAGS="$LDFLAGS $LIBCURL_LDFLAGS"
|
|
||||||
|
|
||||||
# AC_CHECK_LIB([curl], [curl_multi_timeout],
|
|
||||||
# have_libcurl=yes,
|
|
||||||
# have_libcurl=no AC_MSG_ERROR([curl library required])
|
|
||||||
# )
|
|
||||||
|
|
||||||
# LIBCURL_CHECK_CONFIG([yes], 7.15, curlconfig=yes, curlconfig=no)
|
|
||||||
|
|
||||||
AC_SUBST(LIBCURL)
|
|
||||||
AC_SUBST(LIBCURL_CFLAGS)
|
|
||||||
AC_SUBST(LIBCURL_CPPFLAGS)
|
|
||||||
# AC_SUBST(LIBCURL_LDFLAGS)
|
|
||||||
|
|
||||||
AC_SUBST(JANSSON_LIBS)
|
|
||||||
AC_SUBST(PTHREAD_FLAGS)
|
|
||||||
AC_SUBST(PTHREAD_LIBS)
|
|
||||||
AC_SUBST(WS2_LIBS)
|
|
||||||
|
|
||||||
AC_CONFIG_FILES([
|
|
||||||
Makefile
|
|
||||||
compat/Makefile
|
|
||||||
compat/jansson/Makefile
|
|
||||||
])
|
|
||||||
AC_OUTPUT
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1
rin/miner/cpuminer/cpuminer-opt-rin
Submodule
1
rin/miner/cpuminer/cpuminer-opt-rin
Submodule
Submodule rin/miner/cpuminer/cpuminer-opt-rin added at dfbd6b03a6
@@ -1,225 +0,0 @@
|
|||||||
.TH CPUMINER 1 "May 2014" "cpuminer 2.4"
|
|
||||||
.SH NAME
|
|
||||||
cpuminer \- CPU miner for Bitcoin and Litecoin
|
|
||||||
.SH SYNOPSIS
|
|
||||||
.B cpuminer
|
|
||||||
[\fIOPTION\fR]...
|
|
||||||
.SH DESCRIPTION
|
|
||||||
.B cpuminer
|
|
||||||
is a multi-threaded CPU miner for Bitcoin, Litecoin and other cryptocurrencies.
|
|
||||||
It supports the getwork and getblocktemplate (BIP 22) methods,
|
|
||||||
as well as the Stratum mining protocol.
|
|
||||||
.PP
|
|
||||||
In its normal mode of operation, \fBcpuminer\fR connects to a mining server
|
|
||||||
(specified with the \fB\-o\fR option), receives work from it and starts hashing.
|
|
||||||
As soon as a solution is found, it is submitted to the same mining server,
|
|
||||||
which can accept or reject it.
|
|
||||||
When using getwork or getblocktemplate,
|
|
||||||
\fBcpuminer\fR can take advantage of long polling, if the server supports it;
|
|
||||||
in any case, fresh work is fetched as needed.
|
|
||||||
When using the Stratum protocol this is not possible,
|
|
||||||
and the server is responsible for sending fresh work at least every minute;
|
|
||||||
if it fails to do so,
|
|
||||||
\fBcpuminer\fR may drop the connection and try reconnecting again.
|
|
||||||
.PP
|
|
||||||
By default, \fBcpuminer\fR writes all its messages to standard error.
|
|
||||||
On systems that have a syslog, the \fB\-\-syslog\fR option can be used
|
|
||||||
to write to it instead.
|
|
||||||
.PP
|
|
||||||
On start, the nice value of all miner threads is set to 19.
|
|
||||||
On Linux, the scheduling policy is also changed to SCHED_IDLE,
|
|
||||||
or to SCHED_BATCH if that fails.
|
|
||||||
On multiprocessor systems, \fBcpuminer\fR
|
|
||||||
automatically sets the CPU affinity of miner threads
|
|
||||||
if the number of threads is a multiple of the number of processors.
|
|
||||||
.SH EXAMPLES
|
|
||||||
To connect to a Litecoin mining pool that provides a Stratum server
|
|
||||||
at example.com on port 3333, authenticating as worker "foo" with password "bar":
|
|
||||||
.PP
|
|
||||||
.nf
|
|
||||||
.RS
|
|
||||||
cpuminer \-o stratum+tcp://example.com:3333 \-O foo:bar
|
|
||||||
.RE
|
|
||||||
.fi
|
|
||||||
.PP
|
|
||||||
To mine to a local Bitcoin testnet instance running on port 18332,
|
|
||||||
authenticating with username "rpcuser" and password "rpcpass":
|
|
||||||
.PP
|
|
||||||
.nf
|
|
||||||
.RS
|
|
||||||
cpuminer \-a sha256d \-o http://localhost:18332 \-O rpcuser:rpcpass \\
|
|
||||||
\-\-coinbase\-addr=mpXwg4jMtRhuSpVq4xS3HFHmCmWp9NyGKt
|
|
||||||
.RE
|
|
||||||
.fi
|
|
||||||
.PP
|
|
||||||
To connect to a Litecoin P2Pool node running on my.server on port 9327,
|
|
||||||
mining in the background and having output sent to the syslog facility,
|
|
||||||
omitting the per-thread hashmeter output:
|
|
||||||
.PP
|
|
||||||
.nf
|
|
||||||
.RS
|
|
||||||
cpuminer \-BSq \-o http://my.server:9327
|
|
||||||
.RE
|
|
||||||
.fi
|
|
||||||
.SH OPTIONS
|
|
||||||
.TP
|
|
||||||
\fB\-a\fR, \fB\-\-algo\fR=\fIALGORITHM\fR
|
|
||||||
Set the hashing algorithm to use.
|
|
||||||
Default is scrypt.
|
|
||||||
Possible values are:
|
|
||||||
.RS 11
|
|
||||||
.TP 10
|
|
||||||
.B scrypt
|
|
||||||
scrypt(1024, 1, 1) (used by Litecoin)
|
|
||||||
.TP
|
|
||||||
.B scrypt:\fIN\fR
|
|
||||||
scrypt(\fIN\fR, 1, 1) (\fIN\fR must be a power of 2 greater than 1)
|
|
||||||
.TP
|
|
||||||
.B sha256d
|
|
||||||
SHA-256d (used by Bitcoin)
|
|
||||||
.RE
|
|
||||||
.TP
|
|
||||||
\fB\-\-benchmark\fR
|
|
||||||
Run in offline benchmark mode.
|
|
||||||
.TP
|
|
||||||
\fB\-B\fR, \fB\-\-background\fR
|
|
||||||
Run in the background as a daemon.
|
|
||||||
.TP
|
|
||||||
\fB\-\-cert\fR=\fIFILE\fR
|
|
||||||
Set an SSL certificate to use with the mining server.
|
|
||||||
Only supported when using the HTTPS protocol.
|
|
||||||
.TP
|
|
||||||
\fB\-\-coinbase\-addr\fR=\fIADDRESS\fR
|
|
||||||
Set a payout address for solo mining.
|
|
||||||
This is only used in getblocktemplate mode,
|
|
||||||
and only if the server does not provide a coinbase transaction.
|
|
||||||
.TP
|
|
||||||
\fB\-\-coinbase\-sig\fR=\fITEXT\fR
|
|
||||||
Set a string to be included in the coinbase (if allowed by the server).
|
|
||||||
This is only used in getblocktemplate mode.
|
|
||||||
.TP
|
|
||||||
\fB\-c\fR, \fB\-\-config\fR=\fIFILE\fR
|
|
||||||
Load options from a configuration file.
|
|
||||||
\fIFILE\fR must contain a JSON object
|
|
||||||
mapping long options to their arguments (as strings),
|
|
||||||
or to \fBtrue\fR if no argument is required.
|
|
||||||
Sample configuration file:
|
|
||||||
|
|
||||||
.nf
|
|
||||||
{
|
|
||||||
"url": "stratum+tcp://example.com:3333",
|
|
||||||
"userpass": "foo:bar",
|
|
||||||
"retry-pause": "10",
|
|
||||||
"quiet": true
|
|
||||||
}
|
|
||||||
.fi
|
|
||||||
.TP
|
|
||||||
\fB\-D\fR, \fB\-\-debug\fR
|
|
||||||
Enable debug output.
|
|
||||||
.TP
|
|
||||||
\fB\-h\fR, \fB\-\-help\fR
|
|
||||||
Print a help message and exit.
|
|
||||||
.TP
|
|
||||||
\fB\-\-no\-gbt\fR
|
|
||||||
Do not use the getblocktemplate RPC method.
|
|
||||||
.TP
|
|
||||||
\fB\-\-no\-getwork\fR
|
|
||||||
Do not use the getwork RPC method.
|
|
||||||
.TP
|
|
||||||
\fB\-\-no\-longpoll\fR
|
|
||||||
Do not use long polling.
|
|
||||||
.TP
|
|
||||||
\fB\-\-no\-redirect\fR
|
|
||||||
Ignore requests from the server to switch to a different URL.
|
|
||||||
.TP
|
|
||||||
\fB\-\-no\-stratum\fR
|
|
||||||
Do not switch to Stratum, even if the server advertises support for it.
|
|
||||||
.TP
|
|
||||||
\fB\-o\fR, \fB\-\-url\fR=[\fISCHEME\fR://][\fIUSERNAME\fR[:\fIPASSWORD\fR]@]\fIHOST\fR:\fIPORT\fR[/\fIPATH\fR]
|
|
||||||
Set the URL of the mining server to connect to.
|
|
||||||
Supported schemes are \fBhttp\fR, \fBhttps\fR and \fBstratum+tcp\fR.
|
|
||||||
If no scheme is specified, http is assumed.
|
|
||||||
Specifying a \fIPATH\fR is only supported for HTTP and HTTPS.
|
|
||||||
Specifying credentials has the same effect as using the \fB\-O\fR option.
|
|
||||||
|
|
||||||
By default, on HTTP and HTTPS,
|
|
||||||
the miner tries to use the getblocktemplate RPC method,
|
|
||||||
and falls back to using getwork if getblocktemplate is unavailable.
|
|
||||||
This behavior can be modified by using the \fB\-\-no\-gbt\fR
|
|
||||||
and \fB\-\-no\-getwork\fR options.
|
|
||||||
.TP
|
|
||||||
\fB\-O\fR, \fB\-\-userpass\fR=\fIUSERNAME\fR:\fIPASSWORD\fR
|
|
||||||
Set the credentials to use for connecting to the mining server.
|
|
||||||
Any value previously set with \fB\-u\fR or \fB\-p\fR is discarded.
|
|
||||||
.TP
|
|
||||||
\fB\-p\fR, \fB\-\-pass\fR=\fIPASSWORD\fR
|
|
||||||
Set the password to use for connecting to the mining server.
|
|
||||||
Any password previously set with \fB\-O\fR is discarded.
|
|
||||||
.TP
|
|
||||||
\fB\-P\fR, \fB\-\-protocol\-dump\fR
|
|
||||||
Enable output of all protocol-level activities.
|
|
||||||
.TP
|
|
||||||
\fB\-q\fR, \fB\-\-quiet\fR
|
|
||||||
Disable per-thread hashmeter output.
|
|
||||||
.TP
|
|
||||||
\fB\-r\fR, \fB\-\-retries\fR=\fIN\fR
|
|
||||||
Set the maximum number of times to retry if a network call fails.
|
|
||||||
If not specified, the miner will retry indefinitely.
|
|
||||||
.TP
|
|
||||||
\fB\-R\fR, \fB\-\-retry\-pause\fR=\fISECONDS\fR
|
|
||||||
Set how long to wait between retries. Default is 30 seconds.
|
|
||||||
.TP
|
|
||||||
\fB\-s\fR, \fB\-\-scantime\fR=\fISECONDS\fR
|
|
||||||
Set an upper bound on the time the miner can go without fetching fresh work.
|
|
||||||
This setting has no effect in Stratum mode or when long polling is activated.
|
|
||||||
Default is 5 seconds.
|
|
||||||
.TP
|
|
||||||
\fB\-S\fR, \fB\-\-syslog\fR
|
|
||||||
Log to the syslog facility instead of standard error.
|
|
||||||
.TP
|
|
||||||
\fB\-t\fR, \fB\-\-threads\fR=\fIN\fR
|
|
||||||
Set the number of miner threads.
|
|
||||||
If not specified, the miner will try to detect the number of available processors
|
|
||||||
and use that.
|
|
||||||
.TP
|
|
||||||
\fB\-T\fR, \fB\-\-timeout\fR=\fISECONDS\fR
|
|
||||||
Set a timeout for long polling.
|
|
||||||
.TP
|
|
||||||
\fB\-u\fR, \fB\-\-user\fR=\fIUSERNAME\fR
|
|
||||||
Set the username to use for connecting to the mining server.
|
|
||||||
Any username previously set with \fB\-O\fR is discarded.
|
|
||||||
.TP
|
|
||||||
\fB\-V\fR, \fB\-\-version\fR
|
|
||||||
Display version information and quit.
|
|
||||||
.TP
|
|
||||||
\fB\-x\fR, \fB\-\-proxy\fR=[\fISCHEME\fR://][\fIUSERNAME\fR:\fIPASSWORD\fR@]\fIHOST\fR:\fIPORT\fR
|
|
||||||
Connect to the mining server through a proxy.
|
|
||||||
Supported schemes are: \fBhttp\fR, \fBsocks4\fR, \fBsocks5\fR.
|
|
||||||
Since libcurl 7.18.0, the following are also supported:
|
|
||||||
\fBsocks4a\fR, \fBsocks5h\fR (SOCKS5 with remote name resolving).
|
|
||||||
If no scheme is specified, the proxy is treated as an HTTP proxy.
|
|
||||||
.SH ENVIRONMENT
|
|
||||||
The following environment variables can be specified in lower case or upper case;
|
|
||||||
the lower-case version has precedence. \fBhttp_proxy\fR is an exception
|
|
||||||
as it is only available in lower case.
|
|
||||||
.PP
|
|
||||||
.RS
|
|
||||||
.TP
|
|
||||||
\fBhttp_proxy\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR
|
|
||||||
Sets the proxy server to use for HTTP.
|
|
||||||
.TP
|
|
||||||
\fBHTTPS_PROXY\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR
|
|
||||||
Sets the proxy server to use for HTTPS.
|
|
||||||
.TP
|
|
||||||
\fBALL_PROXY\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR
|
|
||||||
Sets the proxy server to use if no protocol-specific proxy is set.
|
|
||||||
.RE
|
|
||||||
.PP
|
|
||||||
Using an environment variable to set the proxy has the same effect as
|
|
||||||
using the \fB\-x\fR option.
|
|
||||||
.SH AUTHOR
|
|
||||||
This variant is maintained by tpruvot@github.
|
|
||||||
|
|
||||||
Most of the code in the current version of cpuminer was written by
|
|
||||||
Pooler <pooler@litecoinpool.org> with contributions from others.
|
|
||||||
The original minerd was written by Jeff Garzik <jeff@garzik.org>.
|
|
@@ -1,6 +0,0 @@
|
|||||||
// This file exists to force the use of g++ as the linker which in turn
|
|
||||||
// links the math library with the inclusion of math.h. gcc will not
|
|
||||||
// automatically link math. Without this file linking will fail for m7m.c.
|
|
||||||
// Linking math manually, allowing gcc to do the linking, works on Linux
|
|
||||||
// but on Windows it segfaults. Until that is solved this file must continue
|
|
||||||
// to exist.
|
|
@@ -1,254 +0,0 @@
|
|||||||
#ifndef _LINUX_LIST_H
#define _LINUX_LIST_H

#include <stddef.h>	/* NULL (list_del) and offsetof (list_entry) */

/*
 * Simple doubly linked list implementation.
 *
 * A list is circular: an empty list is a head whose next and prev both
 * point back at the head itself.  Entries are struct list_head members
 * embedded in a containing structure; list_entry() recovers the container.
 *
 * Some of the internal functions ("__xxx") are useful when
 * manipulating whole lists rather than single entries, as
 * sometimes we already know the next/prev entries and we can
 * generate better code by using them directly rather than
 * using the generic single-entry routines.
 */

struct list_head {
	struct list_head *next, *prev;
};

/* Static initializer for an empty list named @name. */
#define LIST_HEAD_INIT(name) { &(name), &(name) }

/* Define and initialize an empty list head called @name. */
#define LIST_HEAD(name) \
	struct list_head name = LIST_HEAD_INIT(name)

/* Run-time initialization: make @ptr an empty list (points to itself). */
#define INIT_LIST_HEAD(ptr) do { \
	(ptr)->next = (ptr); (ptr)->prev = (ptr); \
} while (0)

/*
 * Insert a new entry between two known consecutive entries.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
static inline void __list_add(struct list_head *nlh,
			      struct list_head *prev,
			      struct list_head *next)
{
	next->prev = nlh;
	nlh->next = next;
	nlh->prev = prev;
	prev->next = nlh;
}

/**
 * list_add - add a new entry
 * @nlh: new entry to be added
 * @head: list head to add it after
 *
 * Insert a new entry after the specified head.
 * This is good for implementing stacks.
 */
static inline void list_add(struct list_head *nlh, struct list_head *head)
{
	__list_add(nlh, head, head->next);
}

/**
 * list_add_tail - add a new entry
 * @nlh: new entry to be added
 * @head: list head to add it before
 *
 * Insert a new entry before the specified head.
 * This is useful for implementing queues.
 */
static inline void list_add_tail(struct list_head *nlh, struct list_head *head)
{
	__list_add(nlh, head->prev, head);
}

/*
 * Delete a list entry by making the prev/next entries
 * point to each other.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
static inline void __list_del(struct list_head *prev, struct list_head *next)
{
	next->prev = prev;
	prev->next = next;
}

/**
 * list_del - deletes entry from list.
 * @entry: the element to delete from the list.
 *
 * Note: list_empty on entry does not return true after this; both of the
 * entry's links are set to NULL.
 */
static inline void list_del(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
	entry->next = NULL;
	entry->prev = NULL;
}

/**
 * list_del_init - deletes entry from list and reinitializes it.
 * @entry: the element to delete from the list.
 *
 * After the call @entry is a valid empty list of its own.
 */
static inline void list_del_init(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
	INIT_LIST_HEAD(entry);
}

/**
 * list_move - delete from one list and add as another's head
 * @list: the entry to move
 * @head: the head that will precede our entry
 */
static inline void list_move(struct list_head *list, struct list_head *head)
{
	__list_del(list->prev, list->next);
	list_add(list, head);
}

/**
 * list_move_tail - delete from one list and add as another's tail
 * @list: the entry to move
 * @head: the head that will follow our entry
 */
static inline void list_move_tail(struct list_head *list,
				  struct list_head *head)
{
	__list_del(list->prev, list->next);
	list_add_tail(list, head);
}

/**
 * list_empty - tests whether a list is empty
 * @head: the list to test.
 *
 * Returns non-zero when @head links back to itself.
 */
static inline int list_empty(const struct list_head *head)
{
	return head->next == head;
}

/*
 * Link every entry of @list in right after @head.  @list itself is left
 * untouched (still pointing at the moved entries); callers must make sure
 * @list is not empty.
 */
static inline void __list_splice(struct list_head *list,
				 struct list_head *head)
{
	struct list_head *first = list->next;
	struct list_head *last = list->prev;
	struct list_head *at = head->next;

	first->prev = head;
	head->next = first;

	last->next = at;
	at->prev = last;
}

/**
 * list_splice - join two lists
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 *
 * Does nothing when @list is empty.  @list is not reinitialized.
 */
static inline void list_splice(struct list_head *list, struct list_head *head)
{
	if (!list_empty(list))
		__list_splice(list, head);
}

/**
 * list_splice_init - join two lists and reinitialise the emptied list.
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 *
 * The list at @list is reinitialised
 */
static inline void list_splice_init(struct list_head *list,
				    struct list_head *head)
{
	if (!list_empty(list)) {
		__list_splice(list, head);
		INIT_LIST_HEAD(list);
	}
}

/**
 * list_entry - get the struct for this entry
 * @ptr:	the &struct list_head pointer.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_struct within the struct.
 *
 * Uses offsetof() rather than a hand-rolled null-pointer dereference.
 */
#define list_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/**
 * list_for_each	-	iterate over a list
 * @pos:	the &struct list_head to use as a loop counter.
 * @head:	the head for your list.
 */
#define list_for_each(pos, head) \
	for (pos = (head)->next; pos != (head); \
		pos = pos->next)

/**
 * list_for_each_prev	-	iterate over a list backwards
 * @pos:	the &struct list_head to use as a loop counter.
 * @head:	the head for your list.
 */
#define list_for_each_prev(pos, head) \
	for (pos = (head)->prev; pos != (head); \
		pos = pos->prev)

/**
 * list_for_each_safe	-	iterate over a list safe against removal of list entry
 * @pos:	the &struct list_head to use as a loop counter.
 * @n:		another &struct list_head to use as temporary storage
 * @head:	the head for your list.
 */
#define list_for_each_safe(pos, n, head) \
	for (pos = (head)->next, n = pos->next; pos != (head); \
		pos = n, n = pos->next)

/**
 * list_for_each_entry	-	iterate over list of given type
 * @pos:	the type * to use as a loop counter.
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 * @type:	the type of the struct.
 */
#define list_for_each_entry(pos, head, member, type)			\
	for (pos = list_entry((head)->next, type, member);		\
	     &pos->member != (head);					\
	     pos = list_entry(pos->member.next, type, member))

/**
 * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
 * @pos:	the type * to use as a loop counter.
 * @n:		another type * to use as temporary storage
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 * @type:	the type of the struct.
 */
#define list_for_each_entry_safe(pos, n, head, member, type)		\
	for (pos = list_entry((head)->next, type, member),		\
		n = list_entry(pos->member.next, type, member);	\
	     &pos->member != (head);					\
	     pos = n, n = list_entry(n->member.next, type, member))

/**
 * list_for_each_entry_continue -       iterate over list of given type
 *                      continuing after existing point
 * @pos:	the type * to use as a loop counter.
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 * @type:	the type of the struct.
 *
 * Iteration starts at the entry following the one @pos points to on entry.
 * No prefetch hint is issued: this header provides no prefetch() and must
 * compile on its own.
 */
#define list_for_each_entry_continue(pos, head, member, type)		\
	for (pos = list_entry(pos->member.next, type, member);		\
	     &pos->member != (head);					\
	     pos = list_entry(pos->member.next, type, member))

#endif
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,36 +0,0 @@
|
|||||||
#include "malloc-huge.h"
|
|
||||||
#include "miner.h"
|
|
||||||
|
|
||||||
#define HUGEPAGE_SIZE_2M (2 * 1024 * 1024)
|
|
||||||
|
|
||||||
/*
 * Request an anonymous, private, read/write mapping flagged MAP_HUGETLB.
 *
 * size: number of bytes wanted; it is rounded up to a multiple of
 *       HUGEPAGE_SIZE_2M before being handed to mmap().
 *
 * Returns the mapping address, or NULL when
 *   - the platform headers lack MAP_HUGETLB or MAP_ANON,
 *   - size is below HUGEPAGE_MIN_ALLOC, or
 *   - mmap() reports MAP_FAILED.
 */
void *malloc_hugepages( size_t size )
{
#if defined(MAP_HUGETLB) && defined(MAP_ANON)
   const size_t page_mask = (size_t)HUGEPAGE_SIZE_2M - 1;
   int map_flags = MAP_HUGETLB | MAP_ANON | MAP_PRIVATE;
   size_t rounded;
   void *mem;

   // Small requests are not worth a huge page; let the caller fall back.
   if ( size < HUGEPAGE_MIN_ALLOC )
      return NULL;

#ifdef MAP_NOCORE
   map_flags |= MAP_NOCORE;
#endif

   // Round the request up to the next HUGEPAGE_SIZE_2M boundary.
   rounded = ( size + page_mask ) & ~page_mask;

   mem = mmap( NULL, rounded, PROT_READ | PROT_WRITE, map_flags, -1, 0 );
   return ( mem == MAP_FAILED ) ? NULL : mem;
#else
   return NULL;
#endif
}
|
|
||||||
|
|
@@ -1,24 +0,0 @@
|
|||||||
#if !(defined(MALLOC_HUGE__))
|
|
||||||
#define MALLOC_HUGE__
|
|
||||||
|
|
||||||
#include <errno.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#ifdef __unix__
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(MAP_HUGETLB)
|
|
||||||
|
|
||||||
// Minimum block size 6 MiB to use huge pages
|
|
||||||
#define HUGEPAGE_MIN_ALLOC (6 * 1024 * 1024)
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Attempt to allocate memory backed by 2 MiB pages, returns NULL on failure.
|
|
||||||
void *malloc_hugepages( size_t size );
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
@@ -1,3 +0,0 @@
|
|||||||
:start
|
|
||||||
cpuminer.exe -a rinhash -o stratum+tcps://stratum-eu.rplant.xyz:17148 -u rin1qvj0yyt9phvled9kxflju3p687a4s7kareglpk5.dd
|
|
||||||
goto start
|
|
File diff suppressed because it is too large
Load Diff
@@ -1,47 +0,0 @@
|
|||||||
#!/usr/bin/perl
# Copyright 2012 pooler@litecoinpool.org
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.  See COPYING for more details.
#
# nomacro.pl - convert assembler macros to C preprocessor macros.
#
# Every *.S file in the current directory is renamed to "<name>.orig" and a
# rewritten copy is written under the original name:
#   - ".macro NAME args" ... ".endm" becomes a multi-line "#define NAME(args)"
#   - invocations of already-seen macros get their arguments parenthesized
#   - "#if ... #endif" sections inside a macro body are dropped

use strict;

foreach my $f (<*.S>) {
	# Stop on I/O failure instead of silently producing empty output.
	rename $f, "$f.orig"
		or die "nomacro: cannot rename $f to $f.orig: $!\n";
	open my $fin, '<', "$f.orig"
		or die "nomacro: cannot read $f.orig: $!\n";
	open my $fout, '>', $f
		or die "nomacro: cannot write $f: $!\n";

	my $inmacro = 0;	# true while between .macro and .endm
	my %macros = ();	# names of the macros defined so far in this file

	while (<$fin>) {
		# Start of a macro definition: emit the #define header line.
		if (m/^\.macro\s+([_0-9A-Z]+)(?:\s*)(.*)$/i) {
			print {$fout} "#define $1($2) \\\n";
			$macros{$1} = 1;
			$inmacro = 1;
			next;
		}
		# End of a macro definition: terminate the continued #define.
		if (m/^\.endm/) {
			print {$fout} "\n";
			$inmacro = 0;
			next;
		}
		# Turn "NAME a, b" invocations into "NAME(a, b)".
		for my $m (keys %macros) {
			s/^([ \t]*)($m)(?:[ \t]+([^#\n]*))?([;\n])/$1$2($3)$4/;
		}
		if ($inmacro) {
			# Skip preprocessor conditionals inside a macro body.
			if (m/^\s*#if/) {
				while (!m/^\s*#endif/) {
					$_ = <$fin>;
					# EOF before #endif used to loop forever.
					die "nomacro: $f.orig: missing #endif inside macro\n"
						unless defined $_;
				}
				next;
			}
			next if (m/^\s*$/);
			# Strip assembler argument escapes and continue the #define.
			s/\\//g;
			s/$/; \\/;
		}
		print {$fout} $_;
	}
	close $fout
		or die "nomacro: error closing $f: $!\n";
	close $fin;
}
|
|
@@ -1,233 +0,0 @@
|
|||||||
#if !defined(SIMD_UTILS_H__)
|
|
||||||
#define SIMD_UTILS_H__ 1
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// SIMD utilities
|
|
||||||
//
|
|
||||||
// Not to be confused with the hashing function of the same name. This
|
|
||||||
// is about Single Instruction Multiple Data programming using CPU
|
|
||||||
// features such as SSE and AVX.
|
|
||||||
//
|
|
||||||
// This header is the entry point to a suite of macros and functions
|
|
||||||
// to perform basic operations on vectors that are useful in crypto
|
|
||||||
// mining. Some of these functions have native CPU support for scalar
|
|
||||||
// data but not for vectors. The main categories are bit rotation
|
|
||||||
// and endian byte swapping
|
|
||||||
//
|
|
||||||
// This suite supports some operations on regular 64 bit integers
|
|
||||||
// as well as 128 bit integers available on recent versions of Linux
|
|
||||||
// and GCC.
|
|
||||||
//
|
|
||||||
// It also supports various vector sizes on CPUs that meet the minimum
|
|
||||||
// requirements.
|
|
||||||
//
|
|
||||||
// The minimum for any real work is a 64 bit CPU with SSE2,
|
|
||||||
// i.e. an Intel Core 2.
|
|
||||||
//
|
|
||||||
// Following are the minimum requirements for each vector size. There
|
|
||||||
// is no significant 64 bit vectorization therefore SSE2 is the practical
|
|
||||||
// minimum for using this code.
|
|
||||||
//
|
|
||||||
// SSE2: 128 bit vectors (64 bit CPUs only, such as Intel Core2).
|
|
||||||
// AVX2: 256 bit vectors (Starting with Intel Haswell and AMD Ryzen)
|
|
||||||
// AVX512: 512 bit vectors (Starting with SkylakeX)
|
|
||||||
// AVX10: when available will supersede AVX512 and will bring AVX512
|
|
||||||
// features, except 512 bit vectors, to Intel's Ecores. It needs to be
|
|
||||||
// enabled manually when the relevant GCC macros are known.
|
|
||||||
//
|
|
||||||
// Most functions are available at the stated levels but in rare cases
|
|
||||||
// a higher level feature may be required with no compatible alternative.
|
|
||||||
// Some SSE2 functions have versions optimized for higher feature levels
|
|
||||||
// such as SSSE3 or SSE4.1 that will be used automatically on capable
|
|
||||||
// CPUs.
|
|
||||||
//
|
|
||||||
// Strict alignment of data is required: 16 bytes for 128 bit vectors,
|
|
||||||
// 32 bytes for 256 bit vectors and 64 bytes for 512 bit vectors. 64 byte
|
|
||||||
// alignment is recommended in all cases for best cache alignment.
|
|
||||||
//
|
|
||||||
// All functions are defined with type agnostic pointers (void*) arguments
|
|
||||||
// and are cast or aliased as the appropriate type. This adds convenience
|
|
||||||
// for the applications but also adds responsibility to ensure adequate data
|
|
||||||
// alignment.
|
|
||||||
//
|
|
||||||
// An attempt was made to make the names as similar as possible to
|
|
||||||
// Intel's intrinsic function format. Most variations are to avoid
|
|
||||||
// confusion with actual Intel intrinsics, brevity, and clarity.
|
|
||||||
//
|
|
||||||
// The main differences are:
|
|
||||||
//
|
|
||||||
// - the leading underscore "_" is dropped from the prefix of vector function
|
|
||||||
// macros.
|
|
||||||
// - "mm128" is used as the 128 bit prefix to be consistent with mm256 & mm512 and
|
|
||||||
// to avoid the ambiguity of "mm" which is also used for 64 bit MMX
|
|
||||||
// intrinsics.
|
|
||||||
// - the element size does not include additional type specifiers
|
|
||||||
// like "epi".
|
|
||||||
// - there is a subset of some functions for scalar data. They may have
|
|
||||||
// no prefix nor vec-size, just one size, the size of the data.
|
|
||||||
// - Some integer functions are also defined which use a similar notation.
|
|
||||||
//
|
|
||||||
// Function names follow this pattern:
|
|
||||||
//
|
|
||||||
// [prefix]_[op][vsize]_[esize]
|
|
||||||
//
|
|
||||||
// Prefix: usually the size of the returned vector.
|
|
||||||
// Following are some examples:
|
|
||||||
//
|
|
||||||
// u64: unsigned 64 bit integer function
|
|
||||||
// i128: signed 128 bit integer function (rarely used)
|
|
||||||
// m128: 128 bit vector identifier (deprecated)
|
|
||||||
// mm128: 128 bit vector function
|
|
||||||
//
|
|
||||||
// op: describes the operation of the function or names the data
|
|
||||||
// identifier.
|
|
||||||
//
|
|
||||||
// esize: optional, element size of operation
|
|
||||||
//
|
|
||||||
// vsize: optional, lane size used when a function operates on elements
|
|
||||||
// within lanes of a larger vector.
|
|
||||||
//
|
|
||||||
// Ex: mm256_shuflr128_32 rotates each 128 bit lane of a 256 bit vector
|
|
||||||
// right by 32 bits.
|
|
||||||
//
|
|
||||||
// New architecture agnostic syntax to support multiple architectures.
|
|
||||||
// currently only used for 128 bit vectors.
|
|
||||||
//
|
|
||||||
// [prefix]_[op][esize]
|
|
||||||
//
|
|
||||||
// Abbreviated when no vsize, space is removed between op & esize.
|
|
||||||
//
|
|
||||||
// Ex: v128_add32 gets remapped to the appropriate architecture intrinsic.
|
|
||||||
//
|
|
||||||
// New type specification includes element size because it's significant on
|
|
||||||
// AArch64. For x86_64 they're all mapped to v128_t. On arm the default is
|
|
||||||
// v128u32_t.
|
|
||||||
//
|
|
||||||
// v128_t, v128u64_t, v128u32_t.
|
|
||||||
//
|
|
||||||
// [prefix] is changed to "v128" or size specific for typedef.
|
|
||||||
//
|
|
||||||
// Vector constants
|
|
||||||
//
|
|
||||||
// Vector constants are a big problem because they technically don't exist.
|
|
||||||
// All vectors used as constants either reside in memory or must be generated
|
|
||||||
// at run time at significant cost. The cost of generating a constant
|
|
||||||
// increases non-linearly with the number of vector elements. A 4 element
|
|
||||||
// vector costs between 7 and 11 clocks to generate, an 8 element vector
|
|
||||||
// is 15-25 clocks. There are also additional clocks due to data dependency
|
|
||||||
// stalls.
|
|
||||||
//
|
|
||||||
// Vector constants are often used as control indexes for permute, blend, etc,
|
|
||||||
// where generating the index can be over 90% of the operation. This is
|
|
||||||
// where the problem occurs. An instruction that only requires one to 3
|
|
||||||
// clocks needs many times more just to build the index argument.
|
|
||||||
//
|
|
||||||
// There is very little a programmer can do to avoid the worst case scenarios.
|
|
||||||
// Smaller integers can be merged to form 64 bit integers, and vectors with
|
|
||||||
// repeated elements can be generated more efficiently but they have limited
|
|
||||||
// benefit and limited application.
|
|
||||||
//
|
|
||||||
// If a vector constant is to be used repeatedly it is better to define a local
|
|
||||||
// variable to generate the constant only once.
|
|
||||||
//
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <memory.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
// AVX512 macros are not a reliable indicator of 512 bit vector capability
|
|
||||||
// because they get defined with AVX10_1_256 which doesn't support 512 bit.
|
|
||||||
// EVEX512 is also unreliable as it can also be defined when 512b is not
|
|
||||||
// available.
|
|
||||||
// Use AVX10_1_512 for 512b & AVX10_1_256 for 256b whenever AVX10 is present.
|
|
||||||
// Use AVX512 macros only without AVX10.
|
|
||||||
|
|
||||||
/*
|
|
||||||
// Test for macros
|
|
||||||
#ifdef __AVX10_1__
|
|
||||||
#warning "__AVX10_1__"
|
|
||||||
#endif
|
|
||||||
#ifdef __AVX10_1_256__
|
|
||||||
#warning "__AVX10_1_256__"
|
|
||||||
#endif
|
|
||||||
#ifdef __AVX10_1_512__
|
|
||||||
#warning "__AVX10_1_512__"
|
|
||||||
#endif
|
|
||||||
#ifdef __EVEX256__
|
|
||||||
#warning "__EVEX256__"
|
|
||||||
#endif
|
|
||||||
#ifdef __EVEX512__
|
|
||||||
#warning "__EVEX512__"
|
|
||||||
#endif
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
|
||||||
#warning "AVX512"
|
|
||||||
#endif
|
|
||||||
*/
|
|
||||||
|
|
||||||
// SIMD512: Use 512, 256 & 128 bit vectors, AVX512VBMI is not included and
|
|
||||||
// must be tested separately.
|
|
||||||
// VL256: Include AVX512VL instructions for 256 & 128 bit vectors.
|
|
||||||
// VBMI: Include AVX512VBMI instructions for supported vector lengths.
|
|
||||||
|
|
||||||
#if defined(__AVX10_1__)
|
|
||||||
|
|
||||||
#define VL256 1
|
|
||||||
#define VBMI 1
|
|
||||||
#if defined(__AVX10_1_512__)
|
|
||||||
#define SIMD512 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#elif defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
|
||||||
|
|
||||||
#define VL256 1
|
|
||||||
#define SIMD512 1
|
|
||||||
#if defined(__AVX512VBMI__)
|
|
||||||
#define VBMI 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
#if defined(SIMD512)
|
|
||||||
#warning "SIMD512"
|
|
||||||
#endif
|
|
||||||
#if defined(VBMI)
|
|
||||||
#warning "VBMI"
|
|
||||||
#endif
|
|
||||||
#if defined(VL256)
|
|
||||||
#warning "VL256"
|
|
||||||
#endif
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if defined(__x86_64__)
|
|
||||||
|
|
||||||
#include <x86intrin.h>
|
|
||||||
|
|
||||||
#elif defined(__aarch64__)
|
|
||||||
|
|
||||||
#include <arm_neon.h>
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "simd-utils/simd-int.h"
|
|
||||||
|
|
||||||
// x86_64 SSE2 128 bit vectors
|
|
||||||
#include "simd-utils/simd-128.h"
|
|
||||||
|
|
||||||
// x86_64 AVX2 256 bit vectors
|
|
||||||
#include "simd-utils/simd-256.h"
|
|
||||||
|
|
||||||
// x86_64 AVX512 512 bit vectors
|
|
||||||
#include "simd-utils/simd-512.h"
|
|
||||||
|
|
||||||
// aarch64 neon 128 bit vectors
|
|
||||||
#include "simd-utils/simd-neon.h"
|
|
||||||
|
|
||||||
#include "simd-utils/intrlv.h"
|
|
||||||
|
|
||||||
#endif // SIMD_UTILS_H__
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,80 +0,0 @@
|
|||||||
Quickstart:
|
|
||||||
----------
|
|
||||||
|
|
||||||
First time mining verthash or don't have a Verthash data file:
|
|
||||||
|
|
||||||
--algo verthash --verify --url ...
|
|
||||||
|
|
||||||
Verthash data file already exists:
|
|
||||||
|
|
||||||
--algo verthash --data-file /path/to/verthash.dat --url ...
|
|
||||||
|
|
||||||
|
|
||||||
Background:
|
|
||||||
----------
|
|
||||||
|
|
||||||
Verthash algorithm requires a data file for hashing. This file is
|
|
||||||
static, portable, and only needs to be created once.
|
|
||||||
|
|
||||||
A Verthash data file created by VerthashMiner can also be used by cpuminer-opt
|
|
||||||
and used simultaneously by both miners.
|
|
||||||
|
|
||||||
Due to its size >1GB it is recommended one data file be created and
|
|
||||||
stored in a permanent location accessible to any miner that wants to use it.
|
|
||||||
|
|
||||||
New command line options:
|
|
||||||
------------------------
|
|
||||||
|
|
||||||
cpuminer-opt adds two new command line options for verthash. The names
|
|
||||||
and some behaviour is changed from VerthashMiner.
|
|
||||||
|
|
||||||
--data-file /path/to/verthash.dat
|
|
||||||
default when not used is verthash.dat in current working directory.
|
|
||||||
|
|
||||||
--verify
|
|
||||||
verify integrity of file specified by --data-file, or if not specified
|
|
||||||
the default data file if it exists, or create a default file and verify it
|
|
||||||
if one does not yet exist. Data file verification is disabled by default.
|
|
||||||
|
|
||||||
Detailed usage:
|
|
||||||
--------------
|
|
||||||
|
|
||||||
If a data file already exists it can be selected using the --data-file
|
|
||||||
option to specify the path and name of the file.
|
|
||||||
|
|
||||||
--algo verthash --data-file /path/to/verthash.dat --url ...
|
|
||||||
|
|
||||||
If the --data-file option is not used the default is to use 'verthash.dat'
|
|
||||||
from the current working directory.
|
|
||||||
|
|
||||||
If no data file exists it can be created by using the --verify option
|
|
||||||
without the --data-file option. If the default data file is not found in
|
|
||||||
the current directory it will be created.
|
|
||||||
|
|
||||||
--algo verthash --verify --url ...
|
|
||||||
|
|
||||||
Data file creation can take up to 30 minutes on a spinning hard drive.
|
|
||||||
Once created the new data file will be verified and used immediately
|
|
||||||
if a valid url and user were included on the command line.
|
|
||||||
|
|
||||||
A default data file can be created by omitting the url option. That will
|
|
||||||
either verify an existing default data file or create one and verify it,
|
|
||||||
then exit.
|
|
||||||
|
|
||||||
--algo verthash --verify
|
|
||||||
|
|
||||||
A data file will never be created if --data-file is specified. The miner
|
|
||||||
will exit with an error if the file is not found in the specified location.
|
|
||||||
This is to avoid accidentally creating an unwanted data file due to a typo.
|
|
||||||
|
|
||||||
After creation the data file can be moved to a more convenient location and
|
|
||||||
referenced by --data-file, or left where it is and used by default without the
|
|
||||||
--data-file option.
|
|
||||||
|
|
||||||
Data file verification takes a few seconds and is disabled by default.
|
|
||||||
VerthashMiner enables data file verification by default and has an option to
|
|
||||||
disable it.
|
|
||||||
|
|
||||||
The --verify option is intended primarily to create a new file. It's
|
|
||||||
not necessary or useful to verify a file every time the miner is started.
|
|
||||||
|
|
@@ -1,131 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
#
|
|
||||||
# Script for building Windows binaries release package using mingw.
|
|
||||||
# Requires a custom mingw environment, not intended for users.
|
|
||||||
#
|
|
||||||
# Compiles Windows EXE files for selected CPU architectures, copies them
|
|
||||||
# as well as some DLLs that aren't available in most Windows environments
|
|
||||||
# into a release folder ready to be zipped and uploaded.
|
|
||||||
|
|
||||||
# define some local variables
|
|
||||||
|
|
||||||
export LOCAL_LIB="$HOME/usr/lib"
|
|
||||||
export CONFIGURE_ARGS="--with-curl=$LOCAL_LIB/curl --host=x86_64-w64-mingw32"
|
|
||||||
export MINGW_LIB="/usr/x86_64-w64-mingw32/lib"
|
|
||||||
# set correct gcc version
|
|
||||||
export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
|
|
||||||
# used by GCC
|
|
||||||
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs"
|
|
||||||
export DEFAULT_CFLAGS="-maes -O3 -Wall"
|
|
||||||
export DEFAULT_CFLAGS_OLD="-O3 -Wall"
|
|
||||||
|
|
||||||
# make link to local gmp header file.
|
|
||||||
ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
|
|
||||||
|
|
||||||
# make release directory and copy selected DLLs.
|
|
||||||
|
|
||||||
rm -rf release > /dev/null
|
|
||||||
mkdir release
|
|
||||||
|
|
||||||
cp README.txt release/
|
|
||||||
cp README.md release/
|
|
||||||
cp RELEASE_NOTES release/
|
|
||||||
cp verthash-help.txt release/
|
|
||||||
cp $MINGW_LIB/zlib1.dll release/
|
|
||||||
cp $MINGW_LIB/libwinpthread-1.dll release/
|
|
||||||
cp $GCC_MINGW_LIB/libstdc++-6.dll release/
|
|
||||||
cp $GCC_MINGW_LIB/libgcc_s_seh-1.dll release/
|
|
||||||
cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/
|
|
||||||
|
|
||||||
# Start building...
|
|
||||||
|
|
||||||
# AVX512 SHA VAES: Intel Core Icelake, Rocketlake
|
|
||||||
./clean-all.sh || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
./autogen.sh || echo done
|
|
||||||
CFLAGS="-march=icelake-client $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
|
|
||||||
|
|
||||||
# AVX512 AES: Intel Core HEDT Skylake-X, Cascadelake
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-march=skylake-avx512 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe release/cpuminer-avx512.exe
|
|
||||||
|
|
||||||
# AVX2 SHA VAES: Intel Alderlake, AMD Zen3
|
|
||||||
make clean || echo done
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-mavx2 -msha -mvaes $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe release/cpuminer-avx2-sha-vaes.exe
|
|
||||||
|
|
||||||
# AVX2 AES SHA: AMD Zen1
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-march=znver1 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe release/cpuminer-avx2-sha.exe
|
|
||||||
|
|
||||||
# AVX2 AES: Intel Core Haswell, Skylake, Kabylake, Coffeelake, Cometlake
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-march=core-avx2 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe release/cpuminer-avx2.exe
|
|
||||||
|
|
||||||
# AVX AES: Intel Sandybridge, Ivybridge
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-march=corei7-avx -maes $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe release/cpuminer-avx.exe
|
|
||||||
|
|
||||||
# SSE4.2 AES: Intel Westmere
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-march=westmere -maes $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
|
||||||
|
|
||||||
# Nehalem SSE4.2
|
|
||||||
#make clean || echo clean
|
|
||||||
#rm -f config.status
|
|
||||||
#CFLAGS="$DEFAULT_CFLAGS_OLD -march=corei7" ./configure $CONFIGURE_ARGS
|
|
||||||
#make
|
|
||||||
#strip -s cpuminer.exe
|
|
||||||
#mv cpuminer.exe release/cpuminer-sse42.exe
|
|
||||||
|
|
||||||
# Core2 SSSE3
|
|
||||||
#make clean || echo clean
|
|
||||||
#rm -f config.status
|
|
||||||
#CFLAGS="$DEFAULT_CFLAGS_OLD -march=core2" ./configure $CONFIGURE_ARGS
|
|
||||||
#make
|
|
||||||
#strip -s cpuminer.exe
|
|
||||||
#mv cpuminer.exe release/cpuminer-ssse3.exe
|
|
||||||
#make clean || echo clean
|
|
||||||
|
|
||||||
# Generic SSE2
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-msse2 $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe release/cpuminer-sse2.exe
|
|
||||||
#make clean || echo clean
|
|
||||||
|
|
||||||
# Native with CPU groups enabled
|
|
||||||
#make clean || echo clean
|
|
||||||
#rm -f config.status
|
|
||||||
#CFLAGS="-march=native $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
|
||||||
#make -j 8
|
|
||||||
#strip -s cpuminer.exe
|
|
||||||
|
|
Binary file not shown.
52
rin/miner/gpu/RinHash-cuda/CMakeLists.txt
Normal file
52
rin/miner/gpu/RinHash-cuda/CMakeLists.txt
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.18)
project(RinHashCUDA LANGUAGES CXX CUDA)

# Language standards for host (C++) and device (CUDA) code.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD 11)

# Locate the CUDA toolkit. CUDA is already enabled as a first-class language by
# project(); FindCUDAToolkit replaces the deprecated FindCUDA module and
# provides the imported CUDA::cudart target linked below.
find_package(CUDAToolkit REQUIRED)

# GPU architectures to generate code for.
set(CMAKE_CUDA_ARCHITECTURES "50;52;60;61;70;75;80;86")

# Include directories
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Source files
set(CUDA_SOURCES
    rinhash.cu
    sha3-256.cu
)

# Headers are listed with the target so they appear in generated IDE projects.
set(HEADERS
    rinhash_device.cuh
    argon2d_device.cuh
    blake3_device.cuh
    blaze3_cpu.cuh
)

# Create executable
add_executable(rinhash-cuda-miner ${CUDA_SOURCES} ${HEADERS})

# Use the shared CUDA runtime.
set_target_properties(rinhash-cuda-miner PROPERTIES
    CUDA_RUNTIME_LIBRARY Shared
)

# Link the CUDA runtime once, through the imported target, instead of listing
# both ${CUDA_LIBRARIES} and ${CUDA_CUDART_LIBRARY} from the legacy module.
target_link_libraries(rinhash-cuda-miner PRIVATE CUDA::cudart)

# The same optimisation flag was passed for MSVC and non-MSVC hosts, so no
# compiler-specific branch is needed.
target_compile_options(rinhash-cuda-miner PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-O3>)

# Install target
install(TARGETS rinhash-cuda-miner DESTINATION bin)
|
21
rin/miner/gpu/RinHash-cuda/LICENSE
Normal file
21
rin/miner/gpu/RinHash-cuda/LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2025 Rin coin
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
40
rin/miner/gpu/RinHash-cuda/Makefile
Normal file
40
rin/miner/gpu/RinHash-cuda/Makefile
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# RinHash CUDA Miner Makefile
|
||||||
|
# CUDA implementation of RinHash algorithm for GPU mining
|
||||||
|
|
||||||
|
# Compiler and flags
|
||||||
|
NVCC = nvcc
|
||||||
|
CUDA_ARCH = -arch=sm_50 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86
|
||||||
|
NVCC_FLAGS = -O3 -std=c++11 -Xcompiler -fPIC
|
||||||
|
INCLUDES = -I.
|
||||||
|
LIBS = -lcuda -lcudart
|
||||||
|
|
||||||
|
# Source files
|
||||||
|
CUDA_SOURCES = rinhash.cu sha3-256.cu
|
||||||
|
HEADERS = rinhash_device.cuh argon2d_device.cuh blake3_device.cuh blaze3_cpu.cuh
|
||||||
|
|
||||||
|
# Output executable
|
||||||
|
TARGET = rinhash-cuda-miner.exe
|
||||||
|
|
||||||
|
# Build targets
|
||||||
|
all: $(TARGET)
|
||||||
|
|
||||||
|
$(TARGET): $(CUDA_SOURCES) $(HEADERS)
|
||||||
|
$(NVCC) $(NVCC_FLAGS) $(CUDA_ARCH) $(INCLUDES) $(CUDA_SOURCES) -o $(TARGET) $(LIBS)
|
||||||
|
|
||||||
|
# Clean build artifacts
|
||||||
|
clean:
|
||||||
|
del /Q $(TARGET) *.obj 2>nul || true
|
||||||
|
|
||||||
|
# Install target (copy to main directory)
|
||||||
|
install: $(TARGET)
|
||||||
|
copy $(TARGET) ..\..\$(TARGET)
|
||||||
|
|
||||||
|
# Debug build
|
||||||
|
debug: NVCC_FLAGS += -g -G -DDEBUG
|
||||||
|
debug: $(TARGET)
|
||||||
|
|
||||||
|
# Test run
|
||||||
|
test: $(TARGET)
|
||||||
|
.\$(TARGET) --help
|
||||||
|
|
||||||
|
.PHONY: all clean install debug test
|
26
rin/miner/gpu/RinHash-cuda/README.md
Normal file
26
rin/miner/gpu/RinHash-cuda/README.md
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# RinHash CUDA Implementation
|
||||||
|
|
||||||
|
🚀 High-performance GPU implementation of RinHash – an ASIC-resistant hashing algorithm designed for RinCoin mining.
|
||||||
|
|
||||||
|
## 🔧 Algorithm Overview
|
||||||
|
|
||||||
|
RinHash is a custom Proof-of-Work algorithm designed to resist ASICs by combining three cryptographic hash functions:
|
||||||
|
|
||||||
|
1. **BLAKE3** – Fast and modern hashing.
|
||||||
|
2. **Argon2d** – Memory-hard password hashing (64KB, 2 iterations).
|
||||||
|
3. **SHA3-256** – Secure final hash.
|
||||||
|
|
||||||
|
The final output is a 32-byte SHA3-256 digest of the Argon2d result, which itself is applied to the BLAKE3 hash of the input block header.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 💻 CUDA Implementation
|
||||||
|
|
||||||
|
This repository contains a full GPU-based implementation of RinHash, ported to CUDA for use in high-efficiency miners. Key features include:
|
||||||
|
|
||||||
|
- Full GPU parallelization of BLAKE3, Argon2d, and SHA3-256
|
||||||
|
- Memory-hard Argon2d executed entirely on device memory
|
||||||
|
- Batch processing support for multiple nonces
|
||||||
|
- Matching hash output with official CPU implementation
|
||||||
|
|
||||||
|
---
|
24
rin/miner/gpu/RinHash-cuda/RinHashCUDA.sln
Normal file
24
rin/miner/gpu/RinHash-cuda/RinHashCUDA.sln
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio Version 17
|
||||||
|
VisualStudioVersion = 17.0.31903.59
|
||||||
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "RinHashCUDA", "RinHashCUDA.vcxproj", "{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|x64 = Debug|x64
|
||||||
|
Release|x64 = Release|x64
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|x64.ActiveCfg = Debug|x64
|
||||||
|
{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|x64.Build.0 = Debug|x64
|
||||||
|
{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|x64.ActiveCfg = Release|x64
|
||||||
|
{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|x64.Build.0 = Release|x64
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
|
HideSolutionNode = FALSE
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||||
|
SolutionGuid = {12345678-1234-5678-9ABC-DEF123456789}
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
114
rin/miner/gpu/RinHash-cuda/RinHashCUDA.vcxproj
Normal file
114
rin/miner/gpu/RinHash-cuda/RinHashCUDA.vcxproj
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
|
<PropertyGroup Label="Globals">
|
||||||
|
<VCProjectVersion>16.0</VCProjectVersion>
|
||||||
|
<ProjectGuid>{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}</ProjectGuid>
|
||||||
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
<RootNamespace>RinHashCUDA</RootNamespace>
|
||||||
|
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v143</PlatformToolset>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v143</PlatformToolset>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
|
<ImportGroup Label="ExtensionSettings">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="Shared">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<PropertyGroup Label="UserMacros" />
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<LinkIncremental>true</LinkIncremental>
|
||||||
|
<OutDir>$(SolutionDir)bin\$(Platform)\$(Configuration)\</OutDir>
|
||||||
|
<IntDir>$(SolutionDir)obj\$(Platform)\$(Configuration)\</IntDir>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<LinkIncremental>false</LinkIncremental>
|
||||||
|
<OutDir>$(SolutionDir)bin\$(Platform)\$(Configuration)\</OutDir>
|
||||||
|
<IntDir>$(SolutionDir)obj\$(Platform)\$(Configuration)\</IntDir>
|
||||||
|
</PropertyGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<ConformanceMode>true</ConformanceMode>
|
||||||
|
<AdditionalIncludeDirectories>$(CUDA_PATH)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Console</SubSystem>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
<AdditionalLibraryDirectories>$(CUDA_PATH)\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||||
|
<AdditionalDependencies>cudart.lib;cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<ConformanceMode>true</ConformanceMode>
|
||||||
|
<AdditionalIncludeDirectories>$(CUDA_PATH)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Console</SubSystem>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
<AdditionalLibraryDirectories>$(CUDA_PATH)\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||||
|
<AdditionalDependencies>cudart.lib;cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="rinhash.cu">
|
||||||
|
<FileType>CUDA C/C++</FileType>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="sha3-256.cu">
|
||||||
|
<FileType>CUDA C/C++</FileType>
|
||||||
|
</ClCompile>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClInclude Include="argon2d_device.cuh" />
|
||||||
|
<ClInclude Include="blake3_device.cuh" />
|
||||||
|
<ClInclude Include="blaze3_cpu.cuh" />
|
||||||
|
<ClInclude Include="rinhash_device.cuh" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<None Include="build-cuda.bat" />
|
||||||
|
<None Include="CMakeLists.txt" />
|
||||||
|
<None Include="Makefile" />
|
||||||
|
<None Include="README.md" />
|
||||||
|
</ItemGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 12.5.targets" />
|
||||||
|
</ImportGroup>
|
||||||
|
</Project>
|
56
rin/miner/gpu/RinHash-cuda/RinHashCUDA.vcxproj.filters
Normal file
56
rin/miner/gpu/RinHash-cuda/RinHashCUDA.vcxproj.filters
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup>
|
||||||
|
<Filter Include="Source Files">
|
||||||
|
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D38A0280}</UniqueIdentifier>
|
||||||
|
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="Header Files">
|
||||||
|
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||||
|
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="CUDA Files">
|
||||||
|
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||||
|
<Extensions>cu;cuh</Extensions>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="Build Files">
|
||||||
|
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD02}</UniqueIdentifier>
|
||||||
|
</Filter>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="rinhash.cu">
|
||||||
|
<Filter>CUDA Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="sha3-256.cu">
|
||||||
|
<Filter>CUDA Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClInclude Include="argon2d_device.cuh">
|
||||||
|
<Filter>CUDA Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="blake3_device.cuh">
|
||||||
|
<Filter>CUDA Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="blaze3_cpu.cuh">
|
||||||
|
<Filter>CUDA Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="rinhash_device.cuh">
|
||||||
|
<Filter>CUDA Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<None Include="build-cuda.bat">
|
||||||
|
<Filter>Build Files</Filter>
|
||||||
|
</None>
|
||||||
|
<None Include="CMakeLists.txt">
|
||||||
|
<Filter>Build Files</Filter>
|
||||||
|
</None>
|
||||||
|
<None Include="Makefile">
|
||||||
|
<Filter>Build Files</Filter>
|
||||||
|
</None>
|
||||||
|
<None Include="README.md">
|
||||||
|
<Filter>Build Files</Filter>
|
||||||
|
</None>
|
||||||
|
</ItemGroup>
|
||||||
|
</Project>
|
918
rin/miner/gpu/RinHash-cuda/argon2d_device.cuh
Normal file
918
rin/miner/gpu/RinHash-cuda/argon2d_device.cuh
Normal file
@@ -0,0 +1,918 @@
|
|||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <device_launch_parameters.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
//=== Argon2 constants ===//
|
||||||
|
#define ARGON2_BLOCK_SIZE 1024
|
||||||
|
#define ARGON2_QWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 8)
|
||||||
|
#define ARGON2_OWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 16)
|
||||||
|
#define ARGON2_HWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 32)
|
||||||
|
#define ARGON2_SYNC_POINTS 4
|
||||||
|
#define ARGON2_PREHASH_DIGEST_LENGTH 64
|
||||||
|
#define ARGON2_PREHASH_SEED_LENGTH 72
|
||||||
|
#define ARGON2_VERSION_10 0x10
|
||||||
|
#define ARGON2_VERSION_13 0x13
|
||||||
|
#define ARGON2_ADDRESSES_IN_BLOCK 128
|
||||||
|
|
||||||
|
//=== Blake2b constants ===//
|
||||||
|
#define BLAKE2B_BLOCKBYTES 128
|
||||||
|
#define BLAKE2B_OUTBYTES 64
|
||||||
|
#define BLAKE2B_KEYBYTES 64
|
||||||
|
#define BLAKE2B_SALTBYTES 16
|
||||||
|
#define BLAKE2B_PERSONALBYTES 16
|
||||||
|
#define BLAKE2B_ROUNDS 12
|
||||||
|
|
||||||
|
//=== Structure definitions ===//
|
||||||
|
// One Argon2 memory block: ARGON2_QWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 8)
// 64-bit words, declared with 64-byte alignment.
typedef struct __align__(64) block_ {
|
||||||
|
uint64_t v[ARGON2_QWORDS_IN_BLOCK];
|
||||||
|
} block;
|
||||||
|
|
||||||
|
// Describes one Argon2 instance: a pointer to its block memory plus the
// sizing fields of the run.
// NOTE(review): the relationships between memory_blocks, lane_length and
// segment_length are not visible in this part of the file -- confirm against
// the code that fills this struct in.
typedef struct Argon2_instance_t {
|
||||||
|
block *memory; /* Memory pointer */
|
||||||
|
// NOTE(review): presumably ARGON2_VERSION_10 or ARGON2_VERSION_13 -- confirm.
uint32_t version;
|
||||||
|
uint32_t passes; /* Number of passes */
|
||||||
|
uint32_t memory_blocks; /* Number of blocks in memory */
|
||||||
|
uint32_t segment_length;
|
||||||
|
uint32_t lane_length;
|
||||||
|
uint32_t lanes;
|
||||||
|
uint32_t threads;
|
||||||
|
int print_internals; /* whether to print the memory blocks */
|
||||||
|
} argon2_instance_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
 * Argon2 position: where we construct the block right now. Used to distribute
|
||||||
|
 * work between threads.
|
||||||
|
 */
|
||||||
|
typedef struct Argon2_position_t {
|
||||||
|
uint32_t pass;
|
||||||
|
uint32_t lane;
|
||||||
|
// NOTE(review): stored as uint8_t; presumably always below ARGON2_SYNC_POINTS -- confirm.
uint8_t slice;
|
||||||
|
uint32_t index;
|
||||||
|
} argon2_position_t;
|
||||||
|
|
||||||
|
// Working state of one Blake2b computation.
typedef struct __blake2b_state {
|
||||||
|
// NOTE(review): presumably the eight chaining-value words -- confirm.
uint64_t h[8];
|
||||||
|
// Counter held as two 64-bit words: blake2b_increment_counter adds to t[0]
// and carries into t[1].
uint64_t t[2];
|
||||||
|
// Flag words: blake2b_set_lastblock sets f[0] to all ones, and
// blake2b_set_lastnode sets f[1] to all ones.
uint64_t f[2];
|
||||||
|
// Byte buffer of BLAKE2B_BLOCKBYTES bytes.
uint8_t buf[BLAKE2B_BLOCKBYTES];
|
||||||
|
// NOTE(review): presumably the number of bytes currently held in buf -- confirm.
unsigned buflen;
|
||||||
|
// NOTE(review): presumably the requested digest length in bytes -- confirm.
unsigned outlen;
|
||||||
|
// When nonzero, blake2b_set_lastblock also calls blake2b_set_lastnode.
uint8_t last_node;
|
||||||
|
} blake2b_state;
|
||||||
|
|
||||||
|
// Blake2b parameter block. The number in each trailing comment is the running
// byte total up to and including that field; the fields add up to 64 bytes.
typedef struct __blake2b_param {
|
||||||
|
uint8_t digest_length; /* 1 */
|
||||||
|
uint8_t key_length; /* 2 */
|
||||||
|
uint8_t fanout; /* 3 */
|
||||||
|
uint8_t depth; /* 4 */
|
||||||
|
uint32_t leaf_length; /* 8 */
|
||||||
|
uint64_t node_offset; /* 16 */
|
||||||
|
uint8_t node_depth; /* 17 */
|
||||||
|
uint8_t inner_length; /* 18 */
|
||||||
|
uint8_t reserved[14]; /* 32 */
|
||||||
|
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
|
||||||
|
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
|
||||||
|
} blake2b_param;
|
||||||
|
|
||||||
|
//=== Constant memory ===//
|
||||||
|
// Eight 64-bit initialization-vector words for Blake2b, declared __constant__.
__constant__ uint64_t blake2b_IV[8] = {
|
||||||
|
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
||||||
|
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
||||||
|
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
||||||
|
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
|
||||||
|
};
|
||||||
|
|
||||||
|
// Message-word schedule: 12 rows, each a permutation of the indices 0..15.
// The last two rows repeat the first two.
// NOTE(review): presumably one row per round (BLAKE2B_ROUNDS is 12) -- confirm
// against the compression function.
__constant__ uint8_t blake2b_sigma[12][16] = {
|
||||||
|
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||||
|
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||||
|
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
|
||||||
|
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
|
||||||
|
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
|
||||||
|
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
|
||||||
|
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
|
||||||
|
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
|
||||||
|
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
|
||||||
|
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
|
||||||
|
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||||
|
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}
|
||||||
|
};
|
||||||
|
|
||||||
|
//=== Common helper functions ===//
// Rotate the 64-bit value x right by n bits.
//
// Both shift counts are reduced modulo 64. Without the masks, n == 0 (or any
// multiple of 64) would shift a 64-bit value by 64 bits, which is undefined
// behaviour. For n in 1..63 the result is unchanged from the plain formula.
__device__ __forceinline__ uint64_t rotr64(uint64_t x, uint32_t n) {
    n &= 63u;
    return (x >> n) | (x << ((64u - n) & 63u));
}
|
||||||
|
|
||||||
|
// fBlaMka, kept identical to the C reference implementation:
// returns x + y + 2 * lo32(x) * lo32(y), all arithmetic modulo 2^64.
__device__ __forceinline__ uint64_t fBlaMka(uint64_t x, uint64_t y) {
    const uint64_t low_x = (uint32_t)x;
    const uint64_t low_y = (uint32_t)y;
    const uint64_t doubled_product = (low_x * low_y) << 1;
    return x + y + doubled_product;
}
|
||||||
|
|
||||||
|
// BLAKE2b G mixing function, kept identical to the reference implementation.
// Mixes the four state words a, b, c, d in place with message words m1 and m2.
__device__ __forceinline__ void blake2b_G(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t m1, uint64_t m2) {
    // First half: inject m1, rotate by 32 and 24.
    a += b + m1;
    d = rotr64(a ^ d, 32);
    c += d;
    b = rotr64(c ^ b, 24);

    // Second half: inject m2, rotate by 16 and 63.
    a += b + m2;
    d = rotr64(a ^ d, 16);
    c += d;
    b = rotr64(c ^ b, 63);
}
|
||||||
|
|
||||||
|
// Store the 32-bit value w at dst in little-endian byte order.
// dst does not need to be aligned.
__device__ __forceinline__ void store32(void *dst, uint32_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
    memcpy(dst, &w, sizeof w);
#else
    uint8_t *out = (uint8_t *)dst;
    for (int byte = 0; byte < 4; ++byte) {
        out[byte] = (uint8_t)(w >> (8 * byte));
    }
#endif
}
|
||||||
|
// Add inc to the 128-bit counter held in S->t[0] (low word) and S->t[1]
// (high word), carrying into the high word when the low word wraps.
__device__ __forceinline__ void blake2b_increment_counter(blake2b_state *S,
                                                          uint64_t inc) {
    const uint64_t low = S->t[0] + inc;
    S->t[0] = low;
    if (low < inc) {
        S->t[1] += 1;
    }
}
|
||||||
|
|
||||||
|
// Set the second finalization flag word (f[1]) to all ones.
__device__ __forceinline__ void blake2b_set_lastnode(blake2b_state *S) {
    S->f[1] = ~(uint64_t)0;
}
|
||||||
|
|
||||||
|
// Mark the state as being on its final block: f[0] becomes all ones, and
// f[1] as well when the state is flagged as a last node.
__device__ __forceinline__ void blake2b_set_lastblock(blake2b_state *S) {
    if (S->last_node) {
        blake2b_set_lastnode(S);
    }
    S->f[0] = ~(uint64_t)0;
}
|
||||||
|
|
||||||
|
// Zero every byte of a blake2b_state.
__device__ void blake2b_state_memset(blake2b_state* S) {
    uint8_t *bytes = (uint8_t *)S;
    for (size_t off = 0; off < sizeof(blake2b_state); ++off) {
        bytes[off] = 0;
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// XOR every 64-bit word of src into the corresponding word of dst.
__device__ void xor_block(block* dst, const block* src) {
    uint64_t *out = dst->v;
    const uint64_t *in = src->v;
    for (int w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        out[w] ^= in[w];
    }
}
|
||||||
|
|
||||||
|
// Hand-written byte copy; the original author notes that CUDA's memcpy is
// apparently slow when called from inside a kernel.
// Copies n bytes from src to dest in ascending address order.
__device__ void c_memcpy(void *dest, const void *src, size_t n) {
    uint8_t *out = (uint8_t *)dest;
    const uint8_t *in = (const uint8_t *)src;
    while (n > 0) {
        *out++ = *in++;
        --n;
    }
}
|
||||||
|
|
||||||
|
// Copy all ARGON2_QWORDS_IN_BLOCK 64-bit words of src into dst.
__device__ void copy_block(block* dst, const block* src) {
    for (int w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        dst->v[w] = src->v[w];
    }
}
|
||||||
|
|
||||||
|
// fill_block, kept identical in effect to the C reference implementation.
//
//   R          = ref_block XOR prev_block
//   next_block = P(R) XOR R                     when with_xor == 0
//   next_block = P(R) XOR R XOR old next_block  when with_xor != 0
//
// P is the message-less BLAKE2 round (built on fBlaMka) applied first to each
// run of 16 consecutive words, then to eight interleaved 16-word groups.
__device__ void fill_block(const block* prev_block, const block* ref_block, block* next_block, int with_xor) {
    block mixed;     // R, permuted in place below
    block feedback;  // R (optionally XOR old next_block), XORed back at the end

    // Every input word is read here, before next_block is written, so the
    // call stays correct when ref_block and next_block are the same block.
    for (unsigned w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        const uint64_t r = ref_block->v[w] ^ prev_block->v[w];
        mixed.v[w] = r;
        feedback.v[w] = with_xor ? (r ^ next_block->v[w]) : r;
    }

    // G without message words: additions are replaced by fBlaMka.
    auto quarter = [](uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d) {
        a = fBlaMka(a, b);
        d = rotr64(d ^ a, 32);
        c = fBlaMka(c, d);
        b = rotr64(b ^ c, 24);
        a = fBlaMka(a, b);
        d = rotr64(d ^ a, 16);
        c = fBlaMka(c, d);
        b = rotr64(b ^ c, 63);
    };

    // One BLAKE2 round over sixteen words: four column mixes, four diagonal mixes.
    auto permute16 = [&quarter](uint64_t& v0, uint64_t& v1, uint64_t& v2, uint64_t& v3,
                                uint64_t& v4, uint64_t& v5, uint64_t& v6, uint64_t& v7,
                                uint64_t& v8, uint64_t& v9, uint64_t& v10, uint64_t& v11,
                                uint64_t& v12, uint64_t& v13, uint64_t& v14, uint64_t& v15) {
        quarter(v0, v4, v8, v12);
        quarter(v1, v5, v9, v13);
        quarter(v2, v6, v10, v14);
        quarter(v3, v7, v11, v15);

        quarter(v0, v5, v10, v15);
        quarter(v1, v6, v11, v12);
        quarter(v2, v7, v8, v13);
        quarter(v3, v4, v9, v14);
    };

    // First pass: words 16*g .. 16*g + 15.
    for (unsigned g = 0; g < 8; ++g) {
        uint64_t* q = &mixed.v[16 * g];
        permute16(q[0], q[1], q[2], q[3], q[4], q[5], q[6], q[7],
                  q[8], q[9], q[10], q[11], q[12], q[13], q[14], q[15]);
    }

    // Second pass: word pairs (2*g, 2*g + 1) taken at a stride of 16 words.
    for (unsigned g = 0; g < 8; ++g) {
        uint64_t* q = &mixed.v[2 * g];
        permute16(q[0], q[1], q[16], q[17], q[32], q[33], q[48], q[49],
                  q[64], q[65], q[80], q[81], q[96], q[97], q[112], q[113]);
    }

    for (unsigned w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        next_block->v[w] = feedback.v[w] ^ mixed.v[w];
    }
}
|
||||||
|
|
||||||
|
// Assign val to dest[0] .. dest[count - 1].
// count is a number of ELEMENTS of whatever dest points to, not a byte count.
template<typename T, typename ptr_t>
__device__ void c_memset(ptr_t dest, T val, int count) {
    int i = 0;
    while (i < count) {
        dest[i] = val;
        ++i;
    }
}
|
||||||
|
|
||||||
|
// Fill every byte of block b with the value `in`.
//
// The previous body passed sizeof(b->v) (a byte count) to c_memset, which
// counts elements of uint64_t, so it wrote far past the end of the block.
// It also stored `in` once per 64-bit word instead of once per byte.
// Here `in` is replicated into all eight bytes of a word and exactly
// ARGON2_QWORDS_IN_BLOCK words are written, matching xor_block/copy_block.
__device__ void init_block_value(block *b, uint8_t in) {
    const uint64_t fill = (uint64_t)in * 0x0101010101010101ULL;
    for (unsigned w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        b->v[w] = fill;
    }
}
|
||||||
|
|
||||||
|
// Produce the next block of addresses: bump the counter word input_block->v[6],
// then run fill_block twice with zero_block as the "previous" block
// (input_block -> address_block, then address_block -> address_block).
__device__ void next_addresses(block *address_block, block *input_block,
                               const block *zero_block) {
    input_block->v[6] += 1;

    fill_block(zero_block, input_block, address_block, 0);
    fill_block(zero_block, address_block, address_block, 0);
}
|
||||||
|
|
||||||
|
// BLAKE2b mixing step used by blake2b_compress.
// Performs exactly the same sequence of operations as blake2b_G, so it simply
// forwards to it: mixes a, b, c, d in place with message words x and y.
__device__ void G1(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t x, uint64_t y) {
    blake2b_G(a, b, c, d, x, y);
}
|
||||||
|
|
||||||
|
// BLAKE2b compression function F.
// Mixes one BLAKE2B_BLOCKBYTES message block into the chaining value S->h,
// using the counter S->t and the finalization flags S->f.
__device__ void blake2b_compress(blake2b_state* S, const uint8_t block[BLAKE2B_BLOCKBYTES]) {
    uint64_t m[16];
    uint64_t v[16];

    // Decode the block as sixteen little-endian 64-bit words.
    for (int w = 0; w < 16; ++w) {
        uint64_t word = 0;
        for (int k = 7; k >= 0; --k) {
            word = (word << 8) | (uint64_t)block[8 * w + k];
        }
        m[w] = word;
    }

    // Working vector: chaining value in the low half, IV in the high half.
    for (int i = 0; i < 8; ++i) {
        v[i] = S->h[i];
    }
    for (int i = 0; i < 8; ++i) {
        v[8 + i] = blake2b_IV[i];
    }

    // Fold the counter and the finalization flags into the last four words.
    v[12] ^= S->t[0];
    v[13] ^= S->t[1];
    v[14] ^= S->f[0];
    v[15] ^= S->f[1];

    for (int r = 0; r < BLAKE2B_ROUNDS; ++r) {
        const uint8_t* s = blake2b_sigma[r];

        // Column step: (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15).
#pragma unroll
        for (int c = 0; c < 4; ++c) {
            G1(v[c], v[4 + c], v[8 + c], v[12 + c],
               m[s[2 * c]], m[s[2 * c + 1]]);
        }

        // Diagonal step: (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14).
#pragma unroll
        for (int d = 0; d < 4; ++d) {
            G1(v[d], v[4 + ((d + 1) & 3)], v[8 + ((d + 2) & 3)], v[12 + ((d + 3) & 3)],
               m[s[8 + 2 * d]], m[s[9 + 2 * d]]);
        }
    }

    // Feed-forward: fold both halves of the working vector into h.
    for (int i = 0; i < 8; ++i) {
        S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
    }
}
|
||||||
|
|
||||||
|
// Helpers to load/store 64-bit values in little-endian byte order.

// Read eight bytes at src as a little-endian 64-bit integer.
// src does not need to be aligned.
__device__ __forceinline__ uint64_t load64(const void* src) {
    const uint8_t* bytes = (const uint8_t*)src;
    uint64_t value = 0;
    for (int k = 7; k >= 0; --k) {
        value = (value << 8) | (uint64_t)bytes[k];
    }
    return value;
}
|
||||||
|
|
||||||
|
// Write w to the eight bytes at dst in little-endian byte order.
// dst does not need to be aligned.
__device__ __forceinline__ void store64(void* dst, uint64_t w) {
    uint8_t* out = (uint8_t*)dst;
    for (int k = 0; k < 8; ++k) {
        out[k] = (uint8_t)(w >> (8 * k));
    }
}
|
||||||
|
|
||||||
|
// Decode ARGON2_QWORDS_IN_BLOCK little-endian 64-bit words from the byte
// buffer `input` into dst->v.
__device__ void load_block(block *dst, const void *input) {
    const uint8_t *cursor = (const uint8_t *)input;
    for (unsigned w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        dst->v[w] = load64(cursor);
        cursor += sizeof(dst->v[w]);
    }
}
|
||||||
|
|
||||||
|
// Encode the ARGON2_QWORDS_IN_BLOCK words of src->v into the byte buffer
// `output` as little-endian 64-bit values.
__device__ void store_block(void *output, const block *src) {
    uint8_t *cursor = (uint8_t *)output;
    for (unsigned w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        store64(cursor, src->v[w]);
        cursor += sizeof(src->v[w]);
    }
}
|
||||||
|
|
||||||
|
// Initialize S for an unkeyed BLAKE2b hash producing `outlen` bytes.
//
// Returns 0 on success and -1 when S is NULL or outlen is outside
// 1..BLAKE2B_OUTBYTES. On an invalid outlen the state is zeroed and marked as
// finalized, so a caller that ignores the return value gets -1 from
// blake2b_update instead of a digest built from a truncated length
// (previously (uint8_t)outlen silently wrapped, e.g. 256 became 0).
__device__ int blake2b_init(blake2b_state* S, size_t outlen) {
    if (S == NULL) {
        return -1;
    }

    // Start from an all-zero state (counter, flags, buffer, buflen).
    blake2b_state_memset(S);

    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        blake2b_set_lastblock(S); // poison: later updates are rejected
        return -1;
    }

    // Parameter block for sequential, unkeyed hashing.
    blake2b_param P;
    P.digest_length = (uint8_t)outlen;
    P.key_length = 0;
    P.fanout = 1;
    P.depth = 1;
    P.leaf_length = 0;
    P.node_offset = 0;
    P.node_depth = 0;
    P.inner_length = 0;
    c_memset(P.reserved, 0, sizeof(P.reserved));
    c_memset(P.salt, 0, sizeof(P.salt));
    c_memset(P.personal, 0, sizeof(P.personal));

    // h = IV XOR parameter block, read as eight little-endian 64-bit words.
    const unsigned char *p = (const unsigned char *)(&P);
    for (int i = 0; i < 8; ++i) {
        S->h[i] = blake2b_IV[i] ^ load64(&p[i * sizeof(S->h[i])]);
    }

    S->outlen = P.digest_length;
    return 0;
}
|
||||||
|
|
||||||
|
// When non-zero, clear_internal_memory() really wipes the buffers it is given.
// Defaults to 0 (wiping disabled).
__device__ int FLAG_clear_internal_memory = 0;

// Zero n bytes at v if wiping is enabled and v is non-NULL.
//
// The body used to be a commented-out call, so the function did nothing even
// with the flag set. The wipe goes through a volatile pointer so the compiler
// cannot drop the stores as dead writes.
__device__ void clear_internal_memory(void *v, size_t n) {
    if (FLAG_clear_internal_memory && v) {
        volatile uint8_t *bytes = (volatile uint8_t *)v;
        for (size_t i = 0; i < n; ++i) {
            bytes[i] = 0;
        }
    }
}
|
||||||
|
|
||||||
|
// Absorb inlen bytes from `in` into the hash state, following the reference
// implementation.
//
// Returns 0 on success (including inlen == 0) and -1 when S or in is NULL or
// the state has already been finalized. A completely full buffer is kept
// un-compressed until more input (or blake2b_final) arrives, because the last
// block has to be compressed with the finalization flag set.
__device__ int blake2b_update(blake2b_state* S, const uint8_t* in, size_t inlen) {
    if (inlen == 0) {
        return 0;
    }
    if (S == NULL || in == NULL) {
        return -1;
    }
    if (S->f[0] != 0) {
        return -1; // state was already finalized
    }

    const uint8_t *cursor = (const uint8_t *)in;
    size_t remaining = inlen;

    if (S->buflen + remaining > BLAKE2B_BLOCKBYTES) {
        // Top up the partially filled buffer and compress it.
        const size_t have = S->buflen;
        const size_t room = BLAKE2B_BLOCKBYTES - have;
        c_memcpy(&S->buf[have], cursor, room);
        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
        blake2b_compress(S, S->buf);
        S->buflen = 0;
        cursor += room;
        remaining -= room;

        // Compress whole blocks straight from the input; always leave at
        // least one byte behind for the buffer.
        for (; remaining > BLAKE2B_BLOCKBYTES;
             remaining -= BLAKE2B_BLOCKBYTES, cursor += BLAKE2B_BLOCKBYTES) {
            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
            blake2b_compress(S, cursor);
        }
    }

    // Stash the tail for the next update or for blake2b_final.
    c_memcpy(&S->buf[S->buflen], cursor, remaining);
    S->buflen += (unsigned int)remaining;
    return 0;
}
|
||||||
|
|
||||||
|
// Finish the hash and write S->outlen digest bytes to `out`.
//
// Returns 0 on success, -1 when:
//   - S or out is NULL,
//   - outlen (the capacity of `out`) is smaller than the digest length chosen
//     at init time; this check was missing, so a short buffer was overrun,
//   - the state has already been finalized; this check was missing, so a
//     second call re-compressed the padded buffer.
__device__ int blake2b_final(blake2b_state* S, uint8_t* out, size_t outlen) {
    if (!S || !out || outlen < S->outlen) {
        return -1;
    }
    if (S->f[0] != 0) {
        return -1; // already finalized
    }

    uint8_t buffer[BLAKE2B_OUTBYTES] = {0};

    blake2b_increment_counter(S, S->buflen);
    blake2b_set_lastblock(S);
    c_memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
    blake2b_compress(S, S->buf);

    // Serialize the full chaining value, then hand out only S->outlen bytes.
    for (unsigned int i = 0; i < 8; ++i) {
        store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
    }
    c_memcpy(out, buffer, S->outlen);

    // No-ops unless FLAG_clear_internal_memory is set.
    clear_internal_memory(buffer, sizeof(buffer));
    clear_internal_memory(S->buf, sizeof(S->buf));
    clear_internal_memory(S->h, sizeof(S->h));
    return 0;
}
|
||||||
|
|
||||||
|
// Initialize S for a keyed BLAKE2b hash producing `outlen` bytes.
//
// Returns 0 on success and -1 when S is NULL, outlen is outside
// 1..BLAKE2B_OUTBYTES, key is NULL, or keylen is outside 1..BLAKE2B_KEYBYTES.
//
// Fixes relative to the previous version:
//   - The state is zeroed first. Before, only h and outlen were written, so
//     the blake2b_update call below read uninitialized t/f/buflen/last_node
//     (blake2b() passes a fresh stack variable).
//   - key/keylen/outlen are validated; an oversized keylen used to overrun
//     the local key block.
//   - The result of absorbing the key block is returned instead of dropped.
__device__ int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
                                size_t keylen) {
    if (S == NULL) {
        return -1;
    }

    blake2b_state_memset(S);

    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES ||
        key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES) {
        blake2b_set_lastblock(S); // poison: later updates are rejected
        return -1;
    }

    /* Setup Parameter Block for keyed BLAKE2 */
    blake2b_param P;
    P.digest_length = (uint8_t)outlen;
    P.key_length = (uint8_t)keylen;
    P.fanout = 1;
    P.depth = 1;
    P.leaf_length = 0;
    P.node_offset = 0;
    P.node_depth = 0;
    P.inner_length = 0;
    c_memset(P.reserved, 0, sizeof(P.reserved));
    c_memset(P.salt, 0, sizeof(P.salt));
    c_memset(P.personal, 0, sizeof(P.personal));

    // h = IV XOR parameter block, read as eight little-endian 64-bit words.
    const unsigned char *p = (const unsigned char *)(&P);
    for (int i = 0; i < 8; ++i) {
        S->h[i] = blake2b_IV[i] ^ load64(&p[i * sizeof(S->h[i])]);
    }
    S->outlen = P.digest_length;

    // The key, zero-padded to one full block, is absorbed as the first block.
    uint8_t key_block[BLAKE2B_BLOCKBYTES];
    c_memset(key_block, 0, BLAKE2B_BLOCKBYTES);
    c_memcpy(key_block, key, keylen);
    const int rc = blake2b_update(S, key_block, BLAKE2B_BLOCKBYTES);

    /* Burn the key from stack */
    clear_internal_memory(key_block, BLAKE2B_BLOCKBYTES);
    return rc;
}
|
||||||
|
|
||||||
|
// One-shot BLAKE2b: hash `inlen` bytes at `in` into `outlen` bytes at `out`,
// keyed with `key` when keylen > 0.
//
// Returns the result of blake2b_final on success, or -1 when an argument is
// invalid (NULL in with inlen > 0, NULL out, outlen outside
// 1..BLAKE2B_OUTBYTES, NULL key with keylen > 0, keylen > BLAKE2B_KEYBYTES)
// or when any init/update step fails.
__device__ int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
                       const void *key, size_t keylen) {
    blake2b_state S;
    int ret = -1;

    const bool input_ok = !(NULL == in && inlen > 0);
    const bool output_ok = !(NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES);
    const bool key_ok = !((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES);

    if (input_ok && output_ok && key_ok) {
        const int init_rc = (keylen > 0) ? blake2b_init_key(&S, outlen, key, keylen)
                                         : blake2b_init(&S, outlen);
        if (init_rc >= 0 && blake2b_update(&S, (const uint8_t*)in, inlen) >= 0) {
            ret = blake2b_final(&S, (uint8_t*)out, outlen);
        }
    }

    // Runs on every path, success or failure.
    clear_internal_memory(&S, sizeof(S));
    return ret;
}
|
||||||
|
|
||||||
|
// index_alpha, kept identical to the C reference implementation (signature
// included).
//
// Maps the 32-bit pseudo_rand value to the index, within one lane, of the
// block to reference. `position` gives the pass/slice/index currently being
// filled; same_lane tells whether the reference lane is the current lane.
// All size arithmetic is unsigned and wraps modulo 2^32 as in the reference.
__device__ uint32_t index_alpha(const argon2_instance_t *instance,
                                const argon2_position_t *position, uint32_t pseudo_rand,
                                int same_lane) {
    const bool first_pass = (0 == position->pass);
    const bool first_slice_of_first_pass = first_pass && (0 == position->slice);

    // Blocks available from segments other than the current one.
    uint32_t reference_area_size;
    if (first_pass) {
        reference_area_size = position->slice * instance->segment_length;
    } else {
        reference_area_size = instance->lane_length - instance->segment_length;
    }

    if (same_lane || first_slice_of_first_pass) {
        // Same lane: add the blocks of the current segment built so far,
        // excluding the immediately preceding one.
        reference_area_size += position->index - 1;
    } else if (position->index == 0) {
        // Other lane, first block of the segment: drop one block.
        reference_area_size -= 1;
    }

    /* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
     * relative position */
    uint64_t relative_position = pseudo_rand;
    relative_position = (relative_position * relative_position) >> 32;
    relative_position = (reference_area_size - 1) -
                        ((reference_area_size * relative_position) >> 32);

    /* 1.2.5 Computing starting position */
    uint32_t start_position = 0;
    if (!first_pass && position->slice != ARGON2_SYNC_POINTS - 1) {
        start_position = (position->slice + 1) * instance->segment_length;
    }

    /* 1.2.6. Computing absolute position */
    return (uint32_t)((start_position + relative_position) % instance->lane_length);
}
|
||||||
|
|
||||||
|
// fill_segment, added to match the C reference implementation.
//
// Fills every block of the segment identified by `position` (pass, lane,
// slice) in instance->memory. Data-independent addressing is switched off
// here, so the pseudo-random value always comes from word 0 of the previous
// block; the data-independent branches are kept for parity with the reference.
__device__ void fill_segment(const argon2_instance_t *instance,
                             argon2_position_t position) {
    block address_block, input_block, zero_block;
    const int data_independent_addressing = false;
    const bool first_segment = (0 == position.pass) && (0 == position.slice);

    if (data_independent_addressing) {
        init_block_value(&zero_block, 0);
        init_block_value(&input_block, 0);

        input_block.v[0] = position.pass;
        input_block.v[1] = position.lane;
        input_block.v[2] = position.slice;
        input_block.v[3] = instance->memory_blocks;
        input_block.v[4] = instance->passes;
        input_block.v[5] = 0;
    }

    uint32_t starting_index = 0;
    if (first_segment) {
        starting_index = 2; /* the first two blocks are already generated */

        /* Don't forget to generate the first block of addresses: */
        if (data_independent_addressing) {
            next_addresses(&address_block, &input_block, &zero_block);
        }
    }

    /* Offset of the current block */
    uint32_t curr_offset = position.lane * instance->lane_length +
                           position.slice * instance->segment_length + starting_index;

    /* Previous block; wraps to the last block of the lane at a lane start */
    uint32_t prev_offset;
    if (0 == curr_offset % instance->lane_length) {
        prev_offset = curr_offset + instance->lane_length - 1;
    } else {
        prev_offset = curr_offset - 1;
    }

    for (uint32_t i = starting_index; i < instance->segment_length;
         ++i, ++curr_offset, ++prev_offset) {
        /* 1.1 Rotating prev_offset if needed */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }

        /* 1.2.1 Taking pseudo-random value from the previous block */
        uint64_t pseudo_rand;
        if (data_independent_addressing) {
            if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
                next_addresses(&address_block, &input_block, &zero_block);
            }
            pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
        } else {
            pseudo_rand = instance->memory[prev_offset].v[0];
        }

        /* 1.2.2 Computing the lane of the reference block */
        uint64_t ref_lane = (pseudo_rand >> 32) % instance->lanes;
        if (first_segment) {
            /* Can not reference other lanes yet */
            ref_lane = position.lane;
        }

        /* 1.2.3 Computing the number of possible reference blocks within
         * the lane. */
        position.index = i;
        const uint64_t ref_index =
            index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
                        ref_lane == position.lane);

        /* 2 Creating a new block */
        block *ref_block =
            instance->memory + instance->lane_length * ref_lane + ref_index;
        block *curr_block = instance->memory + curr_offset;

        /* Version 1.0 always overwrites; later versions XOR into the
         * existing block from the second pass onward. */
        const int with_xor =
            (ARGON2_VERSION_10 != instance->version && 0 != position.pass) ? 1 : 0;
        fill_block(instance->memory + prev_offset, ref_block, curr_block, with_xor);
    }
}
|
||||||
|
|
||||||
|
// fill_memory, kept identical to the C reference implementation.
//
// Builds an argon2_instance_t describing `memory` and fills it segment by
// segment: for every pass, for every slice, for every lane, in that nesting
// order, all on the calling thread.
__device__ void fill_memory(block* memory, uint32_t passes, uint32_t lanes, uint32_t lane_length, uint32_t segment_length) {
    argon2_instance_t instance;
    instance.memory = memory;
    instance.version = ARGON2_VERSION_13;
    instance.passes = passes;
    instance.lanes = lanes;
    instance.threads = lanes;
    instance.lane_length = lane_length;
    instance.segment_length = segment_length;
    instance.memory_blocks = lanes * lane_length;
    instance.print_internals = 0;

    argon2_position_t position;
    for (uint32_t p = 0; p < passes; ++p) {
        position.pass = p;
        for (uint32_t s = 0; s < ARGON2_SYNC_POINTS; ++s) {
            position.slice = s;
            for (uint32_t l = 0; l < lanes; ++l) {
                position.lane = l;
                fill_segment(&instance, position);
            }
        }
    }
}
|
||||||
|
|
||||||
|
// blake2b_long, kept identical to the C reference implementation.
//
// Variable-length hash built on BLAKE2b. The 32-bit little-endian output
// length is hashed ahead of the input. Outputs up to BLAKE2B_OUTBYTES come
// from a single BLAKE2b call; longer outputs are produced by repeatedly
// hashing the previous digest and emitting half of each digest, with the last
// call producing all remaining bytes.
// Returns a negative value if outlen exceeds UINT32_MAX or any step fails.
__device__ int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
    uint8_t *out = (uint8_t *)pout;
    blake2b_state blake_state;
    uint8_t outlen_bytes[sizeof(uint32_t)] = {0};
    int ret = -1;

// Run one step; jump to cleanup on a negative result.
#define BLAKE2B_LONG_STEP(call)                                                \
    do {                                                                       \
        ret = (call);                                                          \
        if (ret < 0) {                                                         \
            goto done;                                                         \
        }                                                                      \
    } while ((void)0, 0)

    if (outlen > UINT32_MAX) {
        goto done;
    }

    /* Ensure little-endian byte order! */
    store32(outlen_bytes, (uint32_t)outlen);

    if (outlen <= BLAKE2B_OUTBYTES) {
        BLAKE2B_LONG_STEP(blake2b_init(&blake_state, outlen));
        BLAKE2B_LONG_STEP(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
        BLAKE2B_LONG_STEP(blake2b_update(&blake_state, (const uint8_t*)in, inlen));
        BLAKE2B_LONG_STEP(blake2b_final(&blake_state, out, outlen));
    } else {
        uint8_t digest[BLAKE2B_OUTBYTES];
        uint8_t chain_input[BLAKE2B_OUTBYTES];
        uint32_t toproduce;

        // First digest: H(outlen || in).
        BLAKE2B_LONG_STEP(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
        BLAKE2B_LONG_STEP(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
        BLAKE2B_LONG_STEP(blake2b_update(&blake_state, (const uint8_t*)in, inlen));
        BLAKE2B_LONG_STEP(blake2b_final(&blake_state, digest, BLAKE2B_OUTBYTES));

        c_memcpy(out, digest, BLAKE2B_OUTBYTES / 2);
        out += BLAKE2B_OUTBYTES / 2;
        toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;

        // Middle digests: each hashes the previous digest, half is emitted.
        while (toproduce > BLAKE2B_OUTBYTES) {
            c_memcpy(chain_input, digest, BLAKE2B_OUTBYTES);
            BLAKE2B_LONG_STEP(blake2b(digest, BLAKE2B_OUTBYTES, chain_input, BLAKE2B_OUTBYTES, NULL, 0));
            c_memcpy(out, digest, BLAKE2B_OUTBYTES / 2);
            out += BLAKE2B_OUTBYTES / 2;
            toproduce -= BLAKE2B_OUTBYTES / 2;
        }

        // Last digest: sized to exactly the bytes still missing.
        c_memcpy(chain_input, digest, BLAKE2B_OUTBYTES);
        BLAKE2B_LONG_STEP(blake2b(digest, toproduce, chain_input, BLAKE2B_OUTBYTES, NULL, 0));
        c_memcpy(out, digest, toproduce);
    }

done:
    clear_internal_memory(&blake_state, sizeof(blake_state));
    return ret;
#undef BLAKE2B_LONG_STEP
}
|
||||||
|
|
||||||
|
// Absorb one 32-bit field, little-endian encoded, into the running hash.
static __device__ __forceinline__ void argon2d_absorb_le32(blake2b_state *hash, uint32_t field) {
    uint8_t le[sizeof(uint32_t)];
    store32(le, field);
    blake2b_update(hash, le, sizeof(le));
}

// device_argon2d_hash, following the C reference implementation.
//
// Computes a 32-byte Argon2d (version 0x13) tag of `input` with `salt`, using
// no secret and no associated data, and writes it to `output`.
//
//   output     receives 32 bytes
//   t_cost     number of passes
//   m_cost     requested memory in blocks
//   lanes      number of lanes; all lanes are processed by the calling thread
//   memory     caller-provided working memory, large enough for the adjusted
//              block count (at least 8 * lanes blocks)
//
// Changes relative to the previous version:
//   - The initial hash H0 now absorbs m_cost as supplied. It used to absorb
//     the block count after rounding, which differs from the reference
//     whenever m_cost is not a multiple of 4 * lanes. Results are unchanged
//     when m_cost is such a multiple and at least 8 * lanes.
//   - lanes == 0 (previously a division by zero) and NULL output/memory now
//     return without touching anything.
//   - The tag length is a single named constant, so H0 and the final hash
//     cannot drift apart.
__device__ void device_argon2d_hash(
    uint8_t* output,
    const uint8_t* input, size_t input_len,
    uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
    block* memory,
    const uint8_t* salt, size_t salt_len
) {
    const uint32_t tag_length = 32;

    if (output == NULL || memory == NULL || lanes == 0) {
        return;
    }

    // 1. Adjust the memory size: at least 2 * ARGON2_SYNC_POINTS blocks per
    //    lane, rounded down to a whole number of segments.
    uint32_t memory_blocks = m_cost;
    if (memory_blocks < 2 * ARGON2_SYNC_POINTS * lanes) {
        memory_blocks = 2 * ARGON2_SYNC_POINTS * lanes;
    }
    const uint32_t segment_length = memory_blocks / (lanes * ARGON2_SYNC_POINTS);
    const uint32_t lane_length = segment_length * ARGON2_SYNC_POINTS;

    // 2. Compute the initial hash H0 over the parameters, input and salt.
    uint8_t blockhash[ARGON2_PREHASH_DIGEST_LENGTH];
    blake2b_state BlakeHash;
    blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);

    argon2d_absorb_le32(&BlakeHash, lanes);
    argon2d_absorb_le32(&BlakeHash, tag_length);
    argon2d_absorb_le32(&BlakeHash, m_cost);
    argon2d_absorb_le32(&BlakeHash, t_cost);
    argon2d_absorb_le32(&BlakeHash, ARGON2_VERSION_13);
    argon2d_absorb_le32(&BlakeHash, 0); // type field: Argon2d

    argon2d_absorb_le32(&BlakeHash, (uint32_t)input_len);
    blake2b_update(&BlakeHash, (const uint8_t *)input, input_len);

    argon2d_absorb_le32(&BlakeHash, (uint32_t)salt_len);
    blake2b_update(&BlakeHash, (const uint8_t *)salt, salt_len);

    argon2d_absorb_le32(&BlakeHash, 0); // secret length: none
    argon2d_absorb_le32(&BlakeHash, 0); // associated data length: none

    blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);

    // 3. Initialize the first two blocks of each lane from
    //    H'(H0 || LE32(block index) || LE32(lane)).
    uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
    uint8_t initial_hash[ARGON2_PREHASH_SEED_LENGTH];
    c_memcpy(initial_hash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
    c_memset(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0,
             ARGON2_PREHASH_SEED_LENGTH - ARGON2_PREHASH_DIGEST_LENGTH);

    for (uint32_t l = 0; l < lanes; ++l) {
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
        blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&memory[l * lane_length], blockhash_bytes);

        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
        blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&memory[l * lane_length + 1], blockhash_bytes);
    }

    // 4. Fill memory
    fill_memory(memory, t_cost, lanes, lane_length, segment_length);

    // 5. Final block mixing: XOR together the last block of every lane.
    block final_block;
    copy_block(&final_block, &memory[lane_length - 1]);
    for (uint32_t l = 1; l < lanes; ++l) {
        const uint32_t last_block_in_lane = l * lane_length + (lane_length - 1);
        xor_block(&final_block, &memory[last_block_in_lane]);
    }

    // 6. Final hash
    uint8_t final_block_bytes[ARGON2_BLOCK_SIZE];
    store_block(final_block_bytes, &final_block);
    blake2b_long(output, tag_length, final_block_bytes, ARGON2_BLOCK_SIZE);
}
|
||||||
|
|
||||||
|
//=== Example __global__ kernel (variant taking an explicit salt) ===//
// Assumes the host has already allocated the block memory and passes its
// pointer in as memory_ptr.
// Only thread 0 of block 0 computes the hash; every other thread returns
// immediately.
__global__ void argon2d_hash_device_kernel(
    uint8_t* output,
    const uint8_t* input, size_t input_len,
    uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
    block* memory_ptr, // pointer to the memory region allocated by the host
    const uint8_t* salt, size_t salt_len
) {
    if (threadIdx.x != 0 || blockIdx.x != 0) {
        return;
    }
    device_argon2d_hash(output, input, input_len, t_cost, m_cost, lanes,
                        memory_ptr, salt, salt_len);
}
|
274
rin/miner/gpu/RinHash-cuda/blake3_device.cuh
Normal file
274
rin/miner/gpu/RinHash-cuda/blake3_device.cuh
Normal file
@@ -0,0 +1,274 @@
|
|||||||
|
#include "blaze3_cpu.cuh"
|
||||||
|
|
||||||
|
// Number of threads per thread block
|
||||||
|
// NOTE(review): this is declared __constant__ (a device-side variable). If
// host code also uses NUM_THREADS for launch configuration, confirm the host
// reads the intended value; the usage is not visible in this part of the file.
__constant__ const int NUM_THREADS = 16;
|
||||||
|
|
||||||
|
// GPU redefinitions of the CPU functions and tables.
// Each one is the same as its CPU counterpart, with a g_ prefix.

// Eight 32-bit initialization-vector words.
__constant__ const u32 g_IV[8] = {
    0x6A09E667,
    0xBB67AE85,
    0x3C6EF372,
    0xA54FF53A,
    0x510E527F,
    0x9B05688C,
    0x1F83D9AB,
    0x5BE0CD19,
};
|
||||||
|
|
||||||
|
__constant__ const int g_MSG_PERMUTATION[] = {
|
||||||
|
2, 6, 3, 10, 7, 0, 4, 13,
|
||||||
|
1, 11, 12, 5, 9, 14, 15, 8
|
||||||
|
};
|
||||||
|
|
||||||
|
// Device copy of rotr: rotates a 32-bit word right by `shift` bits.
// The callers in this file only pass shifts of 16, 12, 8 and 7.
__device__ u32 g_rotr(u32 value, int shift) {
    const u32 low_part  = value >> shift;
    const u32 high_part = value << (usize - shift);
    return high_part | low_part;
}
|
||||||
|
|
||||||
|
// Device copy of g: mixes the four state words selected by indices a, b, c, d
// with the two message words mx and my, updating `state` in place.
__device__ void g_g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
    u32 &wa = state[a];
    u32 &wb = state[b];
    u32 &wc = state[c];
    u32 &wd = state[d];

    // First half: fold in mx, rotate by 16 then 12.
    wa += wb + mx;
    wd = g_rotr(wd ^ wa, 16);
    wc += wd;
    wb = g_rotr(wb ^ wc, 12);

    // Second half: fold in my, rotate by 8 then 7.
    wa += wb + my;
    wd = g_rotr(wd ^ wa, 8);
    wc += wd;
    wb = g_rotr(wb ^ wc, 7);
}
|
||||||
|
|
||||||
|
// Device copy of round: applies g_g to the four columns and then the four
// diagonals of the 4x4 state, consuming message words m[0..15] in pairs.
__device__ void g_round(u32 state[16], u32 m[16]) {
    // Rows 0-3 address the columns, rows 4-7 the diagonals.
    const u32 lanes[8][4] = {
        {0, 4,  8, 12}, {1, 5,  9, 13}, {2, 6, 10, 14}, {3, 7, 11, 15},
        {0, 5, 10, 15}, {1, 6, 11, 12}, {2, 7,  8, 13}, {3, 4,  9, 14},
    };
    #pragma unroll
    for (int k = 0; k < 8; k++) {
        g_g(state, lanes[k][0], lanes[k][1], lanes[k][2], lanes[k][3],
            m[2 * k], m[2 * k + 1]);
    }
}
|
||||||
|
|
||||||
|
// Device copy of permute: reorders the 16 message words in place so that
// m[i] becomes the old m[g_MSG_PERMUTATION[i]].
__device__ void g_permute(u32 m[16]) {
    u32 snapshot[16];
    // Snapshot first so the shuffle below always reads unmodified words.
    for (int k = 0; k < 16; ++k)
        snapshot[k] = m[k];
    for (int k = 0; k < 16; ++k)
        m[k] = snapshot[g_MSG_PERMUTATION[k]];
}
|
||||||
|
|
||||||
|
// custom memcpy, apparently cuda's memcpy is slow
|
||||||
|
// when called within a kernel
|
||||||
|
__device__ void g_memcpy(u32 *lhs, const u32 *rhs, int size) {
|
||||||
|
// assuming u32 is 4 bytes
|
||||||
|
int len = size / 4;
|
||||||
|
for(int i=0; i<len; i++)
|
||||||
|
lhs[i] = rhs[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// custom memset
|
||||||
|
template<typename T, typename ptr_t>
|
||||||
|
__device__ void g_memset(ptr_t dest, T val, int count) {
|
||||||
|
for(int i=0; i<count; i++)
|
||||||
|
dest[i] = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Device copy of compress. Fills the 16-word `state` from the chaining value,
// the first four IV words, the 64-bit counter, the block length and the
// flags, runs 7 rounds over a private copy of the message block (permuting
// the message between rounds), then folds the two state halves together.
// On return state[0..7] holds the new chaining value.
__device__ void g_compress(
    u32 *chaining_value,
    u32 *block_words,
    u64 counter,
    u32 block_len,
    u32 flags,
    u32 *state
) {
    // state[0..7] = chaining value, state[8..11] = IV[0..3]
    g_memcpy(state, chaining_value, 8 * sizeof(u32));
    g_memcpy(state + 8, g_IV, 4 * sizeof(u32));
    // state[12..15] = counter low word, counter high word, block length, flags
    state[12] = (u32)counter;
    state[13] = (u32)(counter >> 32);
    state[14] = block_len;
    state[15] = flags;

    // Work on a copy: the permutation must not disturb the caller's block.
    u32 msg[16];
    g_memcpy(msg, block_words, 16 * sizeof(u32));

    const int rounds = 7;
    for (int r = 0; r < rounds; ++r) {
        if (r > 0)
            g_permute(msg);   // permute between rounds, not after the last
        g_round(state, msg);
    }

    for (int k = 0; k < 8; ++k) {
        state[k] ^= state[k + 8];
        state[k + 8] ^= chaining_value[k];
    }
}
|
||||||
|
|
||||||
|
// Packs `bytes_len` bytes into 32-bit words, least-significant byte first.
// The loop advances four bytes at a time and always reads bytes[i..i+3], so
// the caller must supply a buffer padded to a multiple of 4 bytes.
// Each byte is widened to u32 before shifting: shifting a promoted `int`
// left by 24 overflows the sign bit for bytes >= 0x80.
__device__ void g_words_from_little_endian_bytes(
    u8 *bytes, u32 *words, u32 bytes_len
) {
    for(u32 i=0; i<bytes_len; i+=4) {
        words[i/4] = ((u32)bytes[i+3] << 24)
                   | ((u32)bytes[i+2] << 16)
                   | ((u32)bytes[i+1] << 8)
                   |  (u32)bytes[i];
    }
}
|
||||||
|
|
||||||
|
// Device counterpart of Chunk::compress_chunk.
//
// Parent nodes (flags & PARENT) compress the 64-byte `data` once with counter
// 0. Leaf nodes walk `leaf_data` in BLOCK_LEN-sized blocks, chaining the
// result of each compression into the next; the first block gets CHUNK_START
// and the last gets CHUNK_END | out_flags. The result is left in `raw_hash`.
//
// Fix relative to the previous version: the block is staged in a zeroed
// scratch buffer and only `block_len` real bytes are copied into it, exactly
// as the CPU version does. Previously the words were built straight from
// `leaf_data`, so the padding of a partial final block was whatever happened
// to follow the input in `leaf_data` rather than zeros.
__device__ void Chunk::g_compress_chunk(u32 out_flags) {
    if(flags&PARENT) {
        g_compress(
            key,
            data,
            0,  // counter is always zero for parent nodes
            BLOCK_LEN,
            flags | out_flags,
            raw_hash
        );
        return;
    }

    u32 chaining_value[8];
    u32 block_len = BLOCK_LEN, flagger;
    g_memcpy(chaining_value, key, 32);

    // An empty leaf is hashed as one all-zero block whose length is reported
    // as 0 to the compression function.
    bool empty_input = (leaf_len==0);
    if(empty_input) {
        for(u32 i=0; i<BLOCK_LEN; i++)
            leaf_data[i] = 0U;
        leaf_len = BLOCK_LEN;
    }

    // scratch buffers are declared once, outside the loop
    u32 block_words[16];
    u8 block_cast[BLOCK_LEN];

    for(u32 i=0; i<leaf_len; i+=BLOCK_LEN) {
        flagger = flags;
        // the last message block may be shorter than BLOCK_LEN
        if(i+BLOCK_LEN > leaf_len)
            block_len = leaf_len%BLOCK_LEN;
        else
            block_len = BLOCK_LEN;

        // special case
        if(empty_input)
            block_len = 0;

        // clear up block_words
        g_memset(block_words, 0, 16);

        // round the byte count up to a whole number of 32-bit words
        u32 new_block_len(block_len);
        if(block_len%4)
            new_block_len += 4 - (block_len%4);

        // real bytes first, then explicit zero padding up to the word boundary
        for(u32 j=0; j<new_block_len; j++)
            block_cast[j] = (j < block_len) ? leaf_data[i+j] : (u8)0;

        g_words_from_little_endian_bytes(block_cast, block_words, new_block_len);

        if(i==0)
            flagger |= CHUNK_START;
        if(i+BLOCK_LEN >= leaf_len)
            flagger |= CHUNK_END | out_flags;

        // raw hash for root node
        g_compress(
            chaining_value,
            block_words,
            counter,
            block_len,
            flagger,
            raw_hash
        );

        g_memcpy(chaining_value, raw_hash, 32);
    }
}
|
||||||
|
|
||||||
|
// Recursively hashes data[l..r) into data[l].raw_hash.
// n = r - l is expected to be a power of 2. A range of one chunk is
// compressed directly; otherwise two child grids are launched for the two
// halves and data[l] is turned into the parent of data[l] and data[l + n/2].
//
// NOTE(review): every thread with tid < n executes the whole body, so for
// n > 1 several threads launch the same child grids and write data[l]
// concurrently -- confirm whether this was meant to be a single thread.
// NOTE(review): __syncthreads() is a barrier between the threads of one
// block; it does not appear to wait for the child grids launched above, and
// threads that returned early on tid >= n never reach it. The parent
// compression may therefore read child raw_hash values before they are
// written -- verify against the CUDA dynamic-parallelism rules.
__global__ void compute(Chunk *data, int l, int r) {
    const int n = r - l;
    const int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if (tid >= n)
        return;

    // Leaf range: a single chunk, compress it in place.
    if (n == 1) {
        data[l].g_compress_chunk();
        return;
    }

    const int half = n / 2;
    const int mid = l + half;

    // Launch child kernels without synchronization (host will handle sync)
    compute<<<half,16>>>(data, l, mid);
    compute<<<half,16>>>(data, mid, r);

    // Wait for all threads in this block to finish
    __syncthreads();

    // Build the parent block: left chaining value followed by the right one.
    data[l].flags |= PARENT;
    memcpy(data[l].data, data[l].raw_hash, 32);
    memcpy(data[l].data+8, data[mid].raw_hash, 32);
    data[l].g_compress_chunk();
}
|
||||||
|
|
||||||
|
// CPU version of light_hash (unchanged)
|
||||||
|
void light_hash(Chunk *data, int N, Chunk *result, Chunk *memory_bar) {
|
||||||
|
const int data_size = N*sizeof(Chunk);
|
||||||
|
|
||||||
|
// Device settings
|
||||||
|
// Allows DeviceSync to be called upto 16 levels of recursion
|
||||||
|
cudaDeviceSetLimit(cudaLimitDevRuntimeSyncDepth, 16);
|
||||||
|
|
||||||
|
// Device vector
|
||||||
|
Chunk *g_data = memory_bar;
|
||||||
|
cudaMemcpy(g_data, data, data_size, cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
// Actual computation of hash
|
||||||
|
compute<<<N,32>>>(g_data, 0, N);
|
||||||
|
|
||||||
|
cudaMemcpy(result, g_data, sizeof(Chunk), cudaMemcpyDeviceToHost);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Device-callable version of light_hash
|
||||||
|
__device__ void light_hash_device(const uint8_t* input, size_t input_len, uint8_t* output) {
|
||||||
|
// Create a single chunk for processing the input
|
||||||
|
Chunk chunk;
|
||||||
|
|
||||||
|
// Initialize the chunk with the input data
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
chunk.key[i] = g_IV[i]; // Use device constant IV
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy the input data to leaf_data (with bounds checking)
|
||||||
|
size_t copy_len = min(input_len, (size_t)BLOCK_LEN * 16); // Ensure we don't overflow
|
||||||
|
for (size_t i = 0; i < copy_len; i++) {
|
||||||
|
chunk.leaf_data[i] = input[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk.leaf_len = copy_len;
|
||||||
|
chunk.counter = 0;
|
||||||
|
chunk.flags = 0; // Default flags
|
||||||
|
|
||||||
|
// Process the chunk directly
|
||||||
|
chunk.g_compress_chunk(ROOT); // Set ROOT flag for final output
|
||||||
|
|
||||||
|
// Copy the raw hash to the output
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
// Convert 32-bit words to bytes in little-endian format
|
||||||
|
output[i*4] = (uint8_t)(chunk.raw_hash[i]);
|
||||||
|
output[i*4+1] = (uint8_t)(chunk.raw_hash[i] >> 8);
|
||||||
|
output[i*4+2] = (uint8_t)(chunk.raw_hash[i] >> 16);
|
||||||
|
output[i*4+3] = (uint8_t)(chunk.raw_hash[i] >> 24);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Alias for compatibility with other device code
|
||||||
|
__device__ void blake3_hash_device(const uint8_t* input, size_t input_len, uint8_t* output) {
|
||||||
|
light_hash_device(input, input_len, output);
|
||||||
|
}
|
419
rin/miner/gpu/RinHash-cuda/blaze3_cpu.cuh
Normal file
419
rin/miner/gpu/RinHash-cuda/blaze3_cpu.cuh
Normal file
@@ -0,0 +1,419 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstring>
|
||||||
|
#include <vector>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Let's use a pinned memory vector!
|
||||||
|
// Removed Thrust pinned allocator dependency for portability
|
||||||
|
// #include <thrust/host_vector.h>
|
||||||
|
// #include <thrust/system/cuda/experimental/pinned_allocator.h>
|
||||||
|
|
||||||
|
using u32 = uint32_t;
|
||||||
|
using u64 = uint64_t;
|
||||||
|
using u8 = uint8_t;
|
||||||
|
|
||||||
|
const u32 OUT_LEN = 32;
|
||||||
|
const u32 KEY_LEN = 32;
|
||||||
|
const u32 BLOCK_LEN = 64;
|
||||||
|
const u32 CHUNK_LEN = 1024;
|
||||||
|
// Multiple chunks make a snicker bar :)
|
||||||
|
const u32 SNICKER = 1U << 10;
|
||||||
|
// Factory height and snicker size have an inversly propotional relationship
|
||||||
|
// FACTORY_HT * (log2 SNICKER) + 10 >= 64
|
||||||
|
const u32 FACTORY_HT = 5;
|
||||||
|
|
||||||
|
const u32 CHUNK_START = 1 << 0;
|
||||||
|
const u32 CHUNK_END = 1 << 1;
|
||||||
|
const u32 PARENT = 1 << 2;
|
||||||
|
const u32 ROOT = 1 << 3;
|
||||||
|
const u32 KEYED_HASH = 1 << 4;
|
||||||
|
const u32 DERIVE_KEY_CONTEXT = 1 << 5;
|
||||||
|
const u32 DERIVE_KEY_MATERIAL = 1 << 6;
|
||||||
|
|
||||||
|
const int usize = sizeof(u32) * 8;
|
||||||
|
|
||||||
|
u32 IV[8] = {
|
||||||
|
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||||
|
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
|
||||||
|
};
|
||||||
|
|
||||||
|
const int MSG_PERMUTATION[] = {
|
||||||
|
2, 6, 3, 10, 7, 0, 4, 13,
|
||||||
|
1, 11, 12, 5, 9, 14, 15, 8
|
||||||
|
};
|
||||||
|
|
||||||
|
// Rotates a 32-bit word right by `shift` bits.
// The callers in this file only pass shifts of 16, 12, 8 and 7.
u32 rotr(u32 value, int shift) {
    const u32 low_part  = value >> shift;
    const u32 high_part = value << (usize - shift);
    return high_part | low_part;
}
|
||||||
|
|
||||||
|
// Mixes the four state words selected by indices a, b, c, d with the two
// message words mx and my, updating `state` in place.
void g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
    u32 &wa = state[a];
    u32 &wb = state[b];
    u32 &wc = state[c];
    u32 &wd = state[d];

    // First half: fold in mx, rotate by 16 then 12.
    wa += wb + mx;
    wd = rotr(wd ^ wa, 16);
    wc += wd;
    wb = rotr(wb ^ wc, 12);

    // Second half: fold in my, rotate by 8 then 7.
    wa += wb + my;
    wd = rotr(wd ^ wa, 8);
    wc += wd;
    wb = rotr(wb ^ wc, 7);
}
|
||||||
|
|
||||||
|
// One full round: applies g to the four columns and then the four diagonals
// of the 4x4 state, consuming message words m[0..15] in pairs.
void round(u32 state[16], u32 m[16]) {
    // Rows 0-3 address the columns, rows 4-7 the diagonals.
    static const u32 lanes[8][4] = {
        {0, 4,  8, 12}, {1, 5,  9, 13}, {2, 6, 10, 14}, {3, 7, 11, 15},
        {0, 5, 10, 15}, {1, 6, 11, 12}, {2, 7,  8, 13}, {3, 4,  9, 14},
    };
    for (int k = 0; k < 8; k++) {
        g(state, lanes[k][0], lanes[k][1], lanes[k][2], lanes[k][3],
          m[2 * k], m[2 * k + 1]);
    }
}
|
||||||
|
|
||||||
|
// Reorders the 16 message words in place so that m[i] becomes the old
// m[MSG_PERMUTATION[i]].
void permute(u32 m[16]) {
    u32 snapshot[16];
    // Snapshot first so the shuffle below always reads unmodified words.
    for (int k = 0; k < 16; ++k)
        snapshot[k] = m[k];
    for (int k = 0; k < 16; ++k)
        m[k] = snapshot[MSG_PERMUTATION[k]];
}
|
||||||
|
|
||||||
|
// Compression function. Fills the 16-word `state` from the chaining value,
// the first four IV words, the 64-bit counter, the block length and the
// flags, runs 7 rounds over a private copy of the message block (permuting
// the message between rounds), then folds the two state halves together.
// On return state[0..7] holds the new chaining value.
void compress(
    u32 *chaining_value,
    u32 *block_words,
    u64 counter,
    u32 block_len,
    u32 flags,
    u32 *state
) {
    // state[0..7] = chaining value, state[8..11] = IV[0..3]
    for (int k = 0; k < 8; ++k)
        state[k] = chaining_value[k];
    for (int k = 0; k < 4; ++k)
        state[8 + k] = IV[k];
    // state[12..15] = counter low word, counter high word, block length, flags
    state[12] = (u32)counter;
    state[13] = (u32)(counter >> 32);
    state[14] = block_len;
    state[15] = flags;

    // Work on a copy: the permutation must not disturb the caller's block.
    u32 msg[16];
    for (int k = 0; k < 16; ++k)
        msg[k] = block_words[k];

    const int rounds = 7;
    for (int r = 0; r < rounds; ++r) {
        if (r > 0)
            permute(msg);   // permute between rounds, not after the last
        round(state, msg);
    }

    for (int k = 0; k < 8; ++k) {
        state[k] ^= state[k + 8];
        state[k + 8] ^= chaining_value[k];
    }
}
|
||||||
|
|
||||||
|
// Packs `bytes_len` bytes into 32-bit words, least-significant byte first.
// The loop advances four bytes at a time and always reads bytes[i..i+3], so
// the caller must supply a buffer padded to a multiple of 4 bytes.
// Each byte is widened to u32 before shifting: shifting a promoted `int`
// left by 24 overflows the sign bit for bytes >= 0x80.
void words_from_little_endian_bytes(u8 *bytes, u32 *words, u32 bytes_len) {
    for(u32 i=0; i<bytes_len; i+=4) {
        words[i/4] = ((u32)bytes[i+3] << 24)
                   | ((u32)bytes[i+2] << 16)
                   | ((u32)bytes[i+1] << 8)
                   |  (u32)bytes[i];
    }
}
|
||||||
|
|
||||||
|
struct Chunk {
|
||||||
|
// use only when it is a leaf node
|
||||||
|
// leaf data may have less than 1024 bytes
|
||||||
|
u8 leaf_data[1024];
|
||||||
|
u32 leaf_len;
|
||||||
|
// use in all other cases
|
||||||
|
// data will always have 64 bytes
|
||||||
|
u32 data[16];
|
||||||
|
u32 flags;
|
||||||
|
u32 raw_hash[16];
|
||||||
|
u32 key[8];
|
||||||
|
// only useful for leaf nodes
|
||||||
|
u64 counter;
|
||||||
|
// Constructor for leaf nodes
|
||||||
|
__device__ __host__ Chunk(char *input, int size, u32 _flags, u32 *_key, u64 ctr){
|
||||||
|
counter = ctr;
|
||||||
|
flags = _flags;
|
||||||
|
memcpy(key, _key, 8*sizeof(*key));
|
||||||
|
memset(leaf_data, 0, 1024);
|
||||||
|
memcpy(leaf_data, input, size);
|
||||||
|
leaf_len = size;
|
||||||
|
}
|
||||||
|
__device__ __host__ Chunk(u32 _flags, u32 *_key) {
|
||||||
|
counter = 0;
|
||||||
|
flags = _flags;
|
||||||
|
memcpy(key, _key, 8*sizeof(*key));
|
||||||
|
leaf_len = 0;
|
||||||
|
}
|
||||||
|
__device__ __host__ Chunk() {}
|
||||||
|
// Chunk() : leaf_len(0) {}
|
||||||
|
// process data in sizes of message blocks and store cv in hash
|
||||||
|
void compress_chunk(u32=0);
|
||||||
|
__device__ void g_compress_chunk(u32=0);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Hashes this node on the CPU and leaves the result in raw_hash.
//
// Parent nodes (flags & PARENT) compress the 64-byte `data` once with counter
// 0. Leaf nodes walk leaf_data in BLOCK_LEN-sized blocks, chaining each
// compression into the next; the first block gets CHUNK_START and the last
// gets CHUNK_END | out_flags. An empty leaf is hashed as one all-zero block
// whose length is reported as 0 (and leaf_len is rewritten to BLOCK_LEN).
void Chunk::compress_chunk(u32 out_flags) {
    if (flags & PARENT) {
        // counter is always zero for parent nodes
        compress(key, data, 0, BLOCK_LEN, flags | out_flags, raw_hash);
        return;
    }

    u32 cv[8];
    memcpy(cv, key, sizeof(cv));

    const bool empty_input = (leaf_len == 0);
    if (empty_input) {
        memset(leaf_data, 0, BLOCK_LEN);
        leaf_len = BLOCK_LEN;
    }

    for (u32 offset = 0; offset < leaf_len; offset += BLOCK_LEN) {
        const bool is_last = (offset + BLOCK_LEN >= leaf_len);

        // Only the final block may be shorter than BLOCK_LEN.
        u32 block_len = (offset + BLOCK_LEN > leaf_len) ? leaf_len % BLOCK_LEN
                                                        : BLOCK_LEN;
        if (empty_input)
            block_len = 0;

        // Round up to a whole number of 32-bit words; the scratch buffer is
        // zeroed so the padding bytes are zero.
        const u32 padded_len = (block_len + 3u) & ~3u;
        u8 block_bytes[BLOCK_LEN] = {0};
        memcpy(block_bytes, leaf_data + offset, block_len);

        u32 block_words[16] = {0};
        words_from_little_endian_bytes(block_bytes, block_words, padded_len);

        u32 block_flags = flags;
        if (offset == 0)
            block_flags |= CHUNK_START;
        if (is_last)
            block_flags |= CHUNK_END | out_flags;

        // raw hash for root node
        compress(cv, block_words, counter, block_len, block_flags, raw_hash);

        // The first 8 output words become the next block's chaining value.
        memcpy(cv, raw_hash, sizeof(cv));
    }
}
|
||||||
|
|
||||||
|
// Fallback alias: use std::vector instead of thrust pinned host vector
|
||||||
|
using thrust_vector = std::vector<Chunk>;
|
||||||
|
|
||||||
|
// The GPU hasher
|
||||||
|
void light_hash(Chunk*, int, Chunk*, Chunk*);
|
||||||
|
|
||||||
|
// Sanity checks
|
||||||
|
// Hashes the chunks data[first..last) and returns the resulting node.
// The count (last - first) is expected to be a power of 2. A single chunk is
// compressed on the CPU to avoid GPU call overhead; anything larger is
// handed to light_hash together with the device scratch buffer memory_bar.
Chunk hash_many(Chunk *data, int first, int last, Chunk *memory_bar) {
    const int count = last - first;

    if (count != 1) {
        Chunk gpu_result;
        light_hash(data + first, count, &gpu_result, memory_bar);
        return gpu_result;
    }

    // Reduce GPU calling overhead
    Chunk &only = data[first];
    only.compress_chunk();
    return only;
}
|
||||||
|
|
||||||
|
Chunk merge(Chunk &left, Chunk &right);
|
||||||
|
void hash_root(Chunk &node, vector<u8> &out_slice);
|
||||||
|
|
||||||
|
struct Hasher {
|
||||||
|
u32 key[8];
|
||||||
|
u32 flags;
|
||||||
|
u64 ctr;
|
||||||
|
u64 file_size;
|
||||||
|
// A memory bar for CUDA to use during it's computation
|
||||||
|
Chunk* memory_bar;
|
||||||
|
// Factory is an array of FACTORY_HT possible SNICKER bars
|
||||||
|
thrust_vector factory[FACTORY_HT];
|
||||||
|
|
||||||
|
// methods
|
||||||
|
static Hasher new_internal(u32 key[8], u32 flags, u64 fsize);
|
||||||
|
static Hasher _new(u64);
|
||||||
|
// initializes cuda memory (if needed)
|
||||||
|
void init();
|
||||||
|
// frees cuda memory (if it is there)
|
||||||
|
// free nullptr is a no-op
|
||||||
|
~Hasher() {
|
||||||
|
if(memory_bar)
|
||||||
|
cudaFree(memory_bar);
|
||||||
|
else
|
||||||
|
free(memory_bar);
|
||||||
|
}
|
||||||
|
|
||||||
|
void update(char *input, int size);
|
||||||
|
void finalize(vector<u8> &out_slice);
|
||||||
|
void propagate();
|
||||||
|
};
|
||||||
|
|
||||||
|
// Builds a Hasher from an explicit key, flag word and input size. The chunk
// counter starts at 0; the members not listed here (memory_bar, factory) are
// left to aggregate initialization.
Hasher Hasher::new_internal(u32 key[8], u32 flags, u64 fsize) {
    const u64 initial_counter = 0;
    return Hasher{
        { key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7] },
        flags,
        initial_counter,
        fsize
    };
}
|
||||||
|
|
||||||
|
// Creates a Hasher for `fsize` bytes of input, keyed with IV and no flags.
Hasher Hasher::_new(u64 fsize) {
    return new_internal(IV, 0, fsize);
}
|
||||||
|
|
||||||
|
void Hasher::init() {
|
||||||
|
if(file_size<1) {
|
||||||
|
memory_bar = nullptr;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
u64 num_chunks = ceil(file_size / CHUNK_LEN);
|
||||||
|
u32 bar_size = min(num_chunks, (u64)SNICKER);
|
||||||
|
// Just for safety :)
|
||||||
|
++bar_size;
|
||||||
|
cudaMalloc(&memory_bar, bar_size*sizeof(Chunk));
|
||||||
|
|
||||||
|
// Let the most commonly used places always have memory
|
||||||
|
// +1 so that it does not resize when it hits CHUNK_LEN
|
||||||
|
u32 RESERVE = SNICKER + 1;
|
||||||
|
factory[0].reserve(RESERVE);
|
||||||
|
factory[1].reserve(RESERVE);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hasher::propagate() {
|
||||||
|
int level=0;
|
||||||
|
// nodes move to upper levels if lower one is one SNICKER long
|
||||||
|
while(factory[level].size() == SNICKER) {
|
||||||
|
Chunk subtree = hash_many(factory[level].data(), 0, SNICKER, memory_bar);
|
||||||
|
factory[level].clear();
|
||||||
|
++level;
|
||||||
|
factory[level].push_back(subtree);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hasher::update(char *input, int size) {
|
||||||
|
factory[0].push_back(Chunk(input, size, flags, key, ctr));
|
||||||
|
++ctr;
|
||||||
|
if(factory[0].size() == SNICKER)
|
||||||
|
propagate();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reduces every factory level to a single node, carries it to the level
// above, and finally writes out_slice.size() bytes of root output.
//
// For each non-empty level the n nodes are split into power-of-two runs,
// largest first (one run per set bit of n, tested with n & divider); each
// run is hashed with hash_many and the resulting subtrees are merged from
// right to left. The surviving node is pushed to the next level, or becomes
// the root on the top level.
//
// Fix: the bit test had been garbled into "n÷r" (an HTML-entity
// mangling of "&divider"), which does not compile.
void Hasher::finalize(vector<u8> &out_slice) {
    Chunk root(flags, key);
    for(int i=0; i<FACTORY_HT; i++) {
        vector<Chunk> subtrees;
        u32 n = factory[i].size(), divider=SNICKER;
        if(!n)
            continue;
        int start = 0;
        while(divider) {
            if(n&divider) {
                Chunk subtree = hash_many(factory[i].data(), start, start+divider, memory_bar);
                subtrees.push_back(subtree);
                start += divider;
            }
            divider >>= 1;
        }
        while(subtrees.size()>1) {
            Chunk tmp1 = subtrees.back();
            subtrees.pop_back();
            Chunk tmp2 = subtrees.back();
            subtrees.pop_back();
            // tmp2 is the left child
            // tmp1 is the right child
            // that's the order they appear within the array
            Chunk tmp = merge(tmp2, tmp1);
            subtrees.push_back(tmp);
        }
        if(i<FACTORY_HT-1)
            factory[i+1].push_back(subtrees[0]);
        else
            root = subtrees[0];
    }
    hash_root(root, out_slice);
}
|
||||||
|
|
||||||
|
// Compresses both children and returns their (not yet compressed) parent:
// a PARENT node that inherits the left child's flags and key and whose data
// is the left chaining value followed by the right one.
Chunk merge(Chunk &left, Chunk &right) {
    left.compress_chunk();
    right.compress_chunk();

    Chunk parent(left.flags | PARENT, left.key);

    // Each chaining value is 8 words = 32 bytes.
    const size_t cv_bytes = 8 * sizeof(u32);
    memcpy(parent.data, left.raw_hash, cv_bytes);
    memcpy(parent.data + 8, right.raw_hash, cv_bytes);
    return parent;
}
|
||||||
|
|
||||||
|
// Fills out_slice with root output: the node is compressed with the ROOT
// flag once per 64-byte output block, with node.counter set to the block
// index, and the 16 result words are written out as little-endian bytes.
// The final block may be partial.
//
// Changes relative to the previous version: the loop condition compares
// unsigned sizes directly instead of narrowing a u64 difference to int, and
// the bytes are written straight into out_slice rather than through a
// temporary vector allocated for every block.
void hash_root(Chunk &node, vector<u8> &out_slice) {
    // the last message block must not be hashed like the others
    // it needs to be hashed with the root flag
    const u64 k = 2*OUT_LEN;              // bytes produced per compression
    const u64 total = out_slice.size();
    u64 output_block_counter = 0;

    u32 words[16] = {};
    for(u64 i=0; i<total; i+=k) {
        node.counter = output_block_counter;
        node.compress_chunk(ROOT);

        // words is u32[16]
        memcpy(words, node.raw_hash, 16*sizeof(*words));

        const u64 block_bytes = min(k, total-i);
        for(u64 b=0; b<block_bytes; b++)
            out_slice[i+b] = (u8)((words[b/4] >> (8*(b%4))) & 0x000000FF);

        ++output_block_counter;
    }
}
|
99
rin/miner/gpu/RinHash-cuda/build-cuda-linux.sh
Normal file
99
rin/miner/gpu/RinHash-cuda/build-cuda-linux.sh
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
#!/bin/bash
# RinHash CUDA Build Script for Linux/WSL
# This script builds the CUDA implementation of RinHash:
#   bin/rinhash-cuda-miner  (main miner)
#   bin/test_miner          (test program)

echo "======================================"
echo " RinHash CUDA Miner Build Script"
echo "======================================"

# Check if NVCC is available
if ! command -v nvcc &> /dev/null; then
    echo "ERROR: NVCC not found in PATH"
    echo "Please install CUDA Toolkit"
    echo "On Ubuntu/Debian: sudo apt install nvidia-cuda-toolkit"
    echo "Or download from: https://developer.nvidia.com/cuda-downloads"
    exit 1
fi

echo "NVCC found:"
nvcc --version
echo ""

# Check if gcc/g++ is available
if ! command -v gcc &> /dev/null; then
    echo "ERROR: GCC not found in PATH"
    echo "Please install build-essential: sudo apt install build-essential"
    exit 1
fi

echo "GCC found:"
gcc --version | head -1
echo ""

echo "Building RinHash CUDA miner..."
echo ""

# Create output directory
mkdir -p bin

# Flags shared by both builds.
# -rdc=true enables relocatable device code / device linking, which the
# sources need because device code launches kernels (dynamic parallelism);
# it goes together with linking -lcudadevrt below.
NVCC_FLAGS=(
    -O3 -std=c++11
    -rdc=true
    -arch=sm_50
    -gencode arch=compute_50,code=sm_50
    -gencode arch=compute_52,code=sm_52
    -gencode arch=compute_60,code=sm_60
    -gencode arch=compute_61,code=sm_61
    -gencode arch=compute_70,code=sm_70
    -gencode arch=compute_75,code=sm_75
    -gencode arch=compute_80,code=sm_80
    -gencode arch=compute_86,code=sm_86
    -I.
)
NVCC_LIBS=(-lcuda -lcudart -lcudadevrt)

# Track the result of BOTH compilations; previously only the exit status of
# the second nvcc call was checked, so a failed miner build went unnoticed.
build_ok=1

# Compile with NVCC (enable device linking for dynamic parallelism)
nvcc "${NVCC_FLAGS[@]}" \
    rinhash.cu sha3-256.cu \
    -o bin/rinhash-cuda-miner \
    "${NVCC_LIBS[@]}" || build_ok=0

# Also build test program
echo "Building test program..."
nvcc "${NVCC_FLAGS[@]}" \
    test_miner.cu rinhash.cu sha3-256.cu \
    -o bin/test_miner \
    "${NVCC_LIBS[@]}" || build_ok=0

if [ "$build_ok" -eq 1 ]; then
    echo ""
    echo "======================================"
    echo " BUILD SUCCESSFUL!"
    echo "======================================"
    echo ""
    echo "Executables created:"
    echo " - bin/rinhash-cuda-miner (main miner)"
    echo " - bin/test_miner (test program)"
    echo ""
    echo "To test the miner:"
    echo " ./bin/test_miner"
    echo ""
else
    echo ""
    echo "======================================"
    echo " BUILD FAILED!"
    echo "======================================"
    echo ""
    echo "Common issues:"
    echo "1. Missing CUDA runtime libraries"
    echo "2. Incompatible CUDA version"
    echo "3. Missing development tools"
    echo ""
    exit 1
fi

echo "Build completed successfully!"
|
97
rin/miner/gpu/RinHash-cuda/build-cuda.bat
Normal file
97
rin/miner/gpu/RinHash-cuda/build-cuda.bat
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
@echo off
REM RinHash CUDA Build Script
REM This script attempts to build the CUDA implementation of RinHash.
REM It needs NVCC on PATH and a Visual Studio 2022 Community or
REM Visual Studio 2019 Build Tools installation for the host compiler.

echo ======================================
echo RinHash CUDA Miner Build Script
echo ======================================

REM Check if NVCC is available
where nvcc >nul 2>nul
if errorlevel 1 (
    echo ERROR: NVCC not found in PATH
    echo Please install CUDA Toolkit
    goto :error
)

echo NVCC found:
nvcc --version
echo.

REM Try to find Visual Studio
set "VS2019_PATH=C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
set "VS2022_PATH=C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat"

if exist "%VS2022_PATH%" (
    echo Using Visual Studio 2022...
    call "%VS2022_PATH%"
    goto :compile
)

if exist "%VS2019_PATH%" (
    echo Using Visual Studio 2019 Build Tools...
    call "%VS2019_PATH%"
    goto :compile
)

echo ERROR: No Visual Studio installation found
echo.
echo SOLUTION 1: Install Visual Studio Community 2022 (free)
echo - Download from: https://visualstudio.microsoft.com/downloads/
echo - Make sure to include "Desktop development with C++" workload
echo - Include Windows 10/11 SDK
echo.
echo SOLUTION 2: Install Visual Studio Build Tools 2022
echo - Download from: https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2022
echo - Include C++ build tools and Windows SDK
echo.
goto :error

:compile
echo.
echo Building RinHash CUDA miner...
echo.

REM Compile with NVCC (enable device linking for dynamic parallelism)
nvcc -O3 -rdc=true -arch=sm_50 ^
    -gencode arch=compute_50,code=sm_50 ^
    -I. rinhash.cu sha3-256.cu ^
    -o rinhash-cuda-miner.exe ^
    -lcuda -lcudart -lcudadevrt

REM On failure only the hints are printed here; the BUILD FAILED banner
REM itself is printed once, by the :error section.
if errorlevel 1 (
    echo.
    echo Common issues:
    echo 1. Missing Windows SDK - install via Visual Studio Installer
    echo 2. Incompatible Visual Studio version
    echo 3. Missing CUDA runtime libraries
    goto :error
)

echo.
echo ======================================
echo BUILD SUCCESSFUL!
echo ======================================
echo.
echo Executable created: rinhash-cuda-miner.exe
echo.
echo To test the miner:
echo rinhash-cuda-miner.exe --help
echo.
goto :end

:error
echo.
echo ======================================
echo BUILD FAILED!
echo ======================================
echo.
pause
exit /b 1

:end
echo Build completed successfully!
pause
|
232
rin/miner/gpu/RinHash-cuda/rinhash.cu
Normal file
232
rin/miner/gpu/RinHash-cuda/rinhash.cu
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <device_launch_parameters.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <vector>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
// Include shared device functions
|
||||||
|
#include "rinhash_device.cuh"
|
||||||
|
#include "argon2d_device.cuh"
|
||||||
|
#include "sha3-256.cu"
|
||||||
|
#include "blake3_device.cuh"
|
||||||
|
|
||||||
|
// RinHash kernel. Only the thread with threadIdx.x == 0 does any work: it chains
// light_hash_device -> device_argon2d_hash -> sha3_256_device and copies the final
// 32 bytes to `output`. All threads then meet at the trailing barrier.
//   input, input_len : bytes to hash
//   output           : receives 32 bytes
//   argon2_memory    : scratch blocks forwarded to device_argon2d_hash
// NOTE(review): light_hash_device and device_argon2d_hash live in headers not visible
// here; the numeric arguments (2, 64, 1) are presumably t_cost / m_cost / lanes -- confirm.
extern "C" __global__ void rinhash_cuda_kernel(
    const uint8_t* input,
    size_t input_len,
    uint8_t* output,
    block* argon2_memory
) {
    __shared__ uint8_t blake3_out[32];
    __shared__ uint8_t argon2_out[32];

    if (threadIdx.x == 0) {
        uint8_t salt[11] = { 'R','i','n','C','o','i','n','S','a','l','t' };
        uint8_t digest[32];

        light_hash_device(input, input_len, blake3_out);
        device_argon2d_hash(argon2_out, blake3_out, 32, 2, 64, 1, argon2_memory, salt, 11);
        sha3_256_device(argon2_out, 32, digest);

        int k = 0;
        while (k < 32) {
            output[k] = digest[k];
            ++k;
        }
    }
    __syncthreads();
}
|
||||||
|
|
||||||
|
// Host wrapper: hashes one input buffer on the GPU with rinhash_cuda_kernel.
//   input, input_len : host bytes to hash
//   output           : host buffer receiving 32 bytes on success
// Errors are reported on stderr; the function returns void, so on failure `output`
// is left untouched. All device buffers are released on every path.
extern "C" void rinhash_cuda(const uint8_t* input, size_t input_len, uint8_t* output) {
    const uint32_t m_cost = 64; // Argon2 blocks (64 KiB)

    uint8_t *d_input = nullptr;
    uint8_t *d_output = nullptr;
    block *d_memory = nullptr;

    cudaError_t err = cudaSuccess;
    const char *stage = "";  // message prefix describing the step currently being attempted

    stage = "CUDA error: Failed to allocate input memory";
    err = cudaMalloc(&d_input, input_len);
    if (err != cudaSuccess) goto cleanup;

    stage = "CUDA error: Failed to allocate output memory";
    err = cudaMalloc(&d_output, 32);
    if (err != cudaSuccess) goto cleanup;

    stage = "CUDA error: Failed to allocate argon2 memory";
    err = cudaMalloc(&d_memory, m_cost * sizeof(block));
    if (err != cudaSuccess) goto cleanup;

    stage = "CUDA error: Failed to copy input to device";
    err = cudaMemcpy(d_input, input, input_len, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) goto cleanup;

    rinhash_cuda_kernel<<<1, 1>>>(d_input, input_len, d_output, d_memory);

    // Launch failures (e.g. no kernel image for this GPU) are reported by
    // cudaGetLastError, not necessarily by the synchronize call that follows.
    stage = "CUDA error during kernel launch";
    err = cudaGetLastError();
    if (err != cudaSuccess) goto cleanup;

    stage = "CUDA error during kernel execution";
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) goto cleanup;

    stage = "CUDA error: Failed to copy output from device";
    err = cudaMemcpy(output, d_output, 32, cudaMemcpyDeviceToHost);

cleanup:
    if (err != cudaSuccess) {
        fprintf(stderr, "%s: %s\n", stage, cudaGetErrorString(err));
    }
    // cudaFree(nullptr) is a no-op, so buffers that were never allocated are safe here.
    cudaFree(d_memory);
    cudaFree(d_input);
    cudaFree(d_output);
}
|
||||||
|
|
||||||
|
// Serialises the six block-header fields back to back into `output` and stores the
// number of bytes written (4 + 32 + 32 + 4 + 4 + 4 = 80) in *output_len.
// Fields are copied with memcpy, i.e. in the host's in-memory byte order.
// `output` must have room for 80 bytes.
extern "C" void blockheader_to_bytes(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    const uint32_t* nonce,
    uint8_t* output,
    size_t* output_len
) {
    uint8_t* cursor = output;

    memcpy(cursor, version, 4);
    cursor += 4;
    memcpy(cursor, prev_block, 32);
    cursor += 32;
    memcpy(cursor, merkle_root, 32);
    cursor += 32;
    memcpy(cursor, timestamp, 4);
    cursor += 4;
    memcpy(cursor, bits, 4);
    cursor += 4;
    memcpy(cursor, nonce, 4);
    cursor += 4;

    *output_len = (size_t)(cursor - output);
}
|
||||||
|
|
||||||
|
// Batch version for mining: hashes `num_blocks` headers of `block_header_len` bytes
// each, one kernel launch per header (sequential for now), writing 32 bytes per
// header to `outputs`.
//
// The function returns void, so failures are signalled in-band: every output slot
// that was NOT successfully computed is filled with 0xFF bytes (the numerically
// largest hash), which keeps a "lowest hash wins" caller from selecting it.
// Errors are also printed to stderr.
extern "C" void rinhash_cuda_batch(
    const uint8_t* block_headers,
    size_t block_header_len,
    uint8_t* outputs,
    uint32_t num_blocks
) {
    const uint32_t m_cost = 64;
    const size_t hash_len = 32;

    uint8_t *d_input = NULL;
    uint8_t *d_output = NULL;
    block *d_memory = NULL;
    size_t done = 0;  // number of headers whose hash has been copied back
    cudaError_t err;

    if (num_blocks == 0) {
        return;  // nothing to hash; skip the device allocations entirely
    }

    err = cudaMalloc((void**)&d_input, block_header_len);
    if (err != cudaSuccess) { fprintf(stderr, "CUDA error: alloc header: %s\n", cudaGetErrorString(err)); goto cleanup; }
    err = cudaMalloc((void**)&d_output, 32);
    if (err != cudaSuccess) { fprintf(stderr, "CUDA error: alloc output: %s\n", cudaGetErrorString(err)); goto cleanup; }
    err = cudaMalloc((void**)&d_memory, m_cost * sizeof(block));
    if (err != cudaSuccess) { fprintf(stderr, "CUDA error: alloc argon2 mem: %s\n", cudaGetErrorString(err)); goto cleanup; }

    for (done = 0; done < num_blocks; done++) {
        // size_t arithmetic: the offsets cannot wrap at 32 bits for large batches
        const uint8_t* input = block_headers + done * block_header_len;
        uint8_t* output = outputs + done * hash_len;

        err = cudaMemcpy(d_input, input, block_header_len, cudaMemcpyHostToDevice);
        if (err != cudaSuccess) { fprintf(stderr, "CUDA error: copy header %u: %s\n", (unsigned)done, cudaGetErrorString(err)); break; }

        rinhash_cuda_kernel<<<1, 1>>>(d_input, block_header_len, d_output, d_memory);
        // Launch errors are reported by cudaGetLastError, not necessarily by the sync below.
        err = cudaGetLastError();
        if (err != cudaSuccess) { fprintf(stderr, "CUDA error launching kernel %u: %s\n", (unsigned)done, cudaGetErrorString(err)); break; }
        err = cudaDeviceSynchronize();
        if (err != cudaSuccess) { fprintf(stderr, "CUDA error in kernel %u: %s\n", (unsigned)done, cudaGetErrorString(err)); break; }

        err = cudaMemcpy(output, d_output, 32, cudaMemcpyDeviceToHost);
        if (err != cudaSuccess) { fprintf(stderr, "CUDA error: copy out %u: %s\n", (unsigned)done, cudaGetErrorString(err)); break; }
    }

cleanup:
    if (done < num_blocks) {
        // Mark everything that was not computed as the worst possible hash.
        memset(outputs + done * hash_len, 0xFF, (num_blocks - done) * hash_len);
    }
    cudaFree(d_memory);
    cudaFree(d_output);
    cudaFree(d_input);
}
|
||||||
|
|
||||||
|
// Public entry point: serialises the header fields with blockheader_to_bytes and
// hashes the resulting bytes with rinhash_cuda. `output` receives the hash that
// rinhash_cuda writes (32 bytes on success).
extern "C" void RinHash(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    const uint32_t* nonce,
    uint8_t* output
) {
    uint8_t header_bytes[80]; // Standard block header size
    size_t header_size;

    blockheader_to_bytes(version, prev_block, merkle_root, timestamp, bits, nonce,
                         header_bytes, &header_size);
    rinhash_cuda(header_bytes, header_size, output);
}
|
||||||
|
|
||||||
|
// Mining helper: hashes the header for each nonce in
// [start_nonce, start_nonce + num_nonces) (wrapping at 2^32) and reports the nonce
// whose hash is smallest when the 32 bytes are compared from index 0 upwards.
// Ties keep the earliest nonce.
//   found_nonce : receives the winning nonce
//   best_hash   : receives the winning 32-byte hash
//   target_hash : accepted for interface compatibility but not consulted here;
//                 the caller is expected to compare best_hash against its target.
// When num_nonces is 0 nothing is hashed and the outputs are left untouched.
extern "C" void RinHash_mine(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    uint32_t start_nonce,
    uint32_t num_nonces,
    uint32_t* found_nonce,
    uint8_t* target_hash,
    uint8_t* best_hash
) {
    const size_t block_header_len = 80;
    const size_t hash_len = 32;
    (void)target_hash;

    if (num_nonces == 0) {
        return;  // an empty batch has no "best" hash; avoid reading an empty buffer
    }

    // size_t so the buffer sizes cannot wrap at 32 bits for large batches
    const size_t count = num_nonces;
    std::vector<uint8_t> block_headers(block_header_len * count);
    std::vector<uint8_t> hashes(hash_len * count);

    for (size_t i = 0; i < count; i++) {
        uint32_t current_nonce = start_nonce + (uint32_t)i;
        size_t header_len = 0;
        blockheader_to_bytes(
            version,
            prev_block,
            merkle_root,
            timestamp,
            bits,
            &current_nonce,
            block_headers.data() + i * block_header_len,
            &header_len
        );
    }

    rinhash_cuda_batch(block_headers.data(), block_header_len, hashes.data(), num_nonces);

    // Strict "<" keeps the first of several equal hashes, as a lexicographic scan would.
    size_t best = 0;
    for (size_t i = 1; i < count; i++) {
        if (memcmp(hashes.data() + i * hash_len, hashes.data() + best * hash_len, hash_len) < 0) {
            best = i;
        }
    }

    memcpy(best_hash, hashes.data() + best * hash_len, hash_len);
    *found_nonce = start_nonce + (uint32_t)best;
}
|
8
rin/miner/gpu/RinHash-cuda/rinhash_device.cuh
Normal file
8
rin/miner/gpu/RinHash-cuda/rinhash_device.cuh
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
#ifndef RINHASH_DEVICE_CUH
|
||||||
|
#define RINHASH_DEVICE_CUH
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <device_launch_parameters.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#endif // RINHASH_DEVICE_CUH
|
140
rin/miner/gpu/RinHash-cuda/sha3-256.cu
Normal file
140
rin/miner/gpu/RinHash-cuda/sha3-256.cu
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#define KECCAKF_ROUNDS 24
|
||||||
|
|
||||||
|
|
||||||
|
// Rotates the 64-bit value x left by n bits.
// Both shift counts are reduced modulo 64 so that n == 0 (or n == 64) returns x
// unchanged instead of performing an undefined 64-bit shift by 64.
__device__ inline uint64_t rotate(uint64_t x, int n) {
    return (x << (n & 63)) | (x >> ((64 - n) & 63));
}
|
||||||
|
|
||||||
|
// Keccak-f[1600] permutation (24 rounds applied to the internal state st[25]) and its rotate helper
|
||||||
|
// Rotates the 64-bit value x left by n bits.
// Shift counts are masked to 0..63 so n == 0 yields x rather than an undefined
// shift by 64; for n in 1..63 the result is the plain rotate.
__device__ inline uint64_t ROTL64(uint64_t x, int n) {
    return (x << (n & 63)) | (x >> ((64 - n) & 63));
}
|
||||||
|
|
||||||
|
// Keccak-f[1600]: applies 24 rounds (theta, rho+pi, chi, iota) in place to the
// 25-word state `st`.
__device__ void keccakf(uint64_t st[25]) {
    // Rotation amount applied at each step of the rho/pi lane walk.
    const int rho_offset[24] = {
        1,  3,  6,  10, 15, 21,
        28, 36, 45, 55, 2,  14,
        27, 41, 56, 8,  25, 43,
        62, 18, 39, 61, 20, 44
    };

    // Destination lane visited at each step of the rho/pi lane walk.
    const int pi_lane[24] = {
        10, 7,  11, 17, 18, 3,
        5,  16, 8,  21, 24, 4,
        15, 23, 19, 13, 12, 2,
        20, 14, 22, 9,  6,  1
    };

    // Per-round constants XORed into lane 0 (iota).
    const uint64_t round_const[24] = {
        0x0000000000000001ULL, 0x0000000000008082ULL,
        0x800000000000808aULL, 0x8000000080008000ULL,
        0x000000000000808bULL, 0x0000000080000001ULL,
        0x8000000080008081ULL, 0x8000000000008009ULL,
        0x000000000000008aULL, 0x0000000000000088ULL,
        0x0000000080008009ULL, 0x000000008000000aULL,
        0x000000008000808bULL, 0x800000000000008bULL,
        0x8000000000008089ULL, 0x8000000000008003ULL,
        0x8000000000008002ULL, 0x8000000000000080ULL,
        0x000000000000800aULL, 0x800000008000000aULL,
        0x8000000080008081ULL, 0x8000000000008080ULL,
        0x0000000080000001ULL, 0x8000000080008008ULL
    };

    uint64_t col[5];

    for (int rnd = 0; rnd < 24; ++rnd) {
        // Theta: column parities, then mix each column with its neighbours.
        for (int x = 0; x < 5; ++x) {
            col[x] = st[x] ^ st[x + 5] ^ st[x + 10] ^ st[x + 15] ^ st[x + 20];
        }
        for (int x = 0; x < 5; ++x) {
            const uint64_t mix = col[(x + 4) % 5] ^ ROTL64(col[(x + 1) % 5], 1);
            for (int row = 0; row < 25; row += 5) {
                st[row + x] ^= mix;
            }
        }

        // Rho and Pi: walk the lanes, rotating the carried value into each one.
        uint64_t carry = st[1];
        for (int step = 0; step < 24; ++step) {
            const int lane = pi_lane[step];
            const uint64_t displaced = st[lane];
            st[lane] = ROTL64(carry, rho_offset[step]);
            carry = displaced;
        }

        // Chi: non-linear step, one row of five lanes at a time.
        for (int row = 0; row < 25; row += 5) {
            for (int x = 0; x < 5; ++x) {
                col[x] = st[row + x];
            }
            for (int x = 0; x < 5; ++x) {
                st[row + x] ^= (~col[(x + 1) % 5]) & col[(x + 2) % 5];
            }
        }

        // Iota
        st[0] ^= round_const[rnd];
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reads a 64-bit value stored little-endian in the 8 bytes at `src`.
__device__ inline uint64_t load64_le(const uint8_t *src) {
    uint64_t value = 0;
    #pragma unroll
    for (int byte = 7; byte >= 0; byte--) {
        // most significant byte first, shifting earlier bytes up as we go
        value = (value << 8) | (uint64_t)src[byte];
    }
    return value;
}
|
||||||
|
|
||||||
|
// Writes the 64-bit value x little-endian into the 8 bytes at `dst`.
__device__ inline void store64_le(uint8_t *dst, uint64_t x) {
    uint64_t rest = x;
    #pragma unroll
    for (int byte = 0; byte < 8; byte++) {
        dst[byte] = (uint8_t)rest;  // lowest remaining byte
        rest >>= 8;
    }
}
|
||||||
|
|
||||||
|
/*
  sha3_256_device
  - Absorbs the `inlen` bytes at `input`, applies the SHA3-256 padding
    (domain byte 0x06 after the message, 0x80 in the last byte of the rate)
    and runs Keccak-f[1600].
  - Writes the first 32 bytes (4 words) of the state, little-endian, to hash_out.
  - The SHA3-256 rate (absorbing block size) is 136 bytes.
  Any input length is supported; the state is updated through 64-bit word
  operations, so the result does not depend on how the state is laid out in memory.
*/
__device__ void sha3_256_device(const uint8_t *input, size_t inlen, uint8_t *hash_out) {
    const size_t rate = 136;   // SHA3-256 rate in bytes
    uint64_t st[25] = {0};     // internal state (25 words = 1600 bits)

    // Full rate-sized blocks.
    while (inlen >= rate) {
        for (size_t i = 0; i < rate / 8; i++) {
            st[i] ^= load64_le(input + i * 8);
        }
        keccakf(st);
        input += rate;
        inlen -= rate;
    }

    // Remaining 0..rate-1 bytes, XORed in at their little-endian position.
    for (size_t i = 0; i < inlen; i++) {
        st[i / 8] ^= (uint64_t)input[i] << (8 * (i % 8));
    }

    // Padding: 0x06 right after the message, 0x80 in the final byte of the rate.
    st[inlen / 8] ^= 0x06ULL << (8 * (inlen % 8));
    st[(rate - 1) / 8] ^= 0x80ULL << (8 * ((rate - 1) % 8));
    keccakf(st);

    // Squeeze: 32 output bytes.
    for (int i = 0; i < 4; i++) {
        store64_le(hash_out + i * 8, st[i]);
    }
}
|
85
rin/miner/gpu/RinHash-cuda/test_miner.cu
Normal file
85
rin/miner/gpu/RinHash-cuda/test_miner.cu
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
// External functions from our CUDA implementation
|
||||||
|
extern "C" void RinHash(
|
||||||
|
const uint32_t* version,
|
||||||
|
const uint32_t* prev_block,
|
||||||
|
const uint32_t* merkle_root,
|
||||||
|
const uint32_t* timestamp,
|
||||||
|
const uint32_t* bits,
|
||||||
|
const uint32_t* nonce,
|
||||||
|
uint8_t* output
|
||||||
|
);
|
||||||
|
|
||||||
|
extern "C" void RinHash_mine(
|
||||||
|
const uint32_t* version,
|
||||||
|
const uint32_t* prev_block,
|
||||||
|
const uint32_t* merkle_root,
|
||||||
|
const uint32_t* timestamp,
|
||||||
|
const uint32_t* bits,
|
||||||
|
uint32_t start_nonce,
|
||||||
|
uint32_t num_nonces,
|
||||||
|
uint32_t* found_nonce,
|
||||||
|
uint8_t* target_hash,
|
||||||
|
uint8_t* best_hash
|
||||||
|
);
|
||||||
|
|
||||||
|
// Prints "<label>: " followed by `len` bytes of `data` as two-digit lowercase hex
// and a newline, all on stdout.
void print_hex(const char* label, const uint8_t* data, size_t len) {
    printf("%s: ", label);
    size_t pos = 0;
    while (pos < len) {
        printf("%02x", data[pos]);
        ++pos;
    }
    printf("\n");
}
|
||||||
|
|
||||||
|
// Smoke test: selects CUDA device 0, hashes one sample header with RinHash and then
// scans 1000 nonces with RinHash_mine, printing the results.
// RinHash / RinHash_mine return void, so this program cannot detect a failure inside
// them; the result buffers are zero-initialised so that a failed call prints zeros
// rather than indeterminate stack contents.
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    printf("RinHash CUDA Miner Test\n");
    printf("=======================\n\n");

    // Initialize CUDA
    cudaError_t cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU?\n");
        return 1;
    }

    // Test data - sample block header
    uint32_t version = 0x20000000;
    uint32_t prev_block[8] = {
        0x12345678, 0x9abcdef0, 0x12345678, 0x9abcdef0,
        0x12345678, 0x9abcdef0, 0x12345678, 0x9abcdef0
    };
    uint32_t merkle_root[8] = {
        0xabcdef12, 0x34567890, 0xabcdef12, 0x34567890,
        0xabcdef12, 0x34567890, 0xabcdef12, 0x34567890
    };
    uint32_t timestamp = 0x5f123456;
    uint32_t bits = 0x1d00ffff;
    uint32_t nonce = 0x12345678;

    uint8_t output[32] = {0};

    printf("Testing single hash...\n");
    RinHash(&version, prev_block, merkle_root, &timestamp, &bits, &nonce, output);
    print_hex("Hash result", output, 32);

    printf("\nTesting mining (trying 1000 nonces)...\n");
    uint32_t found_nonce = 0;
    uint8_t target_hash[32];
    uint8_t best_hash[32] = {0};

    // Set a target (easier than difficulty)
    memset(target_hash, 0xff, 32);

    RinHash_mine(&version, prev_block, merkle_root, &timestamp, &bits,
                 0, 1000, &found_nonce, target_hash, best_hash);

    printf("Found nonce: 0x%08x\n", found_nonce);
    print_hex("Best hash", best_hash, 32);

    printf("\nTest completed successfully!\n");
    return 0;
}
|
21
rin/miner/gpu/RinHash-hip/CMakeLists.txt
Normal file
21
rin/miner/gpu/RinHash-hip/CMakeLists.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# Build script for the HIP port of the RinHash miner.
cmake_minimum_required(VERSION 3.21)
project(RinHashHIP LANGUAGES CXX HIP)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_HIP_STANDARD 17)
set(CMAKE_HIP_STANDARD_REQUIRED ON)

# Enable HIP
find_package(HIP REQUIRED)

set(SOURCES
    rinhash.hip.cu
    sha3-256.hip.cu
)

# CMake maps the .cu extension to the CUDA language, which is not enabled in this
# project; mark the sources explicitly so they are compiled as HIP.
set_source_files_properties(${SOURCES} PROPERTIES LANGUAGE HIP)

add_executable(rinhash-hip-miner ${SOURCES})

target_include_directories(rinhash-hip-miner PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})

target_compile_definitions(rinhash-hip-miner PRIVATE __HIP_PLATFORM_AMD__)

# NOTE(review): ROCm's package config exports lower-case targets (hip::device /
# hip::host); confirm that HIP::device resolves with the installed ROCm version.
target_link_libraries(rinhash-hip-miner PRIVATE HIP::device)
|
929
rin/miner/gpu/RinHash-hip/argon2d_device.cuh
Normal file
929
rin/miner/gpu/RinHash-hip/argon2d_device.cuh
Normal file
@@ -0,0 +1,929 @@
|
|||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <device_launch_parameters.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
//=== Argon2 constants ===//
|
||||||
|
#define ARGON2_BLOCK_SIZE 1024
|
||||||
|
#define ARGON2_QWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 8)
|
||||||
|
#define ARGON2_OWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 16)
|
||||||
|
#define ARGON2_HWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 32)
|
||||||
|
#define ARGON2_SYNC_POINTS 4
|
||||||
|
#define ARGON2_PREHASH_DIGEST_LENGTH 64
|
||||||
|
#define ARGON2_PREHASH_SEED_LENGTH 72
|
||||||
|
#define ARGON2_VERSION_10 0x10
|
||||||
|
#define ARGON2_VERSION_13 0x13
|
||||||
|
#define ARGON2_ADDRESSES_IN_BLOCK 128
|
||||||
|
|
||||||
|
//=== Blake2b constants ===//
|
||||||
|
#define BLAKE2B_BLOCKBYTES 128
|
||||||
|
#define BLAKE2B_OUTBYTES 64
|
||||||
|
#define BLAKE2B_KEYBYTES 64
|
||||||
|
#define BLAKE2B_SALTBYTES 16
|
||||||
|
#define BLAKE2B_PERSONALBYTES 16
|
||||||
|
#define BLAKE2B_ROUNDS 12
|
||||||
|
|
||||||
|
//=== Structure definitions ===//
|
||||||
|
// One Argon2 memory block: ARGON2_QWORDS_IN_BLOCK 64-bit words
// (ARGON2_BLOCK_SIZE bytes in total), aligned to 64 bytes.
typedef struct __align__(64) block_ {
    uint64_t v[ARGON2_QWORDS_IN_BLOCK];  // block contents as 64-bit words
} block;
|
||||||
|
|
||||||
|
// Parameters and working memory of one Argon2 computation.
typedef struct Argon2_instance_t {
    block *memory;            // working memory (array of blocks)
    uint32_t version;         // NOTE(review): presumably ARGON2_VERSION_10 / _13 -- confirm
    uint32_t passes;          // number of passes over memory
    uint32_t memory_blocks;   // number of blocks in `memory`
    uint32_t segment_length;
    uint32_t lane_length;
    uint32_t lanes;
    uint32_t threads;
    int print_internals;      // whether to print the memory blocks
} argon2_instance_t;
|
||||||
|
|
||||||
|
// Argon2 position: identifies the block currently being constructed.
// Used to distribute work between threads.
typedef struct Argon2_position_t {
    uint32_t pass;
    uint32_t lane;
    uint8_t slice;
    uint32_t index;
} argon2_position_t;
|
||||||
|
|
||||||
|
// Running state of a BLAKE2b hash computation.
typedef struct __blake2b_state {
    uint64_t h[8];                    // chaining value
    uint64_t t[2];                    // 128-bit counter (low word, high word)
    uint64_t f[2];                    // finalisation flags (last block, last node)
    uint8_t buf[BLAKE2B_BLOCKBYTES];  // buffered input
    unsigned buflen;                  // NOTE(review): presumably bytes held in buf -- confirm in blake2b_update
    unsigned outlen;                  // digest length set by blake2b_init
    uint8_t last_node;                // when non-zero, finalisation also sets f[1]
} blake2b_state;
|
||||||
|
|
||||||
|
// BLAKE2b parameter block. blake2b_init reads this structure as 64 raw bytes
// (eight little-endian 64-bit words), so its layout must contain no padding.
// The trailing numbers are the cumulative size in bytes after each field.
typedef struct __blake2b_param {
    uint8_t digest_length;                   /* 1 */
    uint8_t key_length;                      /* 2 */
    uint8_t fanout;                          /* 3 */
    uint8_t depth;                           /* 4 */
    uint32_t leaf_length;                    /* 8 */
    uint64_t node_offset;                    /* 16 */
    uint8_t node_depth;                      /* 17 */
    uint8_t inner_length;                    /* 18 */
    uint8_t reserved[14];                    /* 32 */
    uint8_t salt[BLAKE2B_SALTBYTES];         /* 48 */
    uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
} blake2b_param;

// Fail the build instead of hashing garbage if a compiler ever pads the struct.
static_assert(sizeof(blake2b_param) == 64,
              "blake2b_param must be exactly 64 bytes with no padding");
|
||||||
|
|
||||||
|
//=== Constant memory ===//
|
||||||
|
__constant__ uint64_t blake2b_IV[8] = {
|
||||||
|
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
||||||
|
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
||||||
|
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
||||||
|
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
|
||||||
|
};
|
||||||
|
|
||||||
|
__constant__ uint8_t blake2b_sigma[12][16] = {
|
||||||
|
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||||
|
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||||
|
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
|
||||||
|
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
|
||||||
|
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
|
||||||
|
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
|
||||||
|
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
|
||||||
|
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
|
||||||
|
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
|
||||||
|
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
|
||||||
|
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||||
|
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}
|
||||||
|
};
|
||||||
|
|
||||||
|
//=== Common helper functions ===//

// Rotates the 64-bit value x right by n bits.
// Shift counts are masked to 0..63 so n == 0 returns x instead of performing an
// undefined shift by 64; for n in 1..63 this is the plain rotate.
__device__ __forceinline__ uint64_t rotr64(uint64_t x, uint32_t n) {
    return (x >> (n & 63u)) | (x << ((64u - n) & 63u));
}
|
||||||
|
|
||||||
|
// Argon2's BlaMka mixing primitive: x + y + 2 * lo32(x) * lo32(y), modulo 2^64.
// Kept identical in result to the C reference implementation.
__device__ __forceinline__ uint64_t fBlaMka(uint64_t x, uint64_t y) {
    const uint64_t lo_x = x & 0xFFFFFFFFULL;
    const uint64_t lo_y = y & 0xFFFFFFFFULL;
    const uint64_t product = lo_x * lo_y;
    return x + y + (product << 1);
}
|
||||||
|
|
||||||
|
// BLAKE2b G mixing function: updates (a, b, c, d) in place using the message
// words m1 and m2, with rotations 32, 24, 16 and 63 as in the reference code.
__device__ __forceinline__ void blake2b_G(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t m1, uint64_t m2) {
    // first half: mix in m1
    a += b + m1;
    d = rotr64(d ^ a, 32);
    c += d;
    b = rotr64(b ^ c, 24);

    // second half: mix in m2
    a += b + m2;
    d = rotr64(d ^ a, 16);
    c += d;
    b = rotr64(b ^ c, 63);
}
|
||||||
|
|
||||||
|
// Stores the 32-bit value w at dst in little-endian byte order.
__device__ __forceinline__ void store32(void *dst, uint32_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
    memcpy(dst, &w, sizeof w);
#else
    uint8_t *out = (uint8_t *)dst;
    for (int byte = 0; byte < 4; ++byte) {
        out[byte] = (uint8_t)(w >> (8 * byte));
    }
#endif
}
|
||||||
|
// Adds `inc` to the 128-bit counter held in S->t (t[0] low word, t[1] high word),
// carrying into t[1] when the low word wraps.
__device__ __forceinline__ void blake2b_increment_counter(blake2b_state *S,
                                                          uint64_t inc) {
    const uint64_t low = S->t[0] + inc;
    S->t[0] = low;
    if (low < inc) {
        S->t[1] += 1;  // low word overflowed
    }
}
|
||||||
|
|
||||||
|
// Sets the "last node" finalisation flag f[1] to all ones.
__device__ __forceinline__ void blake2b_set_lastnode(blake2b_state *S) {
    S->f[1] = ~(uint64_t)0;
}
|
||||||
|
|
||||||
|
// Sets the "last block" finalisation flag f[0] to all ones; when S->last_node is
// non-zero the "last node" flag f[1] is set as well.
__device__ __forceinline__ void blake2b_set_lastblock(blake2b_state *S) {
    const uint64_t all_ones = ~(uint64_t)0;
    if (S->last_node) {
        S->f[1] = all_ones;
    }
    S->f[0] = all_ones;
}
|
||||||
|
|
||||||
|
// Zeroes every byte of the BLAKE2b state *S.
__device__ void blake2b_state_memset(blake2b_state* S) {
    uint8_t* bytes = (uint8_t*)S;
    for (size_t k = 0; k < sizeof(blake2b_state); ++k) {
        bytes[k] = 0;
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// XORs every 64-bit word of *src into the corresponding word of *dst.
__device__ void xor_block(block* dst, const block* src) {
    uint64_t* out = dst->v;
    const uint64_t* in = src->v;
    for (int word = 0; word < ARGON2_QWORDS_IN_BLOCK; ++word) {
        out[word] ^= in[word];
    }
}
|
||||||
|
|
||||||
|
// Hand-written byte-by-byte copy of n bytes from src to dest, in ascending address
// order. Used instead of memcpy because, per the original author, CUDA's memcpy is
// slow when called inside a kernel.
__device__ void c_memcpy(void *dest, const void *src, size_t n) {
    uint8_t *out = (uint8_t*)dest;
    const uint8_t *in = (const uint8_t*)src;
    size_t remaining = n;
    while (remaining != 0) {
        *out++ = *in++;
        --remaining;
    }
}
|
||||||
|
|
||||||
|
// Copies the full contents (all ARGON2_QWORDS_IN_BLOCK words) of *src into *dst.
__device__ void copy_block(block* dst, const block* src) {
    const size_t block_bytes = sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK;
    c_memcpy(dst->v, src->v, block_bytes);
}
|
||||||
|
|
||||||
|
// Argon2 compression function, kept in step with the C reference implementation.
// Computes R = ref_block XOR prev_block, runs the BlaMka-based permutation over R
// (eight 16-word "column" groups, then eight interleaved "row" groups) and stores
// next_block = permuted(R) XOR R, additionally XORed with the previous contents of
// next_block when with_xor is non-zero.
// All inputs are read into local copies before next_block is written, so
// next_block may alias ref_block.
__device__ void fill_block(const block* prev_block, const block* ref_block, block* next_block, int with_xor) {
    block mixed = {};      // R, permuted in place below
    block feedback = {};   // R (and optionally the old next_block), XORed in at the end

    copy_block(&mixed, ref_block);
    xor_block(&mixed, prev_block);
    copy_block(&feedback, &mixed);

    if (with_xor) {
        xor_block(&feedback, next_block);
    }

    // BlaMka variant of the BLAKE2b G function (no message words).
    auto mix4 = [](uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d) {
        a = fBlaMka(a, b);
        d = rotr64(d ^ a, 32);
        c = fBlaMka(c, d);
        b = rotr64(b ^ c, 24);
        a = fBlaMka(a, b);
        d = rotr64(d ^ a, 16);
        c = fBlaMka(c, d);
        b = rotr64(b ^ c, 63);
    };

    // One message-less BLAKE2 round over 16 words: columns first, then diagonals.
    auto round16 = [&mix4](uint64_t& v0, uint64_t& v1, uint64_t& v2, uint64_t& v3,
                           uint64_t& v4, uint64_t& v5, uint64_t& v6, uint64_t& v7,
                           uint64_t& v8, uint64_t& v9, uint64_t& v10, uint64_t& v11,
                           uint64_t& v12, uint64_t& v13, uint64_t& v14, uint64_t& v15) {
        mix4(v0, v4, v8, v12);
        mix4(v1, v5, v9, v13);
        mix4(v2, v6, v10, v14);
        mix4(v3, v7, v11, v15);
        mix4(v0, v5, v10, v15);
        mix4(v1, v6, v11, v12);
        mix4(v2, v7, v8, v13);
        mix4(v3, v4, v9, v14);
    };

    // Columns: words 16*g .. 16*g + 15.
    for (unsigned g = 0; g < 8; ++g) {
        uint64_t* c = &mixed.v[16 * g];
        round16(c[0], c[1], c[2],  c[3],  c[4],  c[5],  c[6],  c[7],
                c[8], c[9], c[10], c[11], c[12], c[13], c[14], c[15]);
    }

    // Rows: word pairs (2*g, 2*g + 1) taken from each of the eight 16-word groups.
    for (unsigned g = 0; g < 8; ++g) {
        uint64_t* r = &mixed.v[2 * g];
        round16(r[0],  r[1],  r[16], r[17], r[32], r[33],  r[48],  r[49],
                r[64], r[65], r[80], r[81], r[96], r[97], r[112], r[113]);
    }

    copy_block(next_block, &feedback);
    xor_block(next_block, &mixed);
}
|
||||||
|
|
||||||
|
// Element-wise fill: assigns `val` to dest[0] .. dest[count - 1].
// `count` is a number of ELEMENTS of whatever dest points to, not a byte count.
template<typename T, typename ptr_t>
__device__ void c_memset(ptr_t dest, T val, int count) {
    int idx = 0;
    while (idx < count) {
        dest[idx] = val;
        ++idx;
    }
}
|
||||||
|
|
||||||
|
// Sets every BYTE of block *b to `in` (the memset semantics of the C reference).
// The byte is replicated into a 64-bit word and stored once per word, so exactly
// ARGON2_QWORDS_IN_BLOCK words are written. Passing b->v and sizeof(b->v) to the
// element-wise c_memset would instead write sizeof(b->v) 64-bit elements, running
// far past the end of the block.
__device__ void init_block_value(block *b, uint8_t in) {
    const uint64_t word = 0x0101010101010101ULL * (uint64_t)in;
    for (unsigned i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
        b->v[i] = word;
    }
}
|
||||||
|
|
||||||
|
// Advances the counter in word 6 of *input_block and derives the next address
// block from it by applying fill_block twice against *zero_block.
__device__ void next_addresses(block *address_block, block *input_block,
                               const block *zero_block) {
    input_block->v[6] += 1;

    // first application: input -> address; second: address -> address
    fill_block(zero_block, input_block, address_block, 0);
    fill_block(zero_block, address_block, address_block, 0);
}
|
||||||
|
|
||||||
|
// BLAKE2b G mixing function used by blake2b_compress: updates (a, b, c, d) in place
// with message words x and y, using rotations 32, 24, 16 and 63.
__device__ void G1(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t x, uint64_t y) {
    // first half: mix in x
    a += b + x;
    d = rotr64(d ^ a, 32);
    c += d;
    b = rotr64(b ^ c, 24);

    // second half: mix in y
    a += b + y;
    d = rotr64(d ^ a, 16);
    c += d;
    b = rotr64(b ^ c, 63);
}
|
||||||
|
|
||||||
|
// BLAKE2b compression function F: mixes one BLAKE2B_BLOCKBYTES-byte message block
// into the chaining value S->h, using the counter S->t and finalisation flags S->f.
__device__ void blake2b_compress(blake2b_state* S, const uint8_t block[BLAKE2B_BLOCKBYTES]) {
    uint64_t m[16];
    uint64_t v[16];

    // Decode the message block as sixteen little-endian 64-bit words.
    for (int w = 0; w < 16; ++w) {
        const uint8_t* src = block + w * 8;
        uint64_t word = 0;
        for (int b = 7; b >= 0; --b) {
            word = (word << 8) | (uint64_t)src[b];
        }
        m[w] = word;
    }

    // Working vector: chaining value followed by the IV, then counter/flags mixed in.
    for (int k = 0; k < 8; ++k) {
        v[k] = S->h[k];
    }
    for (int k = 0; k < 8; ++k) {
        v[8 + k] = blake2b_IV[k];
    }
    v[12] ^= S->t[0];
    v[13] ^= S->t[1];
    v[14] ^= S->f[0];
    v[15] ^= S->f[1];

    for (int r = 0; r < BLAKE2B_ROUNDS; ++r) {
        const uint8_t* s = blake2b_sigma[r];

        // Column step: (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15)
        for (int c = 0; c < 4; ++c) {
            G1(v[c], v[4 + c], v[8 + c], v[12 + c], m[s[2 * c]], m[s[2 * c + 1]]);
        }

        // Diagonal step: (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14)
        for (int d = 0; d < 4; ++d) {
            G1(v[d], v[4 + (d + 1) % 4], v[8 + (d + 2) % 4], v[12 + (d + 3) % 4],
               m[s[8 + 2 * d]], m[s[9 + 2 * d]]);
        }
    }

    // Finalization: fold both halves of the working vector into the chaining value.
    for (int k = 0; k < 8; ++k) {
        S->h[k] ^= v[k] ^ v[k + 8];
    }
}
|
||||||
|
|
||||||
|
// Reads a 64-bit value stored little-endian in the 8 bytes at `src`.
__device__ __forceinline__ uint64_t load64(const void* src) {
    const uint8_t* bytes = (const uint8_t*)src;
    uint64_t value = 0;
    for (int k = 0; k < 8; ++k) {
        value |= (uint64_t)bytes[k] << (8 * k);
    }
    return value;
}
|
||||||
|
|
||||||
|
// Writes the 64-bit value w little-endian into the 8 bytes at `dst`.
__device__ __forceinline__ void store64(void* dst, uint64_t w) {
    uint8_t* bytes = (uint8_t*)dst;
    for (int k = 0; k < 8; ++k) {
        bytes[k] = (uint8_t)(w >> (8 * k));
    }
}
|
||||||
|
|
||||||
|
// Fills *dst from ARGON2_BLOCK_SIZE bytes at `input`, decoding each 64-bit word as
// little-endian.
__device__ void load_block(block *dst, const void *input) {
    const uint8_t *src = (const uint8_t *)input;
    for (unsigned word = 0; word < ARGON2_QWORDS_IN_BLOCK; ++word) {
        dst->v[word] = load64(src + word * sizeof(dst->v[word]));
    }
}
|
||||||
|
|
||||||
|
// Writes *src to ARGON2_BLOCK_SIZE bytes at `output`, encoding each 64-bit word as
// little-endian.
__device__ void store_block(void *output, const block *src) {
    uint8_t *dst = (uint8_t *)output;
    for (unsigned word = 0; word < ARGON2_QWORDS_IN_BLOCK; ++word) {
        store64(dst + word * sizeof(src->v[word]), src->v[word]);
    }
}
|
||||||
|
|
||||||
|
/*
 * Initialise `S` for an unkeyed BLAKE2b hash producing `outlen` digest bytes.
 *
 * The state is cleared, a sequential-mode parameter block (fanout 1, depth 1,
 * no key/salt/personalisation) is built, and h[] is set to IV XOR that block.
 *
 * Returns 0 on success. Returns -1 when `S` is NULL or `outlen` is 0 or larger
 * than BLAKE2B_OUTBYTES; in the latter case the state is left flagged as
 * finalised so that a subsequent blake2b_update() on it is refused.
 */
__device__ int blake2b_init(blake2b_state* S, size_t outlen) {
    if (S == NULL) {
        return -1;
    }

    // Clear state using our custom function
    blake2b_state_memset(S);

    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        // digest_length is a single byte: an out-of-range length must not be
        // silently truncated into a "valid" one.
        blake2b_set_lastblock(S);
        return -1;
    }

    // Set parameters according to Blake2b spec
    blake2b_param P;
    P.digest_length = (uint8_t)outlen;
    P.key_length = 0;
    P.fanout = 1;
    P.depth = 1;
    P.leaf_length = 0;
    P.node_offset = 0;
    P.node_depth = 0;
    P.inner_length = 0;
    c_memset(P.reserved, 0, sizeof(P.reserved));
    c_memset(P.salt, 0, sizeof(P.salt));
    c_memset(P.personal, 0, sizeof(P.personal));

    // h[i] = IV[i] XOR (i-th little-endian 64-bit word of the parameter block)
    const unsigned char *p = (const unsigned char *)(&P);
    for (int i = 0; i < 8; ++i) {
        S->h[i] = blake2b_IV[i] ^ load64(&p[i * sizeof(S->h[i])]);
    }

    S->outlen = P.digest_length;
    return 0; // Success
}
|
||||||
|
|
||||||
|
/* When non-zero, clear_internal_memory() actually wipes; 0 makes it a no-op. */
__device__ int FLAG_clear_internal_memory = 0;

/*
 * Overwrite the `n` bytes at `v` with zeros, but only when
 * FLAG_clear_internal_memory is non-zero and `v` is not NULL.
 *
 * The stores go through a volatile pointer so the compiler cannot drop them
 * as dead writes to a buffer that is about to go out of scope.
 */
__device__ void clear_internal_memory(void *v, size_t n) {
    if (FLAG_clear_internal_memory && v) {
        volatile uint8_t *p = (volatile uint8_t *)v;
        while (n-- > 0) {
            *p++ = 0;
        }
    }
}
|
||||||
|
|
||||||
|
/*
 * Absorb `inlen` bytes from `in` into the hash state `S`.
 *
 * Input is staged in S->buf; a block is compressed only once more data is
 * known to follow it, so the last (possibly full) block always stays buffered
 * for blake2b_final().
 *
 * Returns 0 on success (including inlen == 0), -1 when `S` or `in` is NULL or
 * when the state has already been finalised (S->f[0] != 0).
 */
__device__ int blake2b_update(blake2b_state* S, const uint8_t* in, size_t inlen) {
    if (inlen == 0) {
        return 0;
    }
    if (S == NULL || in == NULL) {
        return -1;
    }
    if (S->f[0] != 0) {
        /* Finalised states must not be reused. */
        return -1;
    }

    const uint8_t *cursor = in;
    size_t remaining = inlen;

    if (S->buflen + remaining > BLAKE2B_BLOCKBYTES) {
        /* Top up the staging buffer to a whole block and compress it. */
        const size_t buffered = S->buflen;
        const size_t fill = BLAKE2B_BLOCKBYTES - buffered;
        c_memcpy(&S->buf[buffered], cursor, fill);
        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
        blake2b_compress(S, S->buf);
        S->buflen = 0;
        cursor += fill;
        remaining -= fill;

        /* Compress further whole blocks straight from the input, no copy. */
        for (; remaining > BLAKE2B_BLOCKBYTES;
             remaining -= BLAKE2B_BLOCKBYTES, cursor += BLAKE2B_BLOCKBYTES) {
            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
            blake2b_compress(S, cursor);
        }
    }

    /* Whatever is left (1..BLAKE2B_BLOCKBYTES bytes) waits in the buffer. */
    c_memcpy(&S->buf[S->buflen], cursor, remaining);
    S->buflen += (unsigned int)remaining;
    return 0; // Success
}
|
||||||
|
|
||||||
|
/*
 * Finish the hash in `S` and write S->outlen digest bytes to `out`.
 *
 * `outlen` is the capacity of `out`; it must be at least S->outlen.
 *
 * Returns 0 on success, -1 when `S` or `out` is NULL, when `out` is too small
 * for the digest, or when the state has already been finalised.
 */
__device__ int blake2b_final(blake2b_state* S, uint8_t* out, size_t outlen) {
    if (!S || !out) {
        return -1;
    }
    if (outlen < S->outlen) {
        /* Copying S->outlen bytes would overrun the caller's buffer. */
        return -1;
    }
    if (S->f[0] != 0) {
        /* Already finalised: compressing again would corrupt the digest. */
        return -1;
    }

    uint8_t buffer[BLAKE2B_OUTBYTES] = {0};

    blake2b_increment_counter(S, S->buflen);
    blake2b_set_lastblock(S);
    c_memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
    blake2b_compress(S, S->buf);

    /* Serialise the full 8-word state, then hand out only the requested prefix. */
    for (unsigned int i = 0; i < 8; ++i) {
        store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
    }
    c_memcpy(out, buffer, S->outlen);
    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Initialise `S` for a keyed BLAKE2b hash producing `outlen` digest bytes.
 *
 * The state is cleared, the parameter block is built with key_length set,
 * h[] becomes IV XOR parameter block, and the key, zero-padded to one full
 * block, is absorbed as the first input block.
 *
 * Returns 0 on success; -1 when `S` is NULL, `outlen` is 0 or larger than
 * BLAKE2B_OUTBYTES, `key` is NULL, `keylen` is 0 or larger than
 * BLAKE2B_KEYBYTES, or absorbing the key block fails.
 */
__device__ int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
                                size_t keylen) {
    if (S == NULL) {
        return -1;
    }

    /* Counters, finalisation flags and buflen must start at zero:
     * blake2b_update() below reads all of them. */
    blake2b_state_memset(S);

    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES ||
        key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES) {
        /* Leave the state flagged as finalised so later updates are refused. */
        blake2b_set_lastblock(S);
        return -1;
    }

    /* Setup Parameter Block for keyed BLAKE2 */
    blake2b_param P;
    P.digest_length = (uint8_t)outlen;
    P.key_length = (uint8_t)keylen;
    P.fanout = 1;
    P.depth = 1;
    P.leaf_length = 0;
    P.node_offset = 0;
    P.node_depth = 0;
    P.inner_length = 0;
    c_memset(P.reserved, 0, sizeof(P.reserved));
    c_memset(P.salt, 0, sizeof(P.salt));
    c_memset(P.personal, 0, sizeof(P.personal));

    /* h[i] = IV[i] XOR (i-th little-endian 64-bit word of the parameter block) */
    const unsigned char *p = (const unsigned char *)(&P);
    for (int i = 0; i < 8; ++i) {
        S->h[i] = blake2b_IV[i] ^ load64(&p[i * sizeof(S->h[i])]);
    }
    S->outlen = P.digest_length;

    /* The key is hashed as a whole block, padded with zeros. */
    uint8_t block[BLAKE2B_BLOCKBYTES];
    c_memset(block, 0, BLAKE2B_BLOCKBYTES);
    c_memcpy(block, key, keylen);
    const int rc = blake2b_update(S, block, BLAKE2B_BLOCKBYTES);

    /* Burn the key from stack */
    clear_internal_memory(block, BLAKE2B_BLOCKBYTES);
    return (rc < 0) ? -1 : 0;
}
|
||||||
|
|
||||||
|
/*
 * One-shot BLAKE2b: hash `inlen` bytes at `in` (optionally keyed with
 * `keylen` bytes at `key`) into `outlen` bytes at `out`.
 *
 * Returns the result of blake2b_final() on success, -1 when an argument is
 * invalid (NULL input with non-zero length, NULL/empty/oversized output,
 * NULL key with non-zero length, oversized key) or when any stage fails.
 * clear_internal_memory() is called on the local state on every path.
 */
__device__ int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
                       const void *key, size_t keylen) {
    blake2b_state S;
    int ret = -1;

    const bool input_ok  = !(NULL == in && inlen > 0);
    const bool output_ok = !(NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES);
    const bool key_ok    = !((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES);

    if (input_ok && output_ok && key_ok) {
        const int init_rc = (keylen > 0) ? blake2b_init_key(&S, outlen, key, keylen)
                                         : blake2b_init(&S, outlen);
        if (init_rc >= 0 && blake2b_update(&S, (const uint8_t*)in, inlen) >= 0) {
            ret = blake2b_final(&S, (uint8_t*)out, outlen);
        }
    }

    clear_internal_memory(&S, sizeof(S));
    return ret;
}
|
||||||
|
|
||||||
|
/*
 * Map a 32-bit pseudo-random value to the index, within one lane, of the
 * block to reference (follows the C reference implementation, signature
 * included).
 *
 * instance    - supplies segment_length and lane_length
 * position    - pass / slice / index of the block currently being built
 * pseudo_rand - value to map into the reference area
 * same_lane   - non-zero when the reference lane is the current lane
 *
 * All reference-area arithmetic is 32-bit unsigned and wraps, as in the
 * original expressions.
 */
__device__ uint32_t index_alpha(const argon2_instance_t *instance,
                                const argon2_position_t *position, uint32_t pseudo_rand,
                                int same_lane) {
    const bool first_pass = (0 == position->pass);
    const bool first_segment = first_pass && (0 == position->slice);

    /* Blocks contributed by segments other than the one being filled:
     * the earlier slices on the first pass, every other slice afterwards. */
    const uint32_t finished_blocks =
        first_pass ? position->slice * instance->segment_length
                   : instance->lane_length - instance->segment_length;

    uint32_t reference_area_size;
    if (same_lane || first_segment) {
        /* Current segment counts too: everything built so far but the previous block. */
        reference_area_size = finished_blocks + position->index - 1;
    } else {
        /* Other lane: finished segments only, minus one block at segment start. */
        reference_area_size = finished_blocks - ((position->index == 0) ? 1u : 0u);
    }

    /* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
     * relative position */
    const uint64_t rand64 = pseudo_rand;
    const uint64_t squared_hi = (rand64 * rand64) >> 32;
    const uint64_t relative_position =
        reference_area_size - 1 - ((reference_area_size * squared_hi) >> 32);

    /* 1.2.5 Computing starting position */
    uint32_t start_position = 0;
    if (!first_pass && position->slice != ARGON2_SYNC_POINTS - 1) {
        start_position = (position->slice + 1) * instance->segment_length;
    }

    /* 1.2.6. Computing absolute position */
    const uint32_t absolute_position =
        (start_position + relative_position) % instance->lane_length;
    return absolute_position;
}
|
||||||
|
|
||||||
|
/*
 * Fill one segment (the `position.slice`-th quarter of lane `position.lane`)
 * of the Argon2 memory for pass `position.pass`.
 *
 * Addressing is always data-dependent here: the pseudo-random value is taken
 * from word 0 of the previous block. The data-independent branches of the C
 * reference implementation could never run in this function, so they and
 * their three scratch blocks have been dropped.
 *
 * `position` is taken by value; its `index` field is overwritten for every
 * block before being handed to index_alpha().
 */
__device__ void fill_segment(const argon2_instance_t *instance,
                             argon2_position_t position) {
    const bool first_segment = (0 == position.pass) && (0 == position.slice);

    /* The first two blocks of every lane are generated before filling starts. */
    const uint32_t starting_index = first_segment ? 2 : 0;

    /* Offset of the current block */
    uint32_t curr_offset = position.lane * instance->lane_length +
                           position.slice * instance->segment_length + starting_index;

    /* The block before the first block of a lane is the lane's last block. */
    uint32_t prev_offset = (0 == curr_offset % instance->lane_length)
                               ? curr_offset + instance->lane_length - 1
                               : curr_offset - 1;

    /* Version 0x10 always overwrites; later versions XOR into the existing
     * block on every pass after the first. */
    const int with_xor =
        (ARGON2_VERSION_10 != instance->version && 0 != position.pass) ? 1 : 0;

    for (uint32_t i = starting_index; i < instance->segment_length;
         ++i, ++curr_offset, ++prev_offset) {
        /* 1.1 Rotating prev_offset if needed */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }

        /* 1.2.1 Taking pseudo-random value from the previous block */
        const uint64_t pseudo_rand = instance->memory[prev_offset].v[0];

        /* 1.2.2 Computing the lane of the reference block */
        uint64_t ref_lane = (pseudo_rand >> 32) % instance->lanes;
        if (first_segment) {
            /* Can not reference other lanes yet */
            ref_lane = position.lane;
        }

        /* 1.2.3 Computing the number of possible reference block within the
         * lane. */
        position.index = i;
        const uint64_t ref_index =
            index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
                        ref_lane == position.lane);

        /* 2 Creating a new block */
        block *ref_block =
            instance->memory + instance->lane_length * ref_lane + ref_index;
        block *curr_block = instance->memory + curr_offset;
        fill_block(instance->memory + prev_offset, ref_block, curr_block, with_xor);
    }
}
|
||||||
|
|
||||||
|
/*
 * Run every pass over the whole Argon2 memory (follows the C reference
 * implementation): for each pass, for each of the ARGON2_SYNC_POINTS slices,
 * fill that slice's segment in every lane, one lane after another.
 *
 * A local instance descriptor (version fixed to ARGON2_VERSION_13) is built
 * from the arguments and handed to fill_segment().
 */
__device__ void fill_memory(block* memory, uint32_t passes, uint32_t lanes, uint32_t lane_length, uint32_t segment_length) {
    argon2_instance_t ctx;
    ctx.version = ARGON2_VERSION_13;
    ctx.passes = passes;
    ctx.memory = memory;
    ctx.memory_blocks = lanes * lane_length;
    ctx.segment_length = segment_length;
    ctx.lane_length = lane_length;
    ctx.lanes = lanes;
    ctx.threads = lanes;
    ctx.print_internals = 0;

    argon2_position_t where;
    uint32_t pass = 0;
    while (pass < passes) {
        where.pass = pass;
        uint32_t slice = 0;
        while (slice < ARGON2_SYNC_POINTS) {
            where.slice = slice;
            uint32_t lane = 0;
            while (lane < lanes) {
                where.lane = lane;
                /* where.index is set by fill_segment() itself. */
                fill_segment(&ctx, where);
                ++lane;
            }
            ++slice;
        }
        ++pass;
    }
}
|
||||||
|
|
||||||
|
/*
 * Variable-length hash built on BLAKE2b (follows the C reference
 * implementation). Hashes LE32(outlen) || in and writes `outlen` bytes to
 * `pout`.
 *
 * For outlen <= BLAKE2B_OUTBYTES a single BLAKE2b call produces the output.
 * Otherwise 64-byte digests are chained: the first half of each digest is
 * emitted and the whole digest is hashed again, until at most
 * BLAKE2B_OUTBYTES bytes remain, which the last digest supplies in full.
 *
 * Returns the result of the last BLAKE2b call on success, a negative value as
 * soon as any step fails, and -1 when outlen exceeds UINT32_MAX.
 */
__device__ int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
    uint8_t *dst = (uint8_t *)pout;
    blake2b_state hasher;
    uint8_t len_prefix[sizeof(uint32_t)] = {0};
    int ret = -1;

    if (outlen <= UINT32_MAX) {
        const bool single_shot = (outlen <= BLAKE2B_OUTBYTES);
        const size_t first_len = single_shot ? outlen : (size_t)BLAKE2B_OUTBYTES;
        uint8_t digest[BLAKE2B_OUTBYTES];
        uint8_t chain_in[BLAKE2B_OUTBYTES];

        /* Ensure little-endian byte order! */
        store32(len_prefix, (uint32_t)outlen);

        ret = blake2b_init(&hasher, first_len);
        if (ret >= 0) {
            ret = blake2b_update(&hasher, len_prefix, sizeof(len_prefix));
        }
        if (ret >= 0) {
            ret = blake2b_update(&hasher, (const uint8_t*)in, inlen);
        }
        if (ret >= 0) {
            /* Short outputs go straight to the caller's buffer. */
            ret = blake2b_final(&hasher, single_shot ? dst : digest, first_len);
        }

        if (ret >= 0 && !single_shot) {
            const uint32_t half = BLAKE2B_OUTBYTES / 2;
            uint32_t toproduce = (uint32_t)outlen - half;

            c_memcpy(dst, digest, half);
            dst += half;

            while (ret >= 0 && toproduce > BLAKE2B_OUTBYTES) {
                c_memcpy(chain_in, digest, BLAKE2B_OUTBYTES);
                ret = blake2b(digest, BLAKE2B_OUTBYTES, chain_in, BLAKE2B_OUTBYTES, NULL, 0);
                if (ret >= 0) {
                    c_memcpy(dst, digest, half);
                    dst += half;
                    toproduce -= half;
                }
            }

            if (ret >= 0) {
                /* Final link: sized to exactly the bytes still missing. */
                c_memcpy(chain_in, digest, BLAKE2B_OUTBYTES);
                ret = blake2b(digest, toproduce, chain_in, BLAKE2B_OUTBYTES, NULL, 0);
                if (ret >= 0) {
                    c_memcpy(dst, digest, toproduce);
                }
            }
        }
    }

    clear_internal_memory(&hasher, sizeof(hasher));
    return ret;
}
|
||||||
|
|
||||||
|
/* Feed one 32-bit value, encoded little-endian via store32(), into `S`. */
__device__ __forceinline__ void argon2_absorb_u32(blake2b_state *S, uint32_t v) {
    uint8_t le[sizeof(uint32_t)];
    store32(le, v);
    blake2b_update(S, le, sizeof(le));
}

/*
 * Compute a 32-byte Argon2d tag on the device (follows the C reference
 * implementation).
 *
 * output            - receives 32 bytes
 * input, input_len  - password / message bytes
 * t_cost            - number of passes
 * m_cost            - requested memory in blocks; raised to at least
 *                     2 * ARGON2_SYNC_POINTS * lanes and rounded down to a
 *                     multiple of lanes * ARGON2_SYNC_POINTS
 * lanes             - number of lanes; must be non-zero (it is a divisor)
 * memory            - caller-provided working area of at least the adjusted
 *                     block count
 * salt, salt_len    - salt bytes
 *
 * No secret and no associated data are hashed (both lengths are 0).
 */
__device__ void device_argon2d_hash(
    uint8_t* output,
    const uint8_t* input, size_t input_len,
    uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
    block* memory,
    const uint8_t* salt, size_t salt_len
) {
    /* Tag length in bytes: hashed into H0 and used for the final output. */
    const uint32_t tag_length = 32;

    // 1. Adjust the memory size
    uint32_t memory_blocks = m_cost;
    if (memory_blocks < 2 * ARGON2_SYNC_POINTS * lanes) {
        memory_blocks = 2 * ARGON2_SYNC_POINTS * lanes;
    }
    const uint32_t segment_length = memory_blocks / (lanes * ARGON2_SYNC_POINTS);
    memory_blocks = segment_length * (lanes * ARGON2_SYNC_POINTS);
    const uint32_t lane_length = segment_length * ARGON2_SYNC_POINTS;

    // 2. Compute the initial hash H0 over the parameters and inputs.
    //    The order of the fields below defines the hash and must not change.
    uint8_t blockhash[ARGON2_PREHASH_DIGEST_LENGTH];
    blake2b_state BlakeHash;

    blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
    argon2_absorb_u32(&BlakeHash, lanes);
    argon2_absorb_u32(&BlakeHash, tag_length);
    argon2_absorb_u32(&BlakeHash, memory_blocks);
    argon2_absorb_u32(&BlakeHash, t_cost);
    argon2_absorb_u32(&BlakeHash, ARGON2_VERSION_13);
    argon2_absorb_u32(&BlakeHash, 0);                    /* type field (0 is Argon2d in RFC 9106) */
    argon2_absorb_u32(&BlakeHash, (uint32_t)input_len);
    blake2b_update(&BlakeHash, (const uint8_t *)input, input_len);
    argon2_absorb_u32(&BlakeHash, (uint32_t)salt_len);
    blake2b_update(&BlakeHash, (const uint8_t *)salt, salt_len);
    argon2_absorb_u32(&BlakeHash, 0);                    /* secret length: none */
    argon2_absorb_u32(&BlakeHash, 0);                    /* associated data length: none */
    blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);

    // 3. Initialize first blocks in each lane
    //    seed = H0 || LE32(block index 0 or 1) || LE32(lane)
    uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
    uint8_t initial_hash[ARGON2_PREHASH_SEED_LENGTH];
    c_memcpy(initial_hash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
    c_memset(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0,
             ARGON2_PREHASH_SEED_LENGTH - ARGON2_PREHASH_DIGEST_LENGTH);

    for (uint32_t l = 0; l < lanes; ++l) {
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
        blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&memory[l * lane_length], blockhash_bytes);

        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
        blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&memory[l * lane_length + 1], blockhash_bytes);
    }

    // 4. Fill memory
    fill_memory(memory, t_cost, lanes, lane_length, segment_length);

    // 5. Final block mixing: XOR together the last block of every lane
    block final_block;
    copy_block(&final_block, &memory[lane_length - 1]);
    for (uint32_t l = 1; l < lanes; ++l) {
        const uint32_t last_block_in_lane = l * lane_length + (lane_length - 1);
        xor_block(&final_block, &memory[last_block_in_lane]);
    }

    // 6. Final hash
    uint8_t final_block_bytes[ARGON2_BLOCK_SIZE];
    store_block(final_block_bytes, &final_block);
    blake2b_long(output, tag_length, final_block_bytes, ARGON2_BLOCK_SIZE);
}
|
||||||
|
|
||||||
|
//=== Example __global__ kernel (version taking an explicit salt) ===//
// Assumes the host has already allocated the block working memory and passes
// a pointer to it as memory_ptr.
// Only thread 0 of block 0 computes the hash; every other thread exits
// immediately.
__global__ void argon2d_hash_device_kernel(
    uint8_t* output,
    const uint8_t* input, size_t input_len,
    uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
    block* memory_ptr,  // pointer to the memory region allocated by the host
    const uint8_t* salt, size_t salt_len
) {
    if (threadIdx.x != 0 || blockIdx.x != 0) {
        return;
    }
    device_argon2d_hash(output, input, input_len, t_cost, m_cost, lanes, memory_ptr, salt, salt_len);
}
|
272
rin/miner/gpu/RinHash-hip/blake3_device.cuh
Normal file
272
rin/miner/gpu/RinHash-hip/blake3_device.cuh
Normal file
@@ -0,0 +1,272 @@
|
|||||||
|
#include "blaze3_cpu.cuh"
|
||||||
|
|
||||||
|
// Number of threads per thread block
__constant__ const int NUM_THREADS = 16;

// Device-side counterparts of the host tables and functions follow.
// Each one mirrors its host version and carries a g_ prefix.

// Initial chaining value, eight 32-bit words.
__constant__ const u32 g_IV[8] = {
    0x6A09E667, 0xBB67AE85,
    0x3C6EF372, 0xA54FF53A,
    0x510E527F, 0x9B05688C,
    0x1F83D9AB, 0x5BE0CD19,
};

// Message-word permutation applied between rounds:
// new m[i] = old m[g_MSG_PERMUTATION[i]].
__constant__ const int g_MSG_PERMUTATION[16] = {
    2,  6,  3, 10,
    7,  0,  4, 13,
    1, 11, 12,  5,
    9, 14, 15,  8
};
|
||||||
|
|
||||||
|
// Rotate the 32-bit `value` right by `shift` bits.
// Both shift counts are reduced modulo the word size (usize), so a rotation
// by 0 or by a multiple of usize returns `value` unchanged instead of
// performing an undefined full-width shift.
__device__ u32 g_rotr(u32 value, int shift) {
    const unsigned right = (unsigned)shift & (unsigned)(usize - 1);
    const unsigned left = ((unsigned)usize - right) & (unsigned)(usize - 1);
    return (value >> right) | (value << left);
}
|
||||||
|
|
||||||
|
// Quarter-round: mixes the four state words at indices a, b, c, d with the
// two message words mx and my, using right-rotations by 16, 12, 8 and 7.
__device__ void g_g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
    // First half: absorb mx.
    state[a] += state[b] + mx;
    state[d] = g_rotr(state[d] ^ state[a], 16);
    state[c] += state[d];
    state[b] = g_rotr(state[b] ^ state[c], 12);

    // Second half: absorb my.
    state[a] += state[b] + my;
    state[d] = g_rotr(state[d] ^ state[a], 8);
    state[c] += state[d];
    state[b] = g_rotr(state[b] ^ state[c], 7);
}
|
||||||
|
|
||||||
|
// One full round over the 4x4 state: four column quarter-rounds followed by
// four diagonal quarter-rounds, consuming message words m[0..15] in order.
__device__ void g_round(u32 state[16], u32 m[16]) {
    // Mix the columns: (i, 4+i, 8+i, 12+i) with m[2i], m[2i+1].
    for (u32 i = 0; i < 4; ++i) {
        g_g(state, i, 4 + i, 8 + i, 12 + i, m[2 * i], m[2 * i + 1]);
    }
    // Mix the diagonals: rows 1, 2, 3 are rotated left by 1, 2, 3 positions.
    for (u32 i = 0; i < 4; ++i) {
        g_g(state,
            i,
            4 + (i + 1) % 4,
            8 + (i + 2) % 4,
            12 + (i + 3) % 4,
            m[8 + 2 * i], m[9 + 2 * i]);
    }
}
|
||||||
|
|
||||||
|
// Reorder the 16 message words in place so that the new m[i] is the old
// m[g_MSG_PERMUTATION[i]].
__device__ void g_permute(u32 m[16]) {
    // Snapshot the words first so the in-place rewrite reads stale values only.
    u32 before[16];
    for (int i = 0; i < 16; ++i) {
        before[i] = m[i];
    }
    for (int i = 0; i < 16; ++i) {
        m[i] = before[g_MSG_PERMUTATION[i]];
    }
}
|
||||||
|
|
||||||
|
// Hand-rolled word copy (the original author found CUDA's memcpy slow when
// called inside a kernel).
// `size` is a byte count; size / 4 whole u32 words are copied and any
// remainder is ignored. Assumes u32 is 4 bytes.
__device__ void g_memcpy(u32 *lhs, const u32 *rhs, int size) {
    int words_left = size / 4;
    while (words_left-- > 0) {
        *lhs++ = *rhs++;
    }
}
|
||||||
|
|
||||||
|
// Hand-rolled fill: assigns `val` to each of the first `count` elements of
// `dest`. `count` is an element count, not a byte count.
template<typename T, typename ptr_t>
__device__ void g_memset(ptr_t dest, T val, int count) {
    int idx = 0;
    while (idx < count) {
        dest[idx] = val;
        ++idx;
    }
}
|
||||||
|
|
||||||
|
// Compression function.
//   chaining_value - 8 input words, copied into state[0..7]
//   block_words    - 16 message words (copied; the caller's array is untouched)
//   counter        - split into state[12] (low 32 bits) and state[13] (high)
//   block_len      - stored in state[14]
//   flags          - stored in state[15]
//   state          - 16-word output; state[8..11] start as g_IV[0..3]
// Runs 7 rounds with a message permutation between consecutive rounds, then
// folds the upper half into the lower half and the chaining value into the
// upper half.
__device__ void g_compress(
    u32 *chaining_value,
    u32 *block_words,
    u64 counter,
    u32 block_len,
    u32 flags,
    u32 *state
) {
    g_memcpy(state, chaining_value, 32);   // 8 words
    g_memcpy(state + 8, g_IV, 16);         // 4 words
    state[12] = (u32)counter;
    state[13] = (u32)(counter >> 32);
    state[14] = block_len;
    state[15] = flags;

    u32 schedule[16];
    g_memcpy(schedule, block_words, 64);   // 16 words

    for (int round_no = 0; round_no < 7; ++round_no) {
        if (round_no > 0) {
            g_permute(schedule);
        }
        g_round(state, schedule);
    }

    for (int i = 0; i < 8; ++i) {
        state[i] ^= state[i + 8];
        state[i + 8] ^= chaining_value[i];
    }
}
|
||||||
|
|
||||||
|
// Pack `bytes_len` bytes into 32-bit words, least-significant byte first:
// words[k] is built from bytes[4k .. 4k+3].
// The loop advances four bytes at a time, so when bytes_len is not a multiple
// of 4 it still reads a complete final group; the caller must make those
// bytes readable.
__device__ void g_words_from_little_endian_bytes(
    u8 *bytes, u32 *words, u32 bytes_len
) {
    for (u32 i = 0; i < bytes_len; i += 4) {
        // Widen to u32 before shifting: a u8 promotes to signed int, and
        // shifting a value >= 0x80 left by 24 would overflow it.
        words[i / 4] = ((u32)bytes[i + 3] << 24)
                     | ((u32)bytes[i + 2] << 16)
                     | ((u32)bytes[i + 1] << 8)
                     |  (u32)bytes[i];
    }
}
|
||||||
|
|
||||||
|
// Compress this chunk into raw_hash.
//
// Parent nodes (flags & PARENT): a single compression of `data` under `key`
// with counter 0 and a full block length.
//
// Leaf nodes: leaf_data is consumed BLOCK_LEN bytes at a time, chaining each
// compression's first 8 output words into the next. The first block carries
// CHUNK_START, the last carries CHUNK_END plus `out_flags`. An empty leaf is
// hashed as one all-zero block with block length 0; in that case leaf_data's
// first block is zeroed and leaf_len is set to BLOCK_LEN as a side effect.
//
// Each block is staged through a zero-filled buffer so that a short final
// block is padded with zeros rather than with whatever follows the input in
// leaf_data.
__device__ void Chunk::g_compress_chunk(u32 out_flags) {
    if(flags&PARENT) {
        g_compress(
            key,
            data,
            0,  // counter is always zero for parent nodes
            BLOCK_LEN,
            flags | out_flags,
            raw_hash
        );
        return;
    }

    u32 chaining_value[8];
    g_memcpy(chaining_value, key, 32);

    const bool empty_input = (leaf_len==0);
    if(empty_input) {
        for(u32 i=0; i<BLOCK_LEN; i++)
            leaf_data[i] = 0U;
        leaf_len = BLOCK_LEN;
    }

    // Scratch buffers live outside the loop.
    u32 block_words[16];
    u8 block_cast[BLOCK_LEN];

    for(u32 i=0; i<leaf_len; i+=BLOCK_LEN) {
        // Number of message bytes in this block.
        u32 block_len;
        if(empty_input)
            block_len = 0;                      // special case
        else if(i+BLOCK_LEN > leaf_len)
            block_len = leaf_len%BLOCK_LEN;     // short last block
        else
            block_len = BLOCK_LEN;

        // Stage the block: real bytes first, zero padding after.
        for(u32 b=0; b<BLOCK_LEN; b++)
            block_cast[b] = (b < block_len) ? leaf_data[i+b] : (u8)0;

        // Converting the whole staged block sets all 16 words.
        g_words_from_little_endian_bytes(block_cast, block_words, BLOCK_LEN);

        u32 flagger = flags;
        if(i==0)
            flagger |= CHUNK_START;
        if(i+BLOCK_LEN >= leaf_len)
            flagger |= CHUNK_END | out_flags;

        // raw hash for root node
        g_compress(
            chaining_value,
            block_words,
            counter,
            block_len,
            flagger,
            raw_hash
        );

        g_memcpy(chaining_value, raw_hash, 32);
    }
}
|
||||||
|
|
||||||
|
// Recursively hash chunks data[l .. r) into data[l].raw_hash.
//
// n = r - l is expected to be a power of two. A single chunk is compressed
// directly. Otherwise the two halves are hashed by child launches, after which
// data[l] is turned into a parent node whose 16 data words are the left
// half's raw_hash followed by the right half's, and compressed again.
//
// Exactly one thread (global index 0) of each launch does the work. The
// reduction for a given [l, r) is a single serial task; letting every thread
// with tid < n run it made them all launch their own children and rewrite
// data[l] concurrently. Launches with more threads remain valid: the extra
// threads return immediately.
__global__ void compute(Chunk *data, int l, int r) {
    const int n = r-l;
    const int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if(tid != 0 || n < 1)
        return;

    if(n==1) {
        data[l].g_compress_chunk();
        return;
    }

    const int mid = l + n/2;

    // Children run one after the other, each driven by its own single thread.
    compute<<<1,1>>>(data, l, mid);
    cudaDeviceSynchronize();
    compute<<<1,1>>>(data, mid, r);
    cudaDeviceSynchronize();

    data[l].flags |= PARENT;

    // data is indexed in words: +8 is the second 32-byte half.
    memcpy(data[l].data, data[l].raw_hash, 32);
    memcpy(data[l].data+8, data[mid].raw_hash, 32);
    data[l].g_compress_chunk();
}
|
||||||
|
|
||||||
|
// Host-side driver: copies N chunks to the device buffer `memory_bar`,
// launches the `compute` reduction over them and copies the first chunk
// (which then holds the combined result) back into `result`.
//
//   data       - N chunks in host memory
//   memory_bar - device buffer with room for at least N chunks
//   result     - host destination for one Chunk
//
// CUDA return codes are not inspected here.
void light_hash(Chunk *data, int N, Chunk *result, Chunk *memory_bar) {
    // size_t, not int: N * sizeof(Chunk) exceeds INT_MAX for large inputs.
    const size_t data_size = (size_t)N * sizeof(Chunk);

    // Device settings
    // Allows DeviceSync to be called upto 16 levels of recursion
    cudaDeviceSetLimit(cudaLimitDevRuntimeSyncDepth, 16);

    // Device vector
    Chunk *g_data = memory_bar;
    cudaMemcpy(g_data, data, data_size, cudaMemcpyHostToDevice);

    // Actual computation of hash
    compute<<<N,32>>>(g_data, 0, N);

    cudaMemcpy(result, g_data, sizeof(Chunk), cudaMemcpyDeviceToHost);
}
|
||||||
|
|
||||||
|
// Device-callable single-chunk hash.
//
// Hashes up to BLOCK_LEN * 16 bytes of `input` as one chunk keyed with g_IV,
// with counter 0 and the ROOT flag on the final block, and writes the first
// eight output words to `output` as 32 little-endian bytes.
//
// NOTE: input beyond BLOCK_LEN * 16 bytes is silently ignored.
__device__ void light_hash_device(const uint8_t* input, size_t input_len, uint8_t* output) {
    // Create a single chunk for processing the input
    Chunk chunk;

    for (int i = 0; i < 8; i++) {
        chunk.key[i] = g_IV[i];  // Use device constant IV
    }

    // Copy the input data to leaf_data, clamped to the chunk capacity
    const size_t capacity = (size_t)BLOCK_LEN * 16;
    const size_t copy_len = (input_len < capacity) ? input_len : capacity;
    for (size_t i = 0; i < copy_len; i++) {
        chunk.leaf_data[i] = input[i];
    }

    // `chunk` is an uninitialised local and the word packing in
    // g_compress_chunk reads whole 4-byte groups, so clear the tail of a
    // partial last word. capacity is a multiple of 4, so this never writes
    // past it.
    for (size_t i = copy_len; (i % 4) != 0; i++) {
        chunk.leaf_data[i] = 0;
    }

    chunk.leaf_len = copy_len;
    chunk.counter = 0;
    chunk.flags = 0;  // Default flags

    // Process the chunk directly
    chunk.g_compress_chunk(ROOT);  // Set ROOT flag for final output

    // Serialise the first eight words, least-significant byte first
    for (int i = 0; i < 8; i++) {
        const uint32_t word = chunk.raw_hash[i];
        for (int b = 0; b < 4; b++) {
            output[i*4 + b] = (uint8_t)(word >> (8 * b));
        }
    }
}
|
||||||
|
|
||||||
|
// Compatibility alias used by other device code: forwards unchanged to
// light_hash_device(), which writes 32 bytes to `output`.
__device__ void blake3_hash_device(const uint8_t* input, size_t input_len, uint8_t* output) {
    light_hash_device(input, input_len, output);
}
|
420
rin/miner/gpu/RinHash-hip/blaze3_cpu.cuh
Normal file
420
rin/miner/gpu/RinHash-hip/blaze3_cpu.cuh
Normal file
@@ -0,0 +1,420 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstring>
|
||||||
|
#include <vector>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Let's use a pinned memory vector!
|
||||||
|
#include <thrust/host_vector.h>
|
||||||
|
#include <thrust/system/cuda/experimental/pinned_allocator.h>
|
||||||
|
|
||||||
|
// Fixed-width integer shorthands.
using u8  = uint8_t;
using u32 = uint32_t;
using u64 = uint64_t;

// Sizes, in bytes.
const u32 OUT_LEN   = 32;
const u32 KEY_LEN   = 32;
const u32 BLOCK_LEN = 64;
const u32 CHUNK_LEN = 1024;

// Multiple chunks make a snicker bar :)
const u32 SNICKER = 1U << 10;
// Factory height and snicker size have an inversely proportional relationship
// FACTORY_HT * (log2 SNICKER) + 10 >= 64
const u32 FACTORY_HT = 5;

// Flag bits.
const u32 CHUNK_START         = 1U << 0;
const u32 CHUNK_END           = 1U << 1;
const u32 PARENT              = 1U << 2;
const u32 ROOT                = 1U << 3;
const u32 KEYED_HASH          = 1U << 4;
const u32 DERIVE_KEY_CONTEXT  = 1U << 5;
const u32 DERIVE_KEY_MATERIAL = 1U << 6;

// Width of a u32 in bits.
const int usize = sizeof(u32) * 8;

// Initial chaining value, eight 32-bit words.
u32 IV[8] = {
    0x6A09E667, 0xBB67AE85,
    0x3C6EF372, 0xA54FF53A,
    0x510E527F, 0x9B05688C,
    0x1F83D9AB, 0x5BE0CD19,
};

// Message-word permutation table (16 entries).
const int MSG_PERMUTATION[] = {
    2,  6,  3, 10,
    7,  0,  4, 13,
    1, 11, 12,  5,
    9, 14, 15,  8
};
|
||||||
|
|
||||||
|
// Rotate `value` right by `shift` bits within a `usize`-bit word.
// NOTE: `shift` must be in 1..usize-1 (a shift of 0 would shift left by the
// full word width); g() only calls this with 16, 12, 8 and 7.
u32 rotr(u32 value, int shift) {
    const u32 low_part = value >> shift;
    const u32 high_part = value << (usize - shift);
    return low_part | high_part;
}
|
||||||
|
|
||||||
|
// Quarter-round mixing step: updates the four state words at indices a, b, c, d
// in place, folding in the two message words mx and my.
void g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
    // First half: absorb mx, rotate by 16 and 12.
    state[a] += state[b] + mx;
    state[d] = rotr(state[d] ^ state[a], 16);
    state[c] += state[d];
    state[b] = rotr(state[b] ^ state[c], 12);

    // Second half: absorb my, rotate by 8 and 7.
    state[a] += state[b] + my;
    state[d] = rotr(state[d] ^ state[a], 8);
    state[c] += state[d];
    state[b] = rotr(state[b] ^ state[c], 7);
}
|
||||||
|
|
||||||
|
// One full round: eight g() calls, the first four over the columns of the
// 4x4 state and the last four over its diagonals. Call k consumes message
// words m[2k] and m[2k+1].
void round(u32 state[16], u32 m[16]) {
    static const u32 LANES[8][4] = {
        // columns
        {0, 4,  8, 12},
        {1, 5,  9, 13},
        {2, 6, 10, 14},
        {3, 7, 11, 15},
        // diagonals
        {0, 5, 10, 15},
        {1, 6, 11, 12},
        {2, 7,  8, 13},
        {3, 4,  9, 14},
    };

    for (int k = 0; k < 8; k++) {
        g(state, LANES[k][0], LANES[k][1], LANES[k][2], LANES[k][3],
          m[2 * k], m[2 * k + 1]);
    }
}
|
||||||
|
|
||||||
|
// Reorder the 16 message words in place: new m[i] = old m[MSG_PERMUTATION[i]].
void permute(u32 m[16]) {
    u32 shuffled[16];
    for (int dst = 0; dst < 16; dst++) {
        const int src = MSG_PERMUTATION[dst];
        shuffled[dst] = m[src];
    }
    // Copy back only after every source word has been read.
    memcpy(m, shuffled, sizeof(shuffled));
}
|
||||||
|
|
||||||
|
// Compression function.
//
// chaining_value - 8 input words (read only)
// block_words    - 16 message words (read only; a private copy is permuted)
// counter        - 64-bit counter, split into state[12] (low) / state[13] (high)
// block_len      - stored in state[14]
// flags          - stored in state[15]
// state          - 16-word output buffer; on return state[0..7] holds the
//                  lower half XOR upper half and state[8..15] holds the
//                  upper half XOR chaining_value
void compress(
    u32 *chaining_value,
    u32 *block_words,
    u64 counter,
    u32 block_len,
    u32 flags,
    u32 *state
) {
    // Initialise the 16-word state.
    for (int i = 0; i < 8; i++)
        state[i] = chaining_value[i];
    for (int i = 0; i < 4; i++)
        state[8 + i] = IV[i];
    state[12] = (u32)counter;
    state[13] = (u32)(counter >> 32);
    state[14] = block_len;
    state[15] = flags;

    // Work on a private copy so the caller's message words are not permuted.
    u32 schedule[16];
    for (int i = 0; i < 16; i++)
        schedule[i] = block_words[i];

    // Seven rounds, with the message permuted between consecutive rounds.
    for (int r = 0; r < 7; r++) {
        if (r > 0)
            permute(schedule);
        round(state, schedule);
    }

    // Feed-forward.
    for (int i = 0; i < 8; i++) {
        state[i] ^= state[i + 8];
        state[i + 8] ^= chaining_value[i];
    }
}
|
||||||
|
|
||||||
|
// Pack `bytes_len` bytes into 32-bit words, least-significant byte first.
//
// bytes     - input bytes
// words     - output; receives ceil(bytes_len / 4) words
// bytes_len - number of valid bytes at `bytes`
//
// Each byte is widened to u32 before shifting: a plain u8 promotes to signed
// int, and shifting a value >= 0x80 left by 24 overflows that int. A trailing
// partial word is zero-extended instead of reading past `bytes_len`.
void words_from_little_endian_bytes(u8 *bytes, u32 *words, u32 bytes_len) {
    for (u32 i = 0; i < bytes_len; i += 4) {
        u32 word = 0;
        for (u32 j = 0; j < 4 && i + j < bytes_len; j++)
            word |= (u32)bytes[i + j] << (8 * j);
        words[i / 4] = word;
    }
}
|
||||||
|
|
||||||
|
// One node of the hash tree: either a leaf (raw input bytes in leaf_data) or
// an inner node (two child hashes packed into data). Member order and sizes
// are left untouched because instances are handed to the GPU path as-is.
struct Chunk {
    // Use only when it is a leaf node;
    // leaf data may hold fewer than 1024 bytes.
    u8 leaf_data[1024];
    u32 leaf_len;
    // Use in all other cases;
    // data always holds 64 bytes.
    u32 data[16];
    u32 flags;
    u32 raw_hash[16];
    u32 key[8];
    // Only useful for leaf nodes.
    u64 counter;

    // Constructor for leaf nodes. Copies up to sizeof(leaf_data) bytes from
    // `input` and zero-fills the remainder. `size` is clamped to
    // [0, sizeof(leaf_data)] so an out-of-range length can no longer write
    // past the end of leaf_data.
    __device__ __host__ Chunk(char *input, int size, u32 _flags, u32 *_key, u64 ctr){
        u32 copy_len = 0;
        if(size > 0)
            copy_len = ((u32)size > (u32)sizeof(leaf_data)) ? (u32)sizeof(leaf_data) : (u32)size;

        counter = ctr;
        flags = _flags;
        memcpy(key, _key, 8*sizeof(*key));
        memset(leaf_data, 0, sizeof(leaf_data));
        if(copy_len)
            memcpy(leaf_data, input, copy_len);
        leaf_len = copy_len;
    }

    // Constructor for nodes without leaf bytes (leaf_len = 0, counter = 0).
    // leaf_data, data and raw_hash are left uninitialised.
    __device__ __host__ Chunk(u32 _flags, u32 *_key) {
        counter = 0;
        flags = _flags;
        memcpy(key, _key, 8*sizeof(*key));
        leaf_len = 0;
    }

    // Default constructor: leaves every field uninitialised; callers are
    // expected to fill the fields in themselves.
    __device__ __host__ Chunk() {}

    // Process data in message-block-sized pieces and store the result in raw_hash.
    void compress_chunk(u32=0);
    __device__ void g_compress_chunk(u32=0);
};
|
||||||
|
|
||||||
|
// Hash this node on the host and leave the 16-word result in raw_hash.
//
// out_flags - extra flag bits; OR-ed into a parent's single compression, or
//             into the last block only of a leaf.
//
// Parent nodes (flags & PARENT) compress `data` once with counter 0.
// Leaf nodes walk leaf_data in BLOCK_LEN pieces, chaining the first 8 output
// words into the next block; the first block gets CHUNK_START and the last
// gets CHUNK_END.
//
// An empty leaf (leaf_len == 0) is hashed as one all-zero block of length 0.
// The node itself is no longer modified for that case, so repeated calls
// (hash_root() calls this once per 64 bytes of output) keep hashing the
// empty message rather than a 64-byte zero block.
void Chunk::compress_chunk(u32 out_flags) {
    if(flags & PARENT) {
        compress(
            key,
            data,
            0,  // counter is always zero for parent nodes
            BLOCK_LEN,
            flags | out_flags,
            raw_hash
        );
        return;
    }

    u32 chaining_value[8];
    memcpy(chaining_value, key, 8*sizeof(*chaining_value));

    const bool empty_input = (leaf_len == 0);
    // An empty leaf still needs exactly one pass through the loop.
    const u32 span = empty_input ? BLOCK_LEN : leaf_len;

    for(u32 off = 0; off < span; off += BLOCK_LEN) {
        // Number of real input bytes in this block.
        u32 block_len;
        if(empty_input)
            block_len = 0;
        else if(off + BLOCK_LEN > leaf_len)
            block_len = leaf_len % BLOCK_LEN;  // short final block
        else
            block_len = BLOCK_LEN;

        // Zero-padded copy of the block, then little-endian words.
        u8 block_bytes[BLOCK_LEN];
        memset(block_bytes, 0, sizeof(block_bytes));
        if(block_len)
            memcpy(block_bytes, leaf_data + off, block_len);

        u32 block_words[16];
        words_from_little_endian_bytes(block_bytes, block_words, BLOCK_LEN);

        u32 flagger = flags;
        if(off == 0)
            flagger |= CHUNK_START;
        if(off + BLOCK_LEN >= span)
            flagger |= CHUNK_END | out_flags;

        compress(
            chaining_value,
            block_words,
            counter,
            block_len,
            flagger,
            raw_hash
        );

        memcpy(chaining_value, raw_hash, 8*sizeof(*chaining_value));
    }
}
|
||||||
|
|
||||||
|
using thrust_vector = thrust::host_vector<
|
||||||
|
Chunk,
|
||||||
|
thrust::system::cuda::experimental::pinned_allocator<Chunk>
|
||||||
|
>;
|
||||||
|
|
||||||
|
// The GPU hasher
|
||||||
|
void light_hash(Chunk*, int, Chunk*, Chunk*);
|
||||||
|
|
||||||
|
// Sanity checks
|
||||||
|
// Reduce the chunks data[first, last) to a single Chunk.
//
// A lone chunk is compressed on the host and returned by value, which avoids
// the cost of a GPU call; any larger range is handed to light_hash() together
// with the scratch buffer `memory_bar`.
// n (= last - first) will always be a power of 2.
Chunk hash_many(Chunk *data, int first, int last, Chunk *memory_bar) {
    const int count = last - first;

    if(count != 1) {
        Chunk result;
        light_hash(data + first, count, &result, memory_bar);
        return result;
    }

    Chunk &only = data[first];
    only.compress_chunk();
    return only;
}
|
||||||
|
|
||||||
|
Chunk merge(Chunk &left, Chunk &right);
|
||||||
|
void hash_root(Chunk &node, vector<u8> &out_slice);
|
||||||
|
|
||||||
|
// Incremental hasher state. Member order matters: new_internal() fills the
// struct positionally with a braced initialiser.
struct Hasher {
    u32 key[8];
    u32 flags;
    // Counter handed to the next chunk created by update().
    u64 ctr;
    u64 file_size;
    // A memory bar for CUDA to use during its computation.
    Chunk* memory_bar;
    // Factory is an array of FACTORY_HT possible SNICKER bars.
    thrust_vector factory[FACTORY_HT];

    // methods
    static Hasher new_internal(u32 key[8], u32 flags, u64 fsize);
    static Hasher _new(u64);

    // Initialises CUDA memory (if needed).
    void init();

    // Frees CUDA memory (if it is there); a null memory_bar needs no release.
    ~Hasher() {
        if(memory_bar != nullptr)
            cudaFree(memory_bar);
    }

    void update(char *input, int size);
    void finalize(vector<u8> &out_slice);
    void propagate();
};
|
||||||
|
|
||||||
|
// Build a Hasher from an 8-word key, a flags word and the expected input
// size. The counter starts at 0; the members not listed in the initialiser
// (memory_bar, factory) are value-initialised.
Hasher Hasher::new_internal(u32 key[8], u32 flags, u64 fsize) {
    return Hasher{
        { key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7] },
        flags,
        0,      // ctr
        fsize   // file_size
    };
}
|
||||||
|
|
||||||
|
// Default hasher: keyed with IV, no flags set.
Hasher Hasher::_new(u64 fsize) {
    return new_internal(IV, 0, fsize);
}
|
||||||
|
|
||||||
|
// Allocate the device scratch buffer (memory_bar) and pre-reserve the two
// lowest factory levels.
//
// With file_size < 1 nothing is allocated and memory_bar is set to nullptr.
// Otherwise the buffer holds min(number of chunks, SNICKER) + 1 Chunk slots.
// The chunk count is now a true ceiling division; the previous
// ceil(file_size / CHUNK_LEN) divided as integers first and so never
// rounded up. A failed allocation is reported on stderr and leaves
// memory_bar as nullptr instead of an indeterminate pointer.
void Hasher::init() {
    memory_bar = nullptr;
    if(file_size < 1)
        return;

    // Ceiling division without risking overflow in file_size + CHUNK_LEN - 1.
    const u64 num_chunks = file_size / CHUNK_LEN + ((file_size % CHUNK_LEN) ? 1 : 0);
    u32 bar_size = (u32)min(num_chunks, (u64)SNICKER);
    // Just for safety :)
    ++bar_size;

    cudaError_t err = cudaMalloc(&memory_bar, bar_size*sizeof(Chunk));
    if(err != cudaSuccess) {
        cerr << "Hasher::init: failed to allocate device memory: "
             << cudaGetErrorString(err) << "\n";
        memory_bar = nullptr;
    }

    // Let the most commonly used places always have memory;
    // +1 so that it does not resize when it hits SNICKER entries.
    u32 RESERVE = SNICKER + 1;
    factory[0].reserve(RESERVE);
    factory[1].reserve(RESERVE);
}
|
||||||
|
|
||||||
|
// Carry full levels upward: while a level holds exactly SNICKER nodes, hash
// them into one subtree, empty that level and append the subtree to the
// level above.
void Hasher::propagate() {
    for(int level = 0; factory[level].size() == SNICKER; ) {
        Chunk subtree = hash_many(factory[level].data(), 0, SNICKER, memory_bar);
        factory[level].clear();
        ++level;
        factory[level].push_back(subtree);
    }
}
|
||||||
|
|
||||||
|
// Append one leaf chunk built from `size` bytes at `input`, tagged with the
// current counter, then advance the counter. When level 0 reaches SNICKER
// entries, propagate() folds it into the level above.
void Hasher::update(char *input, int size) {
    Chunk leaf(input, size, flags, key, ctr);
    factory[0].push_back(leaf);
    ++ctr;

    if(factory[0].size() != SNICKER)
        return;
    propagate();
}
|
||||||
|
|
||||||
|
// Collapse every factory level into a single root node and write the digest
// into out_slice (out_slice.size() bytes are produced).
//
// For each non-empty level, its n nodes are split into power-of-two runs
// following the set bits of n (largest run first); each run is reduced with
// hash_many(), and the run results are merged right-to-left into one node.
// That node is appended to the next level, or becomes the root at the top
// level. If no level holds any node, the root stays the empty chunk built
// from (flags, key).
//
// Fix: the run-selection test is `n & divider`; the text had been mangled
// into `n÷r` by HTML-entity decoding of "&divide".
void Hasher::finalize(vector<u8> &out_slice) {
    Chunk root(flags, key);

    for(int i=0; i<FACTORY_HT; i++) {
        vector<Chunk> subtrees;
        u32 n = factory[i].size(), divider = SNICKER;
        if(!n)
            continue;

        int start = 0;
        while(divider) {
            if(n & divider) {
                Chunk subtree = hash_many(factory[i].data(), start, start+divider, memory_bar);
                subtrees.push_back(subtree);
                start += divider;
            }
            divider >>= 1;
        }

        while(subtrees.size() > 1) {
            Chunk tmp1 = subtrees.back();
            subtrees.pop_back();
            Chunk tmp2 = subtrees.back();
            subtrees.pop_back();
            // tmp2 is the left child and tmp1 the right child:
            // that's the order they appear within the array.
            Chunk tmp = merge(tmp2, tmp1);
            subtrees.push_back(tmp);
        }

        if(i < FACTORY_HT-1)
            factory[i+1].push_back(subtrees[0]);
        else
            root = subtrees[0];
    }

    hash_root(root, out_slice);
}
|
||||||
|
|
||||||
|
// Build the parent of two nodes. Both children are compressed first (this
// updates their raw_hash in place); the parent inherits the left child's
// flags plus PARENT and the left child's key, and its 64-byte data block is
// the first 32 bytes of each child's raw_hash, left then right.
// The parent itself is returned uncompressed.
Chunk merge(Chunk &left, Chunk &right) {
    left.compress_chunk();
    right.compress_chunk();

    Chunk parent(left.flags | PARENT, left.key);
    // 8 words = 32 bytes from each child.
    memcpy(&parent.data[0], left.raw_hash, 32);
    memcpy(&parent.data[8], right.raw_hash, 32);
    return parent;
}
|
||||||
|
|
||||||
|
// Fill out_slice with output bytes derived from the root node.
//
// The root is compressed with the ROOT flag once per 64-byte (2 * OUT_LEN)
// slab of output, with node.counter set to the slab index; the 16 words of
// raw_hash are then serialised least-significant byte first, truncated to
// whatever space remains in out_slice.
void hash_root(Chunk &node, vector<u8> &out_slice) {
    const u64 slab = 2*OUT_LEN;
    u64 slab_index = 0;
    u32 words[16] = {};

    for(u64 pos = 0; int(out_slice.size()-pos) > 0; pos += slab) {
        node.counter = slab_index;
        node.compress_chunk(ROOT);
        memcpy(words, node.raw_hash, sizeof(words));

        // Bytes still wanted from this slab.
        const u64 take = min(slab, (u64)out_slice.size()-pos);
        for(u64 b = 0; b < take; b++)
            out_slice[pos+b] = (u8)((words[b/4] >> (8*(b%4))) & 0x000000FF);

        ++slab_index;
    }
}
|
18
rin/miner/gpu/RinHash-hip/build-hip.bat
Normal file
18
rin/miner/gpu/RinHash-hip/build-hip.bat
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
@echo off
rem Configure and build the project with CMake + Ninja in the .\build directory.
setlocal

rem hipcc must be reachable through PATH; stop early if it is not.
where hipcc >nul 2>nul
if errorlevel 1 (
    echo ERROR: hipcc not found. Please install ROCm/HIP toolchain.
    exit /b 1
)

rem Out-of-source build directory.
if not exist build (mkdir build)
cd build

rem Configure, then build; abort on the first failing step.
cmake -G "Ninja" -DHIP_PLATFORM=amd -DCMAKE_BUILD_TYPE=Release ..
if errorlevel 1 (exit /b 1)
cmake --build . -j
if errorlevel 1 (exit /b 1)

cd ..
echo Build done. Executable should be at build\rinhash-hip-miner.exe
|
29
rin/miner/gpu/RinHash-hip/hip_runtime_shim.h
Normal file
29
rin/miner/gpu/RinHash-hip/hip_runtime_shim.h
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
#pragma once

// Maps the CUDA runtime names used by this project onto their HIP
// equivalents when building for AMD (__HIP_PLATFORM_AMD__).
//
// Only the host-side runtime API is aliased. The kernel-language pieces are
// deliberately NOT redefined here:
//   * __global__/__device__/__host__/__shared__/__syncthreads come from the
//     HIP headers; a self-referential #define would replace those
//     definitions with a bare, meaningless token.
//   * threadIdx/blockIdx/blockDim/gridDim are used with a .x member in this
//     project's kernels, so aliasing them to the scalar hipThreadIdx_x-style
//     names would turn `threadIdx.x` into `hipThreadIdx_x.x`.
//   * hipLaunchKernelGGL is supplied by the HIP headers and must not be
//     redefined.
#ifdef __HIP_PLATFORM_AMD__
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>

// Error handling
#define cudaError_t hipError_t
#define cudaSuccess hipSuccess
#define cudaGetErrorString hipGetErrorString
#define cudaGetLastError hipGetLastError

// Memory management
#define cudaMalloc hipMalloc
#define cudaFree hipFree
#define cudaMemcpy hipMemcpy
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaMemGetInfo hipMemGetInfo

// Device control
#define cudaDeviceSynchronize hipDeviceSynchronize
#define cudaDeviceReset hipDeviceReset
#endif
|
283
rin/miner/gpu/RinHash-hip/rinhash.hip.cu
Normal file
283
rin/miner/gpu/RinHash-hip/rinhash.hip.cu
Normal file
@@ -0,0 +1,283 @@
|
|||||||
|
#include "hip_runtime_shim.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <vector>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
// Include shared device functions
|
||||||
|
#include "rinhash_device.cuh"
|
||||||
|
#include "argon2d_device.cuh"
|
||||||
|
#include "sha3-256.hip.cu"
|
||||||
|
#include "blake3_device.cuh"
|
||||||
|
|
||||||
|
// Modified kernel to use device functions and write output
|
||||||
|
// RinHash kernel: BLAKE3 -> Argon2d -> SHA3-256 over one input buffer.
//
// input / input_len - bytes to hash
// output            - receives the 32-byte result
// argon2_memory     - scratch blocks handed to device_argon2d_hash()
//
// All the work is done by the thread with threadIdx.x == 0; every thread
// then meets at the closing barrier.
extern "C" __global__ void rinhash_cuda_kernel(
    const uint8_t* input,
    size_t input_len,
    uint8_t* output,
    block* argon2_memory
) {
    __shared__ uint8_t blake3_out[32];
    __shared__ uint8_t argon2_out[32];

    if (threadIdx.x == 0) {
        // Stage 1: BLAKE3 over the raw input.
        light_hash_device(input, input_len, blake3_out);

        // Stage 2: Argon2d (t_cost=2, m_cost=64, lanes=1) with a fixed 11-byte salt.
        uint8_t salt[11] = { 'R','i','n','C','o','i','n','S','a','l','t' };
        device_argon2d_hash(argon2_out, blake3_out, 32, 2, 64, 1, argon2_memory, salt, 11);

        // Stage 3: SHA3-256 over the Argon2d output.
        uint8_t digest[32];
        sha3_256_device(argon2_out, 32, digest);

        // Publish the digest.
        for (int k = 0; k < 32; ++k) {
            output[k] = digest[k];
        }
    }

    __syncthreads();
}
|
||||||
|
|
||||||
|
// RinHash HIP implementation for a single header
|
||||||
|
// Hash a single header on the GPU.
//
// input / input_len - host bytes to hash
// output            - host buffer receiving the 32-byte result; left
//                     untouched if any step fails (the failure is reported
//                     on stderr)
//
// Changes from the previous version: a launch failure is now detected with
// cudaGetLastError() right after the kernel launch, and all exits share one
// cleanup path (cudaFree on a null pointer is a no-op), replacing five
// hand-copied free sequences.
extern "C" void rinhash_cuda(const uint8_t* input, size_t input_len, uint8_t* output) {
    // Argon2 parameters
    const uint32_t m_cost = 64; // blocks (64 KiB)

    // Everything is declared before the first goto so no jump skips an
    // initialisation.
    uint8_t *d_input = nullptr;
    uint8_t *d_output = nullptr;
    block *d_memory = nullptr;
    cudaError_t err;

    // Allocate device buffers
    err = cudaMalloc(&d_input, input_len);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate input memory: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    err = cudaMalloc(&d_output, 32);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate output memory: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    // Allocate Argon2 memory once per hash
    err = cudaMalloc(&d_memory, m_cost * sizeof(block));
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate argon2 memory: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    // Copy input header
    err = cudaMemcpy(d_input, input, input_len, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to copy input to device: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    // Launch the kernel (single thread is fine for single hash)
    rinhash_cuda_kernel<<<1, 1>>>(d_input, input_len, d_output, d_memory);
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: kernel launch failed: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    // Wait for the kernel to finish
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error during kernel execution: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    // Copy result
    err = cudaMemcpy(output, d_output, 32, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to copy output from device: %s\n", cudaGetErrorString(err));
    }

cleanup:
    cudaFree(d_memory);
    cudaFree(d_input);
    cudaFree(d_output);
}
|
||||||
|
|
||||||
|
// Helper function to convert a block header to bytes
|
||||||
|
// Serialise a block header into `output` by concatenating the raw bytes of
// its fields in this order: version (4), prev_block (32), merkle_root (32),
// timestamp (4), bits (4), nonce (4). The fields are copied as they sit in
// memory (no byte swapping). *output_len receives the total written (80).
// `output` must have room for 80 bytes.
extern "C" void blockheader_to_bytes(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    const uint32_t* nonce,
    uint8_t* output,
    size_t* output_len
) {
    uint8_t* cursor = output;

    memcpy(cursor, version, 4);
    cursor += 4;
    memcpy(cursor, prev_block, 32);
    cursor += 32;
    memcpy(cursor, merkle_root, 32);
    cursor += 32;
    memcpy(cursor, timestamp, 4);
    cursor += 4;
    memcpy(cursor, bits, 4);
    cursor += 4;
    memcpy(cursor, nonce, 4);
    cursor += 4;

    *output_len = (size_t)(cursor - output);
}
|
||||||
|
|
||||||
|
// Batch processing version for mining (sequential per header for correctness)
|
||||||
|
// Hash `num_blocks` headers one after another, reusing one set of device
// buffers.
//
// block_headers    - num_blocks headers laid out back to back
// block_header_len - size of each header in bytes
// outputs          - receives num_blocks * 32 bytes, one hash per header
// num_blocks       - number of headers
//
// Processing stops at the first failing header (reported on stderr); output
// slots from that header onwards are left untouched.
//
// Changes from the previous version: kernel launch failures are detected
// with cudaGetLastError(), buffer offsets are computed in size_t rather than
// 32-bit arithmetic, and allocation failures share the common cleanup path.
extern "C" void rinhash_cuda_batch(
    const uint8_t* block_headers,
    size_t block_header_len,
    uint8_t* outputs,
    uint32_t num_blocks
) {
    // Argon2 parameters
    const uint32_t m_cost = 64;

    // Reusable device buffers; declared before the first goto.
    uint8_t *d_input = nullptr;
    uint8_t *d_output = nullptr;
    block *d_memory = nullptr;
    cudaError_t err;

    err = cudaMalloc(&d_input, block_header_len);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate header buffer: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    err = cudaMalloc(&d_output, 32);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate output buffer: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    err = cudaMalloc(&d_memory, m_cost * sizeof(block));
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate argon2 memory: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    for (uint32_t i = 0; i < num_blocks; i++) {
        const uint8_t* header = block_headers + (size_t)i * block_header_len;
        uint8_t* out = outputs + (size_t)i * 32;

        err = cudaMemcpy(d_input, header, block_header_len, cudaMemcpyHostToDevice);
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error: copy header %u failed: %s\n", i, cudaGetErrorString(err));
            break;
        }

        rinhash_cuda_kernel<<<1, 1>>>(d_input, block_header_len, d_output, d_memory);
        err = cudaGetLastError();
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error: kernel launch %u failed: %s\n", i, cudaGetErrorString(err));
            break;
        }

        err = cudaDeviceSynchronize();
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error in kernel %u: %s\n", i, cudaGetErrorString(err));
            break;
        }

        err = cudaMemcpy(out, d_output, 32, cudaMemcpyDeviceToHost);
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error: copy out %u failed: %s\n", i, cudaGetErrorString(err));
            break;
        }
    }

cleanup:
    cudaFree(d_memory);
    cudaFree(d_output);
    cudaFree(d_input);
}
|
||||||
|
|
||||||
|
// Main RinHash function that would be called from outside
|
||||||
|
// Public single-hash entry point: serialises the header fields with
// blockheader_to_bytes() and hashes the resulting bytes with rinhash_cuda().
// `output` receives the 32-byte hash.
extern "C" void RinHash(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    const uint32_t* nonce,
    uint8_t* output
) {
    uint8_t header_bytes[80];
    size_t header_len;  // set by blockheader_to_bytes()

    blockheader_to_bytes(version, prev_block, merkle_root, timestamp, bits, nonce,
                         header_bytes, &header_len);
    rinhash_cuda(header_bytes, header_len, output);
}
|
||||||
|
|
||||||
|
// Mining function that tries different nonces (host-side best selection)
|
||||||
|
// Try `num_nonces` consecutive nonces starting at `start_nonce` and report
// the one whose hash is smallest when the 32 bytes are compared from index 0
// upwards as unsigned values (ties keep the earlier nonce).
//
// found_nonce - receives the winning nonce
// target_hash - currently unused
// best_hash   - receives the winning 32-byte hash
//
// Fixes over the previous version:
//   * `&current_nonce` had been mangled into `¤t_nonce` by HTML-entity
//     decoding of "&curren".
//   * num_nonces == 0 used to read 32 bytes from an empty vector; it now
//     reports start_nonce with an all-0xFF (worst possible) hash.
//   * The hash buffer is pre-filled with 0xFF rather than zero, so slots that
//     rinhash_cuda_batch() could not fill after an error can never be
//     mistaken for a perfect all-zero hash.
//   * Buffer sizes are computed in size_t rather than 32-bit arithmetic.
extern "C" void RinHash_mine(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    uint32_t start_nonce,
    uint32_t num_nonces,
    uint32_t* found_nonce,
    uint8_t* target_hash,
    uint8_t* best_hash
) {
    (void)target_hash;

    const size_t block_header_len = 80;
    const size_t hash_len = 32;

    if (num_nonces == 0) {
        memset(best_hash, 0xFF, hash_len);
        *found_nonce = start_nonce;
        return;
    }

    std::vector<uint8_t> block_headers(block_header_len * (size_t)num_nonces);
    std::vector<uint8_t> hashes(hash_len * (size_t)num_nonces, 0xFF);

    // Serialise one header per candidate nonce.
    for (uint32_t i = 0; i < num_nonces; i++) {
        uint32_t current_nonce = start_nonce + i;
        uint8_t* header = block_headers.data() + (size_t)i * block_header_len;
        size_t header_len;

        blockheader_to_bytes(
            version,
            prev_block,
            merkle_root,
            timestamp,
            bits,
            &current_nonce,
            header,
            &header_len
        );
    }

    rinhash_cuda_batch(block_headers.data(), block_header_len, hashes.data(), num_nonces);

    // Host-side selection of the smallest hash.
    memcpy(best_hash, hashes.data(), hash_len);
    *found_nonce = start_nonce;

    for (uint32_t i = 1; i < num_nonces; i++) {
        const uint8_t* current_hash = hashes.data() + (size_t)i * hash_len;
        // memcmp compares bytes as unsigned char from index 0, which is the
        // same ordering as the previous hand-written loop.
        if (memcmp(current_hash, best_hash, hash_len) < 0) {
            memcpy(best_hash, current_hash, hash_len);
            *found_nonce = start_nonce + i;
        }
    }
}
|
8
rin/miner/gpu/RinHash-hip/rinhash_device.cuh
Normal file
8
rin/miner/gpu/RinHash-hip/rinhash_device.cuh
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
#ifndef RINHASH_DEVICE_CUH
|
||||||
|
#define RINHASH_DEVICE_CUH
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <device_launch_parameters.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#endif // RINHASH_DEVICE_CUH
|
140
rin/miner/gpu/RinHash-hip/sha3-256.hip.cu
Normal file
140
rin/miner/gpu/RinHash-hip/sha3-256.hip.cu
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#define KECCAKF_ROUNDS 24
|
||||||
|
|
||||||
|
|
||||||
|
// Rotate a 64-bit value left by n bits.
// NOTE: n must be in 1..63; n == 0 would shift right by the full 64 bits.
// Computes the same result as ROTL64 below.
__device__ inline uint64_t rotate(uint64_t x, int n) {
    const uint64_t upper = x << n;
    const uint64_t wrapped = x >> (64 - n);
    return upper | wrapped;
}
|
||||||
|
|
||||||
|
// 64-bit left rotation used by keccakf().
// NOTE: n must be in 1..63; n == 0 would shift right by the full 64 bits.
__device__ inline uint64_t ROTL64(uint64_t x, int n) {
    const uint64_t upper = x << n;
    const uint64_t wrapped = x >> (64 - n);
    return upper | wrapped;
}
|
||||||
|
|
||||||
|
// Keccak-f[1600]: applies 24 rounds of the permutation, in place, to the
// 25-word internal state st.
__device__ void keccakf(uint64_t st[25]) {
    // Rotation amounts for the Rho step, in the visiting order given by PI_LANE.
    const int RHO_ROT[24] = {
        1,  3,  6,  10, 15, 21,
        28, 36, 45, 55, 2,  14,
        27, 41, 56, 8,  25, 43,
        62, 18, 39, 61, 20, 44
    };

    // Lane visited at each step of the combined Rho/Pi walk.
    const int PI_LANE[24] = {
        10, 7,  11, 17, 18, 3,
        5,  16, 8,  21, 24, 4,
        15, 23, 19, 13, 12, 2,
        20, 14, 22, 9,  6,  1
    };

    // Round constants for the Iota step.
    const uint64_t ROUND_CONST[24] = {
        0x0000000000000001ULL, 0x0000000000008082ULL,
        0x800000000000808aULL, 0x8000000080008000ULL,
        0x000000000000808bULL, 0x0000000080000001ULL,
        0x8000000080008081ULL, 0x8000000000008009ULL,
        0x000000000000008aULL, 0x0000000000000088ULL,
        0x0000000080008009ULL, 0x000000008000000aULL,
        0x000000008000808bULL, 0x800000000000008bULL,
        0x8000000000008089ULL, 0x8000000000008003ULL,
        0x8000000000008002ULL, 0x8000000000000080ULL,
        0x000000000000800aULL, 0x800000008000000aULL,
        0x8000000080008081ULL, 0x8000000000008080ULL,
        0x0000000080000001ULL, 0x8000000080008008ULL
    };

    uint64_t col[5];

    for (int r = 0; r < 24; r++) {
        // Theta: XOR each lane with a mix of the two neighbouring column parities.
        for (int x = 0; x < 5; x++)
            col[x] = st[x] ^ st[x + 5] ^ st[x + 10] ^ st[x + 15] ^ st[x + 20];
        for (int x = 0; x < 5; x++) {
            const uint64_t d = col[(x + 4) % 5] ^ ROTL64(col[(x + 1) % 5], 1);
            for (int y = 0; y < 25; y += 5)
                st[y + x] ^= d;
        }

        // Rho and Pi: carry a lane along the PI_LANE walk, rotating as it goes.
        uint64_t moving = st[1];
        for (int k = 0; k < 24; k++) {
            const int lane = PI_LANE[k];
            const uint64_t displaced = st[lane];
            st[lane] = ROTL64(moving, RHO_ROT[k]);
            moving = displaced;
        }

        // Chi: non-linear mix within each row of five lanes.
        for (int y = 0; y < 25; y += 5) {
            for (int x = 0; x < 5; x++)
                col[x] = st[y + x];
            for (int x = 0; x < 5; x++)
                st[y + x] ^= (~col[(x + 1) % 5]) & col[(x + 2) % 5];
        }

        // Iota: fold in the round constant.
        st[0] ^= ROUND_CONST[r];
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Read a 64-bit value stored little-endian in the 8 bytes at src.
__device__ inline uint64_t load64_le(const uint8_t *src) {
    uint64_t value = 0;
    #pragma unroll
    for (int byte = 0; byte < 8; ++byte) {
        const uint64_t widened = (uint64_t)src[byte];
        value |= widened << (8 * byte);
    }
    return value;
}
|
||||||
|
|
||||||
|
// Write the 64-bit value x little-endian into the 8 bytes at dst.
__device__ inline void store64_le(uint8_t *dst, uint64_t x) {
    #pragma unroll
    for (int byte = 0; byte < 8; ++byte) {
        const uint64_t shifted = x >> (8 * byte);
        dst[byte] = (uint8_t)shifted;
    }
}
|
||||||
|
|
||||||
|
/*
 * sha3_256_device: SHA3-256 of `inlen` bytes at `input`, written as 32 bytes
 * to `hash_out`.
 *
 * Full 136-byte blocks (the SHA3-256 rate) are absorbed eight bytes at a
 * time, each followed by Keccak-f[1600]. The remaining 0..135 bytes are then
 * absorbed, the padding is applied (0x06 right after the last message byte,
 * 0x80 on the last byte of the rate block), one more Keccak-f[1600] is run,
 * and the first four state words are written out little-endian.
 *
 * Changes from the previous version:
 *   - The final block used to absorb exactly 32 bytes and pad at byte 32
 *     whatever `inlen` was, so only 32-byte inputs hashed correctly (and
 *     shorter inputs were read out of bounds). The tail is now driven by the
 *     actual remaining length; 32-byte inputs give the same result as before.
 *   - Padding is XOR-ed in through 64-bit word arithmetic rather than a
 *     uint8_t* view of the state, so it no longer depends on byte order.
 */
__device__ void sha3_256_device(const uint8_t *input, size_t inlen, uint8_t *hash_out) {
    const size_t rate = 136;  // absorb block size in bytes for SHA3-256
    uint64_t st[25];          // internal state: 25 words = 1600 bits
    for (int i = 0; i < 25; i++) st[i] = 0;

    // Absorb every complete rate-sized block.
    while (inlen >= rate) {
        for (size_t i = 0; i < rate / 8; i++) {
            st[i] ^= load64_le(input + i * 8);
        }
        keccakf(st);
        input += rate;
        inlen -= rate;
    }

    // Absorb the remaining inlen (< rate) bytes one at a time.
    for (size_t i = 0; i < inlen; i++) {
        st[i / 8] ^= (uint64_t)input[i] << (8 * (i % 8));
    }

    // Padding: domain/first-pad byte right after the message, final bit on
    // the last byte of the rate block. Both may land on the same byte.
    st[inlen / 8] ^= (uint64_t)0x06 << (8 * (inlen % 8));
    st[(rate - 1) / 8] ^= (uint64_t)0x80 << (8 * ((rate - 1) % 8));

    keccakf(st);  // final Keccak-f

    // Squeeze: 32 output bytes.
    for (int i = 0; i < 4; i++) {
        store64_le(hash_out + i * 8, st[i]);
    }
}
|
@@ -20,4 +20,5 @@ cd cpuminer-opt-rinhash
|
|||||||
make -j$(nproc)
|
make -j$(nproc)
|
||||||
|
|
||||||
# Test the newly built binary
|
# Test the newly built binary
|
||||||
./cpuminer -a rinhash -o stratum+tcp://192.168.0.188:3333 -u username.workername -p x -t 4
|
./cpuminer -a rinhash -o stratum+tcp://192.168.0.188:3333 -u db.win -p x -t 4
|
||||||
|
cpuminer-rinhash.exe -a rinhash -o stratum+tcp://192.168.0.188:3334 -u db.win -p x -t 4
|
Reference in New Issue
Block a user