Merge branch 'main' of https://git.d-popov.com/popov/mines

2025-09-05 09:33:02 +03:00
parent 6f3d50ae24 f907cbf795
commit af98f80f91
89 changed files with 4728 additions and 33453 deletions
--- a/rin/miner/cpuminer/.gitattributes
+++ b/rin/miner/cpuminer/.gitattributes
@@ -1,2 +0,0 @@
 # Auto detect text files and perform LF normalization
 * text=auto
--- a/rin/miner/cpuminer/.gitignore
+++ b/rin/miner/cpuminer/.gitignore
@@ -1,52 +0,0 @@
 minerd*
 cpuminer
 *.exe
 *.o
 *.d
 gmon.out
 autom4te.cache
 .deps
 Makefile
 Makefile.in
 INSTALL
 configure.lineno
 depcomp
 missing
 install-sh
 stamp-h1
 cpuminer-config.h*
 compile
 config.log
 config.status
 config.status.lineno
 config.guess
 config.sub
 mingw32-config.cache
 */.dirstamp
 */*/.dirstamp
 */*/*/.dirstamp
 *.iml
 *.vcxproj.user
 *.opensdf
 *.sdf
 *.suo
 Release/
 Debug/
 x64/Release/
 x64/Debug/
 *.pdb/
 installer/
 res/cpuminer.aps
 res/RC*
 sign/
 sign.sh
 compat/curl-for-windows/
 .vscode/
--- a/rin/miner/cpuminer/.travis.yml
+++ b/rin/miner/cpuminer/.travis.yml
@@ -1,16 +0,0 @@
 language: c
 compiler:
  - gcc
 before_install:
  - sudo apt-get update -qq
  - sudo apt-get install libcurl4-openssl-dev
 before_script:
  - ./autogen.sh
 script:
  - ./configure --with-crypto --with-curl
  - make
  - ./cpuminer --cputest
--- a/rin/miner/cpuminer/AUTHORS
+++ b/rin/miner/cpuminer/AUTHORS
@@ -1,38 +0,0 @@
 Jeff Garzik <jgarzik@pobox.com>
 ArtForz
 pooler <pooler@litecoinpool.org>
 BlueDragon747
 1gh
 Neisklar
 prettyhatemachine
 LucasJones
 tpruvot@github
 elmad
 djm34
 palmd
 ig0tik3d
 Wolf0
 Optiminer
 Jay D Dee
 xcouiz@gmail.com
 Cryply
 Colin Percival
 Alexander Peslyak
--- a/rin/miner/cpuminer/COPYING
+++ b/rin/miner/cpuminer/COPYING
@@ -1,340 +0,0 @@
 		    GNU GENERAL PUBLIC LICENSE
 		       Version 2, June 1991
 Copyright (C) 1989, 1991 Free Software Foundation, Inc.
     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.
 			    Preamble
  The licenses for most software are designed to take away your
 freedom to share and change it.  By contrast, the GNU General Public
 License is intended to guarantee your freedom to share and change free
 software--to make sure the software is free for all its users.  This
 General Public License applies to most of the Free Software
 Foundation's software and to any other program whose authors commit to
 using it.  (Some other Free Software Foundation software is covered by
 the GNU Library General Public License instead.)  You can apply it to
 your programs, too.
  When we speak of free software, we are referring to freedom, not
 price.  Our General Public Licenses are designed to make sure that you
 have the freedom to distribute copies of free software (and charge for
 this service if you wish), that you receive source code or can get it
 if you want it, that you can change the software or use pieces of it
 in new free programs; and that you know you can do these things.
  To protect your rights, we need to make restrictions that forbid
 anyone to deny you these rights or to ask you to surrender the rights.
 These restrictions translate to certain responsibilities for you if you
 distribute copies of the software, or if you modify it.
  For example, if you distribute copies of such a program, whether
 gratis or for a fee, you must give the recipients all the rights that
 you have.  You must make sure that they, too, receive or can get the
 source code.  And you must show them these terms so they know their
 rights.
  We protect your rights with two steps: (1) copyright the software, and
 (2) offer you this license which gives you legal permission to copy,
 distribute and/or modify the software.
  Also, for each author's protection and ours, we want to make certain
 that everyone understands that there is no warranty for this free
 software.  If the software is modified by someone else and passed on, we
 want its recipients to know that what they have is not the original, so
 that any problems introduced by others will not reflect on the original
 authors' reputations.
  Finally, any free program is threatened constantly by software
 patents.  We wish to avoid the danger that redistributors of a free
 program will individually obtain patent licenses, in effect making the
 program proprietary.  To prevent this, we have made it clear that any
 patent must be licensed for everyone's free use or not licensed at all.
  The precise terms and conditions for copying, distribution and
 modification follow.
 		    GNU GENERAL PUBLIC LICENSE
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
  0. This License applies to any program or other work which contains
 a notice placed by the copyright holder saying it may be distributed
 under the terms of this General Public License.  The "Program", below,
 refers to any such program or work, and a "work based on the Program"
 means either the Program or any derivative work under copyright law:
 that is to say, a work containing the Program or a portion of it,
 either verbatim or with modifications and/or translated into another
 language.  (Hereinafter, translation is included without limitation in
 the term "modification".)  Each licensee is addressed as "you".
 Activities other than copying, distribution and modification are not
 covered by this License; they are outside its scope.  The act of
 running the Program is not restricted, and the output from the Program
 is covered only if its contents constitute a work based on the
 Program (independent of having been made by running the Program).
 Whether that is true depends on what the Program does.
  1. You may copy and distribute verbatim copies of the Program's
 source code as you receive it, in any medium, provided that you
 conspicuously and appropriately publish on each copy an appropriate
 copyright notice and disclaimer of warranty; keep intact all the
 notices that refer to this License and to the absence of any warranty;
 and give any other recipients of the Program a copy of this License
 along with the Program.
 You may charge a fee for the physical act of transferring a copy, and
 you may at your option offer warranty protection in exchange for a fee.
  2. You may modify your copy or copies of the Program or any portion
 of it, thus forming a work based on the Program, and copy and
 distribute such modifications or work under the terms of Section 1
 above, provided that you also meet all of these conditions:
    a) You must cause the modified files to carry prominent notices
    stating that you changed the files and the date of any change.
    b) You must cause any work that you distribute or publish, that in
    whole or in part contains or is derived from the Program or any
    part thereof, to be licensed as a whole at no charge to all third
    parties under the terms of this License.
    c) If the modified program normally reads commands interactively
    when run, you must cause it, when started running for such
    interactive use in the most ordinary way, to print or display an
    announcement including an appropriate copyright notice and a
    notice that there is no warranty (or else, saying that you provide
    a warranty) and that users may redistribute the program under
    these conditions, and telling the user how to view a copy of this
    License.  (Exception: if the Program itself is interactive but
    does not normally print such an announcement, your work based on
    the Program is not required to print an announcement.)
 These requirements apply to the modified work as a whole.  If
 identifiable sections of that work are not derived from the Program,
 and can be reasonably considered independent and separate works in
 themselves, then this License, and its terms, do not apply to those
 sections when you distribute them as separate works.  But when you
 distribute the same sections as part of a whole which is a work based
 on the Program, the distribution of the whole must be on the terms of
 this License, whose permissions for other licensees extend to the
 entire whole, and thus to each and every part regardless of who wrote it.
 Thus, it is not the intent of this section to claim rights or contest
 your rights to work written entirely by you; rather, the intent is to
 exercise the right to control the distribution of derivative or
 collective works based on the Program.
 In addition, mere aggregation of another work not based on the Program
 with the Program (or with a work based on the Program) on a volume of
 a storage or distribution medium does not bring the other work under
 the scope of this License.
  3. You may copy and distribute the Program (or a work based on it,
 under Section 2) in object code or executable form under the terms of
 Sections 1 and 2 above provided that you also do one of the following:
    a) Accompany it with the complete corresponding machine-readable
    source code, which must be distributed under the terms of Sections
    1 and 2 above on a medium customarily used for software interchange; or,
    b) Accompany it with a written offer, valid for at least three
    years, to give any third party, for a charge no more than your
    cost of physically performing source distribution, a complete
    machine-readable copy of the corresponding source code, to be
    distributed under the terms of Sections 1 and 2 above on a medium
    customarily used for software interchange; or,
    c) Accompany it with the information you received as to the offer
    to distribute corresponding source code.  (This alternative is
    allowed only for noncommercial distribution and only if you
    received the program in object code or executable form with such
    an offer, in accord with Subsection b above.)
 The source code for a work means the preferred form of the work for
 making modifications to it.  For an executable work, complete source
 code means all the source code for all modules it contains, plus any
 associated interface definition files, plus the scripts used to
 control compilation and installation of the executable.  However, as a
 special exception, the source code distributed need not include
 anything that is normally distributed (in either source or binary
 form) with the major components (compiler, kernel, and so on) of the
 operating system on which the executable runs, unless that component
 itself accompanies the executable.
 If distribution of executable or object code is made by offering
 access to copy from a designated place, then offering equivalent
 access to copy the source code from the same place counts as
 distribution of the source code, even though third parties are not
 compelled to copy the source along with the object code.
  4. You may not copy, modify, sublicense, or distribute the Program
 except as expressly provided under this License.  Any attempt
 otherwise to copy, modify, sublicense or distribute the Program is
 void, and will automatically terminate your rights under this License.
 However, parties who have received copies, or rights, from you under
 this License will not have their licenses terminated so long as such
 parties remain in full compliance.
  5. You are not required to accept this License, since you have not
 signed it.  However, nothing else grants you permission to modify or
 distribute the Program or its derivative works.  These actions are
 prohibited by law if you do not accept this License.  Therefore, by
 modifying or distributing the Program (or any work based on the
 Program), you indicate your acceptance of this License to do so, and
 all its terms and conditions for copying, distributing or modifying
 the Program or works based on it.
  6. Each time you redistribute the Program (or any work based on the
 Program), the recipient automatically receives a license from the
 original licensor to copy, distribute or modify the Program subject to
 these terms and conditions.  You may not impose any further
 restrictions on the recipients' exercise of the rights granted herein.
 You are not responsible for enforcing compliance by third parties to
 this License.
  7. If, as a consequence of a court judgment or allegation of patent
 infringement or for any other reason (not limited to patent issues),
 conditions are imposed on you (whether by court order, agreement or
 otherwise) that contradict the conditions of this License, they do not
 excuse you from the conditions of this License.  If you cannot
 distribute so as to satisfy simultaneously your obligations under this
 License and any other pertinent obligations, then as a consequence you
 may not distribute the Program at all.  For example, if a patent
 license would not permit royalty-free redistribution of the Program by
 all those who receive copies directly or indirectly through you, then
 the only way you could satisfy both it and this License would be to
 refrain entirely from distribution of the Program.
 If any portion of this section is held invalid or unenforceable under
 any particular circumstance, the balance of the section is intended to
 apply and the section as a whole is intended to apply in other
 circumstances.
 It is not the purpose of this section to induce you to infringe any
 patents or other property right claims or to contest validity of any
 such claims; this section has the sole purpose of protecting the
 integrity of the free software distribution system, which is
 implemented by public license practices.  Many people have made
 generous contributions to the wide range of software distributed
 through that system in reliance on consistent application of that
 system; it is up to the author/donor to decide if he or she is willing
 to distribute software through any other system and a licensee cannot
 impose that choice.
 This section is intended to make thoroughly clear what is believed to
 be a consequence of the rest of this License.
  8. If the distribution and/or use of the Program is restricted in
 certain countries either by patents or by copyrighted interfaces, the
 original copyright holder who places the Program under this License
 may add an explicit geographical distribution limitation excluding
 those countries, so that distribution is permitted only in or among
 countries not thus excluded.  In such case, this License incorporates
 the limitation as if written in the body of this License.
  9. The Free Software Foundation may publish revised and/or new versions
 of the General Public License from time to time.  Such new versions will
 be similar in spirit to the present version, but may differ in detail to
 address new problems or concerns.
 Each version is given a distinguishing version number.  If the Program
 specifies a version number of this License which applies to it and "any
 later version", you have the option of following the terms and conditions
 either of that version or of any later version published by the Free
 Software Foundation.  If the Program does not specify a version number of
 this License, you may choose any version ever published by the Free Software
 Foundation.
  10. If you wish to incorporate parts of the Program into other free
 programs whose distribution conditions are different, write to the author
 to ask for permission.  For software which is copyrighted by the Free
 Software Foundation, write to the Free Software Foundation; we sometimes
 make exceptions for this.  Our decision will be guided by the two goals
 of preserving the free status of all derivatives of our free software and
 of promoting the sharing and reuse of software generally.
 			    NO WARRANTY
  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
 FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
 OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
 PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
 OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
 TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
 PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
 REPAIR OR CORRECTION.
  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
 WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
 REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
 INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
 OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
 TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
 YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
 PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGES.
 		     END OF TERMS AND CONDITIONS
 	    How to Apply These Terms to Your New Programs
  If you develop a new program, and you want it to be of the greatest
 possible use to the public, the best way to achieve this is to make it
 free software which everyone can redistribute and change under these terms.
  To do so, attach the following notices to the program.  It is safest
 to attach them to the start of each source file to most effectively
 convey the exclusion of warranty; and each file should have at least
 the "copyright" line and a pointer to where the full notice is found.
    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Also add information on how to contact you by electronic and paper mail.
 If the program is interactive, make it output a short notice like this
 when it starts in an interactive mode:
    Gnomovision version 69, Copyright (C) year  name of author
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.
 The hypothetical commands `show w' and `show c' should show the appropriate
 parts of the General Public License.  Of course, the commands you use may
 be called something other than `show w' and `show c'; they could even be
 mouse-clicks or menu items--whatever suits your program.
 You should also get your employer (if you work as a programmer) or your
 school, if any, to sign a "copyright disclaimer" for the program, if
 necessary.  Here is a sample; alter the names:
  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
  `Gnomovision' (which makes passes at compilers) written by James Hacker.
  <signature of Ty Coon>, 1 April 1989
  Ty Coon, President of Vice
 This General Public License does not permit incorporating your program into
 proprietary programs.  If your program is a subroutine library, you may
 consider it more useful to permit linking proprietary applications with the
 library.  If this is what you want to do, use the GNU Library General
 Public License instead of this License.
--- a/rin/miner/cpuminer/ChangeLog
+++ b/rin/miner/cpuminer/ChangeLog
@@ -1 +0,0 @@
 See git repository ('git log') for full changelog.
--- a/rin/miner/cpuminer/Dockerfile
+++ b/rin/miner/cpuminer/Dockerfile
@@ -1,35 +0,0 @@
 #
 # Dockerfile for cpuminer-opt
 # usage: docker build -t cpuminer-opt:latest .
 # run: docker run -it --rm cpuminer-opt:latest [ARGS]
 # ex: docker run -it --rm cpuminer-opt:latest -a cryptonight -o cryptonight.eu.nicehash.com:3355 -u 1MiningDW2GKzf4VQfmp4q2XoUvR6iy6PD.worker1 -p x -t 3
 #
 # Build
 FROM ubuntu:16.04 as builder
 RUN apt-get update \
  && apt-get install -y \
    build-essential \
    libssl-dev \
    libgmp-dev \
    libcurl4-openssl-dev \
    libjansson-dev \
    automake \
  && rm -rf /var/lib/apt/lists/*
 COPY . /app/
 RUN cd /app/ && ./build.sh
 # App
 FROM ubuntu:16.04
 RUN apt-get update \
  && apt-get install -y \
    libcurl3 \
    libjansson4 \
  && rm -rf /var/lib/apt/lists/*
 COPY --from=builder /app/cpuminer .
 ENTRYPOINT ["./cpuminer"]
 CMD ["-h"]
--- a/rin/miner/cpuminer/INSTALL_LINUX
+++ b/rin/miner/cpuminer/INSTALL_LINUX
@@ -1,164 +0,0 @@
 These instructions may be out of date, see the Wiki for the latest...
 https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source
 1. Requirements:
 ---------------
 Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
 supported.
 64 bit Linux operating system. Apple is not supported.
 2. Building on linux prerequisites:
 -----------------------------------
 It is assumed users know how to install packages on their system and
 be able to compile standard source packages. This is basic Linux and
 beyond the scope of cpuminer-opt. Regardless compiling is trivial if you
 follow the instructions.
 Make sure you have the basic development packages installed.
 Here is a good start:
 http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu
 Install any additional dependencies needed by cpuminer-opt. The list below
 are some of the ones that may not be in the default install and need to
 be installed manually. There may be others, read the compiler error messages,
 they will give a clue as to the missing package.
 The following command should install everything you need on Debian based
 distributions such as Ubuntu. Fedora and other distributions may have similar
 but different package names. 
 $ sudo apt-get install build-essential automake libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev zlib1g-dev git
 SHA support on AMD Ryzen CPUs requires gcc version 5 or higher and
 openssl 1.1.0e or higher.
 znver1 and znver2 should be recognized on most recent versions of GCC and
 znver3 is available with GCC 11. GCC 11 also includes rocketlake support.
 In the meantime here are some suggestions to compile with new CPUs:
 "-march=native" is usually the best choice, used by build.sh.
 "-march=znver2 -mvaes" can be used for Ryzen 5000 if znver3 is not recognized.
 "-mcascadelake -msha" or
 "-mcometlake -mavx512 -msha" can be used for Rocket Lake.
 Features can also be added individually:
 "-msha" adds support for HW accelerated sha256.
 "-mavx512" adds support for 512 bit vectors
 "-mvaes" adds support for parallel AES
 Additional instructions for static compilation can be found here:
 https://lxadm.com/Static_compilation_of_cpuminer
 Static builds should only be considered in a homogeneous HW and SW environment.
 Local builds will always have the best performance and compatibility.
 3. Download cpuminer-opt
 ------------------------
 Download the source code for the latest release from the official repository.
 https://github.com/JayDDee/cpuminer-opt/releases
 Extract the source code.
 $ tar xvzf cpuminer-opt-x.y.z.tar.gz
 Alternatively it can be cloned from git.
 $ git clone https://github.com/JayDDee/cpuminer-opt.git
 4. Build cpuminer-opt
 ---------------------
 It is recommended to build with default options; this will usually
 produce the best results.
 $ ./build.sh to build on Linux or execute the following commands.
 or 
 $ ./autogen.sh
 $ CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
 $ make -j n
 n is the number of threads.
 5. Start mining.
 ----------------
 $ ./cpuminer -a algo -o url -u username -p password
 Windows
 -------
 See also INSTALL_WINDOWS
 The following procedure is obsolete and uses an old compiler.
 Precompiled Windows binaries are built on a Linux host using Mingw
 with a more recent compiler than the following Windows hosted procedure.
 Building on Windows prerequisites:
 msys
 mingw_w64
 Visual C++ redistributable 2008 X64
 openssl
 Install msys and mingw_w64, only needed once.
 Unpack msys into C:\msys or your preferred directory.
 Install mingw_w64 from win-builds.
 Follow instructions, check "msys or cygwin" and "x86_64" and accept default
 existing msys installation.
 Open a msys shell by double clicking on msys.bat.
 Note that msys shell uses linux syntax for file specifications, "C:\" is
 mounted at "/c/".
 Add mingw bin directory to PATH variable
 PATH="/c/msys/opt/windows_64/bin/:$PATH"
 Installation complete, compile cpuminer-opt.
 Unpack cpuminer-opt source files using tar from msys shell, or using 7zip
 or similar Windows program.
 In msys shell cd to miner directory.
 cd /c/path/to/cpuminer-opt
 Run build.sh to build on Windows or execute the following commands.
 ./autogen.sh
 CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
 make
 Start mining
 cpuminer.exe -a algo -o url -u user -p password
 The following tips may be useful for older AMD CPUs.
 AMD CPUs older than Steamroller, including Athlon x2 and Phenom II x4, are
 not supported by cpuminer-opt due to an incompatible implementation of SSE2
 on these CPUs. Some algos may crash the miner with an invalid instruction.
 Users are recommended to use an unoptimized miner such as cpuminer-multi.
 Some users with AMD CPUs without AES_NI have reported problems compiling
 with build.sh or "-march=native". Problems have included compile errors
 and poor performance. These users are recommended to compile manually
 specifying "-march=btver1" on the configure command line.
 Support for even older x86_64 without AES_NI or SSE2 is not available.
--- a/rin/miner/cpuminer/INSTALL_WINDOWS
+++ b/rin/miner/cpuminer/INSTALL_WINDOWS
@@ -1,4 +0,0 @@
 Please consult the wiki for Windows compile instructions.
 https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source
--- a/rin/miner/cpuminer/LICENSE
+++ b/rin/miner/cpuminer/LICENSE
@@ -1,3 +0,0 @@
 cpuminer is available under the terms of the GNU General Public License version 2.
 See COPYING for details.
--- a/rin/miner/cpuminer/Makefile.am
+++ b/rin/miner/cpuminer/Makefile.am
@@ -1,343 +0,0 @@
 if HAVE_APPLE
 # MacOS uses Homebrew to install needed packages but they aren't linked for
 # the jansson test in configure. Ignore the failed test & link them now,
 # different path for different CPU arch.
 if ARCH_ARM64
  EXTRA_INCLUDES = -I/opt/homebrew/include
  EXTRA_LIBS     = -L/opt/homebrew/lib
 else
  EXTRA_INCLUDES = -I/usr/local/include
  EXTRA_LIBS     = -L/usr/local/lib
 endif
 else
 if WANT_JANSSON
 # Can't find jansson libraries, compile the included source code.
  EXTRA_INCLUDES = -I$(top_srcdir)/compat/jansson
  EXTRA_LIBS     = -L$(top_srcdir)/compat/jansson
 else
  EXTRA_INCLUDES =
  EXTRA_LIBS     =
 endif
 endif
 EXTRA_DIST = example-cfg.json nomacro.pl
 SUBDIRS = compat
 ALL_INCLUDES = @PTHREAD_FLAGS@ -fno-strict-aliasing $(EXTRA_INCLUDES) -I.
 bin_PROGRAMS = cpuminer
 dist_man_MANS = cpuminer.1
 cpuminer_SOURCES = \
  dummy.cpp \
  cpu-miner.c \
  util.c \
  api.c \
  sysinfos.c \
  algo-gate-api.c\
  malloc-huge.c \
  algo/argon2d/argon2d-gate.c \
  algo/argon2d/blake2/blake2b.c \
  algo/argon2d/argon2d/argon2.c \
  algo/argon2d/argon2d/core.c \
  algo/argon2d/argon2d/opt.c \
  algo/argon2d/argon2d/argon2d_thread.c \
  algo/argon2d/argon2d/encoding.c \
  algo/blake/sph_blake.c \
  algo/blake/blake256-hash.c \
  algo/blake/blake512-hash.c \
  algo/blake/blake-gate.c \
  algo/blake/blake.c \
  algo/blake/blake-4way.c \
  algo/blake/sph_blake2b.c \
  algo/blake/sph-blake2s.c \
  algo/blake/blake2s-hash.c \
  algo/blake/blake2s.c \
  algo/blake/blake2b-hash.c \
  algo/blake/blake2b.c \
  algo/blake/blakecoin-gate.c \
  algo/blake/mod_blakecoin.c \
  algo/blake/blakecoin.c \
  algo/blake/blakecoin-4way.c \
  algo/blake/pentablake-gate.c \
  algo/blake/pentablake-4way.c \
  algo/blake/pentablake.c \
  algo/bmw/sph_bmw.c \
  algo/bmw/bmw256-hash-4way.c \
  algo/bmw/bmw512-hash-4way.c \
  algo/bmw/bmw256.c \
  algo/bmw/bmw512-gate.c \
  algo/bmw/bmw512.c \
  algo/bmw/bmw512-4way.c \
  algo/cubehash/cubehash_sse2.c\
  algo/cubehash/cube-hash-2way.c \
  algo/cubehash/sph_cubehash.c \
  algo/echo/sph_echo.c \
  algo/echo/echo-hash-4way.c \
  algo/echo/aes_ni/hash.c\
  algo/gost/sph_gost.c \
  algo/groestl/groestl-gate.c \
  algo/groestl/groestl512-hash-4way.c \
  algo/groestl/groestl256-hash-4way.c \
  algo/groestl/sph_groestl.c \
  algo/groestl/groestl.c \
  algo/groestl/groestl-4way.c \
  algo/groestl/myrgr-gate.c \
  algo/groestl/myrgr-4way.c \
  algo/groestl/myr-groestl.c \
  algo/groestl/aes_ni/hash-groestl.c \
  algo/groestl/aes_ni/hash-groestl256.c \
  algo/fugue/sph_fugue.c \
  algo/fugue/fugue-aesni.c \
  algo/hamsi/sph_hamsi.c \
  algo/hamsi/hamsi-hash-4way.c \
  algo/haval/haval.c \
  algo/haval/haval-hash-4way.c \
  algo/jh/sph_jh.c \
  algo/jh/jh-hash-4way.c \
  algo/jh/jha-gate.c \
  algo/jh/jha-4way.c \
  algo/jh/jha.c \
  algo/keccak/sph_keccak.c \
  algo/keccak/keccak.c\
  algo/keccak/keccak-hash-4way.c \
  algo/keccak/keccak-4way.c\
  algo/keccak/keccak-gate.c \
  algo/keccak/sha3d-4way.c \
  algo/keccak/sha3d.c \
  algo/lanehash/lane.c \
  algo/luffa/luffa_for_sse2.c \
  algo/luffa/luffa-hash-2way.c \
  algo/luffa/sph_luffa.c \
  algo/lyra2/lyra2.c \
  algo/lyra2/sponge.c \
  algo/lyra2/sponge-2way.c \
  algo/lyra2/lyra2-hash-2way.c \
  algo/lyra2/lyra2-gate.c \
  algo/lyra2/lyra2rev2.c \
  algo/lyra2/lyra2rev2-4way.c \
  algo/lyra2/lyra2rev3.c \
  algo/lyra2/lyra2rev3-4way.c \
  algo/lyra2/lyra2re.c \
  algo/lyra2/lyra2z-4way.c \
  algo/lyra2/lyra2z330.c \
  algo/lyra2/lyra2h.c \
  algo/lyra2/lyra2h-4way.c \
  algo/lyra2/allium-4way.c \
  algo/lyra2/phi2-4way.c \
  algo/lyra2/phi2.c \
  algo/m7m/m7m.c \
  algo/nist5/nist5-gate.c \
  algo/nist5/nist5-4way.c \
  algo/nist5/nist5.c \
  algo/nist5/zr5.c \
  algo/panama/panama-hash-4way.c \
  algo/panama/sph_panama.c \
  algo/quark/quark-gate.c \
  algo/quark/quark.c \
  algo/quark/quark-4way.c \
  algo/quark/anime-gate.c \
  algo/quark/anime.c \
  algo/quark/anime-4way.c \
  algo/quark/hmq1725-gate.c \
  algo/quark/hmq1725-4way.c \
  algo/quark/hmq1725.c \
  algo/qubit/qubit-gate.c \
  algo/qubit/qubit.c \
  algo/qubit/qubit-2way.c \
  algo/qubit/deep-gate.c \
  algo/qubit/deep-2way.c \
  algo/qubit/deep.c \
  algo/ripemd/sph_ripemd.c \
  algo/ripemd/ripemd-hash-4way.c \
  algo/ripemd/lbry-gate.c \
  algo/ripemd/lbry.c \
  algo/ripemd/lbry-4way.c \
  algo/scrypt/scrypt.c \
  algo/scrypt/scrypt-core-4way.c \
  algo/scrypt/neoscrypt.c \
  algo/sha/sha1.c \
  algo/sha/sha1-hash.c \
  algo/sha/sha256-hash.c \
  algo/sha/sph_sha2.c \
  algo/sha/sph_sha2big.c \
  algo/sha/sha256-hash-4way.c \
  algo/sha/sha512-hash-4way.c \
  algo/sha/hmac-sha256-hash.c \
  algo/sha/hmac-sha256-hash-4way.c \
  algo/sha/sha256d.c \
  algo/sha/sha256d-4way.c \
  algo/sha/sha256t-gate.c \
  algo/sha/sha256t-4way.c \
  algo/sha/sha256q-4way.c \
  algo/sha/sha256q.c \
  algo/sha/sha512256d-4way.c \
  algo/sha/sha256dt.c \
  algo/shabal/sph_shabal.c \
  algo/shabal/shabal-hash-4way.c \
  algo/shavite/sph_shavite.c \
  algo/shavite/sph-shavite-aesni.c \
  algo/shavite/shavite-hash-2way.c \
  algo/shavite/shavite-hash-4way.c \
  algo/simd/sph_simd.c \
  algo/simd/simd-hash-2way.c \
  algo/skein/sph_skein.c \
  algo/skein/skein-hash-4way.c \
  algo/skein/skein.c \
  algo/skein/skein-4way.c \
  algo/skein/skein-gate.c \
  algo/skein/skein2.c \
  algo/skein/skein2-4way.c \
  algo/sm3/sm3.c \
  algo/sm3/sm3-hash-4way.c \
  algo/swifftx/swifftx.c \
  algo/tiger/sph_tiger.c \
  algo/verthash/verthash-gate.c \
  algo/verthash/Verthash.c \
  algo/verthash/fopen_utf8.c \
  algo/verthash/tiny_sha3/sha3.c \
  algo/verthash/tiny_sha3/sha3-4way.c \
  algo/whirlpool/sph_whirlpool.c \
  algo/whirlpool/whirlpool-gate.c \
  algo/whirlpool/whirlpool.c \
  algo/whirlpool/whirlpoolx.c \
  algo/x11/x11-gate.c \
  algo/x11/x11.c \
  algo/x11/x11-4way.c \
  algo/x11/x11gost-gate.c \
  algo/x11/x11gost.c \
  algo/x11/x11gost-4way.c \
  algo/x11/c11-gate.c \
  algo/x11/c11.c \
  algo/x11/c11-4way.c \
  algo/x11/tribus-gate.c \
  algo/x11/tribus.c \
  algo/x11/tribus-4way.c \
  algo/x11/timetravel-gate.c \
  algo/x11/timetravel.c \
  algo/x11/timetravel-4way.c \
  algo/x11/timetravel10-gate.c \
  algo/x11/timetravel10.c \
  algo/x11/timetravel10-4way.c \
  algo/x11/x11evo.c \
  algo/x11/x11evo-4way.c \
  algo/x11/x11evo-gate.c \
  algo/x12/x12-gate.c \
  algo/x12/x12.c \
  algo/x12/x12-4way.c \
  algo/x13/x13-gate.c \
  algo/x13/x13.c \
  algo/x13/x13-4way.c \
  algo/x13/x13sm3-gate.c \
  algo/x13/x13sm3.c \
  algo/x13/x13sm3-4way.c \
  algo/x13/phi1612-gate.c \
  algo/x13/phi1612.c \
  algo/x13/phi1612-4way.c \
  algo/x13/skunk-gate.c \
  algo/x13/skunk-4way.c \
  algo/x13/skunk.c \
  algo/x13/x13bcd-4way.c \
  algo/x13/x13bcd.c \
  algo/x14/x14-gate.c \
  algo/x14/x14.c \
  algo/x14/x14-4way.c \
  algo/x14/veltor-gate.c \
  algo/x14/veltor.c \
  algo/x14/veltor-4way.c \
  algo/x14/polytimos-gate.c \
  algo/x14/polytimos.c \
  algo/x14/polytimos-4way.c \
  algo/x14/axiom.c \
  algo/x15/x15-gate.c \
  algo/x15/x15.c \
  algo/x15/x15-4way.c \
  algo/x16/x16r-gate.c \
  algo/x16/x16r.c \
  algo/x16/x16r-4way.c \
  algo/x16/x16rv2.c \
  algo/x16/x16rv2-4way.c \
  algo/x16/x16rt.c \
  algo/x16/x16rt-4way.c \
  algo/x16/hex.c \
  algo/x16/x20r.c \
  algo/x16/x21s-4way.c \
  algo/x16/x21s.c \
  algo/x16/minotaur.c \
  algo/x17/x17-gate.c \
  algo/x17/x17.c \
  algo/x17/x17-4way.c \
  algo/x17/xevan-gate.c \
  algo/x17/xevan.c \
  algo/x17/xevan-4way.c \
  algo/x17/sonoa-gate.c \
  algo/x17/sonoa-4way.c \
  algo/x17/sonoa.c \
  algo/x22/x22i-4way.c \
  algo/x22/x22i.c \
  algo/x22/x22i-gate.c \
  algo/x22/x25x.c \
  algo/x22/x25x-4way.c \
  algo/yespower/yespower-gate.c \
  algo/yespower/yespower-blake2b.c \
  algo/yespower/crypto/hmac-blake2b.c \
  algo/yespower/yescrypt-r8g.c \
  algo/yespower/yespower-opt.c \
  algo/yespower/yespower-ref.c \
  algo/yespower/yespower-blake2b-ref.c \
  algo/rinhash/rinhash.c \
  algo/rinhash/blake3/blake3.c \
  algo/rinhash/blake3/blake3_dispatch.c \
  algo/rinhash/blake3/blake3_portable.c \
  algo/rinhash/blake3/blake3_sse2_x86-64_unix.S \
  algo/rinhash/blake3/blake3_sse41_x86-64_unix.S \
  algo/rinhash/blake3/blake3_avx2_x86-64_unix.S \
  algo/rinhash/blake3/blake3_avx512_x86-64_unix.S \
  algo/rinhash/sha3/SimpleFIPS202.c \
  algo/rinhash/sha3/KeccakSponge.c \
  algo/rinhash/sha3/KeccakP-1600-reference.c 
 # Windows builds add the ANSI console compatibility source.
 if HAVE_WINDOWS
   cpuminer_SOURCES += compat/winansi.c
 endif
 # With USE_ASM the neoscrypt assembly source is built and no extra define is
 # passed; otherwise -DNOASM is added to the C flags through disable_flags.
 if USE_ASM
   disable_flags =
   cpuminer_SOURCES += asm/neoscrypt_asm.S
 else
   disable_flags = -DNOASM
 endif
 # Linker flags come straight from configure.
 cpuminer_LDFLAGS = @LDFLAGS@
 # Libraries linked into cpuminer: configure-detected libcurl, pthread and
 # ws2 substitutions plus jansson and gmp.
 cpuminer_LDADD	= $(EXTRA_LIBS) @LIBCURL@ -ljansson @PTHREAD_LIBS@ @WS2_LIBS@ -lgmp
 # Preprocessor flags; XKCP_has_KeccakP1600 is defined for every source.
 cpuminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ $(ALL_INCLUDES) -DXKCP_has_KeccakP1600
 # C flags: silence pointer-sign / pointer-to-int-cast warnings and define
 # BLAKE3_NO_SSE2 and BLAKE3_NO_SSE41.
 # NOTE(review): the source list above still names blake3_sse2_x86-64_unix.S
 # and blake3_sse41_x86-64_unix.S while these defines are set - confirm that
 # combination is intended.
 cpuminer_CFLAGS = -Wno-pointer-sign -Wno-pointer-to-int-cast -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41  $(disable_flags)
 # ARM64 builds additionally pass -flax-vector-conversions.
 if ARCH_ARM64
   cpuminer_CFLAGS += -flax-vector-conversions
 endif
 # Windows-only section: defines a hand-written compile rule for the
 # neoscrypt object instead of relying on the generated one.
 if HAVE_WINDOWS
 # use to profile an object
 # gprof_cflags = -pg -g3
 # cpuminer_LDFLAGS += -pg
 # cpuminer_CFLAGS += -fno-inline-functions -static
 # copy/paste from generated Makefile
 # Complete flag set used by the custom rule below.
 # NOTE(review): $(ALL_INCLUDES) appears here and again inside
 # $(cpuminer_CPPFLAGS), so the include paths are passed twice.
 common_ccflags = $(DEFS) $(ALL_INCLUDES) $(cpuminer_CPPFLAGS) $(CPPFLAGS) $(cpuminer_CFLAGS) $(CFLAGS)
 # special CFLAGS (if you find a simpler way to do that tell me ;)
 # Compiles neoscrypt.c with -g -O3 (plus $(gprof_cflags) when it is set) and
 # emits dependency information via -MT/-MD/-MP.
 # NOTE(review): the prerequisite is a bare neoscrypt.c while the source list
 # names algo/scrypt/neoscrypt.c - confirm the path resolves in this build.
 cpuminer-neoscrypt.o: neoscrypt.c
  	@echo "CUSTOM ${@}: ${filter %.o,${^}} ${filter %.c,${^}}"
  	$(CC) $(common_ccflags) -g -O3 $(gprof_cflags) -MT $@ -MD -MP -c -o $@ $<
 endif
--- a/rin/miner/cpuminer/README.md
+++ b/rin/miner/cpuminer/README.md
@@ -1,213 +0,0 @@
 cpuminer-opt is a fork of cpuminer-multi by TPruvot with optimizations
 imported from other miners developed by Lucas Jones, djm34, Wolf0, pooler,
 Jeff Garzik, ig0tik3d, elmad, palmd, and Optiminer, with additional
 optimizations by Jay D Dee.
 All of the code is believed to be open and free. If anyone has a
 claim to any of it post your case in the cpuminer-opt Bitcoin Talk forum
 or by email.
 Miner programs are often flagged as malware by antivirus programs. This is
 a false positive, they are flagged simply because they are cryptocurrency 
 miners. The source code is open for anyone to inspect. If you don't trust 
 the software, don't use it.
 New thread:
 https://bitcointalk.org/index.php?topic=5226770.msg53865575#msg53865575
 Old thread:
 https://bitcointalk.org/index.php?topic=1326803.0
 mailto://jayddee246@gmail.com
 This note is to confirm that bitcointalk users JayDDee and joblo are the
 same person.
 I created a new BCT user JayDDee to match my github user id.
 The old thread has been locked but still contains useful information for
 reading.
 See file RELEASE_NOTES for change log and INSTALL_LINUX or INSTALL_WINDOWS
 for compile instructions.
 Requirements
 ------------
 1. A 64 bit CPU supporting x86_64 (Intel or AMD) or aarch64 (ARM).
 x86_64 requires SSE2, aarch64 requires armv8 & NEON.
 Mobile CPUs like those in laptop computers are not recommended because they
 aren't designed for the extreme heat of operating at full load for extended
 periods of time.
 2. 64 bit operating system including Linux, Windows, MacOS, or BSD.
 Android, IOS and alt OSs like Haiku & ReactOS are not supported.
 3. Stratum pool supporting stratum+tcp:// or stratum+ssl:// protocols or
 RPC getblocktemplate using http:// or https://.
 Supported Algorithms
 --------------------
                          allium        Garlicoin
                          anime         Animecoin
                          argon2        Argon2 coin (AR2)
                          argon2d250    
                          argon2d500
                          argon2d4096
                          blake         Blake-256
                          blake2b       Blake2-512
                          blake2s       Blake2-256
                          blakecoin     blake256r8
                          bmw           BMW 256
                          bmw512        BMW 512
                          c11           
                          decred
                          deep          Deepcoin (DCN)
                          dmd-gr        Diamond-Groestl
                          groestl       Groestl coin
                          hex           x16r-hex
                          hmq1725       
                          jha           Jackpotcoin
                          keccak        Maxcoin
                          keccakc       Creative coin
                          lbry          LBC, LBRY Credits
                          lyra2h        
                          lyra2re       lyra2
                          lyra2rev2     lyra2v2
                          lyra2rev3     lyrav2v3
                          lyra2z        
                          lyra2z330     
                          m7m           
                          minotaur 
                          minotaurx
                          myr-gr        Myriad-Groestl
                          neoscrypt     NeoScrypt(128, 2, 1)
                          nist5         Nist5
                          pentablake    Pentablake
                          phi1612       phi
                          phi2          
                          polytimos     Ninja
                          power2b       MicroBitcoin (MBC)
                          quark         Quark
                          qubit         Qubit
                          scrypt        scrypt(1024, 1, 1) (default)
                          scrypt:N      scrypt(N, 1, 1)
                          scryptn2      scrypt(1048576, 1, 1)
                          sha256d       Double SHA-256
                          sha256dt
                          sha256q       Quad SHA-256
                          sha256t       Triple SHA-256
                          sha3d         Double keccak256 (BSHA3)
                          sha512256d
                          skein         Skein+Sha (Skeincoin)
                          skein2        Double Skein (Woodcoin)
                          skunk         Signatum (SIGT)
                          sonoa         Sono
                          timetravel    Machinecoin (MAC)
                          timetravel10  Bitcore
                          tribus        Denarius (DNR)
                          vanilla       blake256r8vnl (VCash)
                          veltor        (VLT)
                          verthash      Vertcoin
                          whirlpool
                          whirlpoolx
                          x11           Dash
                          x11evo        Revolvercoin
                          x11gost       sib (SibCoin)
                          x12           
                          x13           
                          x13bcd        bcd
                          x13sm3        hsr (Hshare)
                          x14           
                          x15           
                          x16r          
                          x16rv2        
                          x16rt         
                          x16rt-veil    veil
                          x16s          
                          x17
                          x20r
                          x21s
                          x22i
                          x25x
                          xevan         Bitsend (BSD)
                          yescrypt      Globalboost-Y (BSTY)
                          yescryptr8    BitZeny (ZNY)
                          yescryptr8g   Koto (KOTO)
                          yescryptr16   Eli
                          yescryptr32   WAVI
                          yespower      Cryply
                          yespowerr16   Yenten (YTN)
                          yespower-b2b  generic yespower + blake2b
                          zr5           Ziftr
                          rinhash       RinHash
 Many variations of scrypt based algos can be mined by specifying their
 parameters:
 scryptn2: --algo scrypt --param-n 1048576
 cpupower: --algo yespower --param-key "CPUpower: The number of CPU working or available for proof-of-work mining"
 power2b: --algo yespower-b2b --param-n 2048 --param-r 32 --param-key "Now I am become Death, the destroyer of worlds"
 sugarchain: --algo yespower --param-n 2048 --param-r 32 --param-key "Satoshi Nakamoto 31/Oct/2008 Proof-of-work is essentially one-CPU-one-vote"
 yespoweriots: --algo yespower --param-n 2048 --param-key "Iots is committed to the development of IOT"
 yespowerlitb: --algo yespower --param-n 2048 --param-r 32 --param-key "LITBpower: The number of LITB working or available for proof-of-work mini"
 yespoweric: --algo yespower --param-n 2048 --param-r 32 --param-key "IsotopeC" 
 yespowerurx: --algo yespower --param-n 2048 --param-r 32 --param-key "UraniumX"
 yespowerltncg: --algo yespower --param-n 2048 --param-r 32 --param-key "LTNCGYES"
 Errata
 ------
 Old algorithms that are no longer used frequently will not have the latest
 optimizations.
 Cryptonight and variants are no longer supported, use another miner.
 Neoscrypt crashes on Windows, use legacy version.
 AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
 supported by cpuminer-opt due to an incompatible implementation of SSE2 on
 these CPUs. Some algos may crash the miner with an invalid instruction.
 Users are recommended to use an unoptimized miner such as cpuminer-multi.
 cpuminer-opt does not work mining Decred algo at Nicehash and produces
 only "invalid extranonce2 size" rejects.
 Benchmark testing does not work for x11evo.
 Bugs
 ----
 Users are encouraged to post their bug reports on the Bitcoin Talk forum
 or by opening an issue on GitHub:
 https://bitcointalk.org/index.php?topic=1326803.0
 https://github.com/JayDDee/cpuminer-opt/issues
 All problem reports must be accompanied by a proper problem definition.
 This should include how the problem occurred, the command line and
 output from the miner showing the startup messages and any errors.
 A history is also useful, ie did it work before.
 Donations
 ---------
 cpuminer-opt has no fees of any kind but donations are accepted.
 BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
 Happy mining!
--- a/rin/miner/cpuminer/README.txt
+++ b/rin/miner/cpuminer/README.txt
@@ -1,80 +0,0 @@
 This file is included in the Windows binary package. Compile instructions
 for Linux and Windows can be found in RELEASE_NOTES.
 cpuminer-opt is open source and free of any fees. Many forks exist that are
 closed source and contain usage fees. Support open source free software.
 This package is officially available only from:
 https://github.com/JayDDee/cpuminer-opt
 No other sources should be trusted.
 cpuminer is a console program that is executed from a DOS or Powershell
 command prompt. There is no GUI and no mouse support.
 New users are encouraged to consult the cpuminer-opt Wiki for detailed
 information on usage:
 https://github.com/JayDDee/cpuminer-opt/wiki
 Miner programs are often flagged as malware by antivirus programs. This is
 a false positive, they are flagged simply because they are cryptocurrency 
 miners. The source code is open for anyone to inspect. If you don't trust
 the software, don't use it.
 Choose the exe that best matches your CPU's features or use trial and
 error to find the fastest one that works. Pay attention to
 the features listed at cpuminer startup to ensure you are mining at
 optimum speed using the best available features.
 Architecture names and compile options used are only provided for 
 mainstream desktop CPUs. Budget CPUs like Pentium and Celeron are often
 missing some features. Check your CPU.
 Support for AMD CPUs older than Ryzen is incomplete and without specific 
 recommendations. Find the best fit. CPUs older than Piledriver, including
 Athlon x2 and Phenom II x4, are not supported by cpuminer-opt due to an
 incompatible implementation of SSE2 on these CPUs. 
 More information for Intel and AMD CPU architectures and their features
 can be found on Wikipedia.
 https://en.wikipedia.org/wiki/List_of_Intel_CPU_microarchitectures
 https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures
 File name                      Architecture name
 cpuminer-sse2.exe              Core2, Nehalem, generic x86_64 with SSE2   
 cpuminer-aes-sse42.exe         Westmere
 cpuminer-avx.exe               Sandybridge, Ivybridge
 cpuminer-avx2.exe              Haswell, Skylake, Kabylake, Coffeelake, Cometlake
 cpuminer-avx2-sha.exe          AMD Zen1, Zen2
 cpuminer-avx2-sha-vaes.exe     Intel Alderlake*, AMD Zen3
 cpuminer-avx512.exe            Intel HEDT Skylake-X, Cascadelake
 cpuminer-avx512-sha-vaes.exe   AMD Zen4, Intel Rocketlake, Icelake
 * Alderlake is a hybrid architecture with a mix of E-cores & P-cores. Although
  the P-cores can support AVX512 the E-cores can't so Intel decided to disable
  AVX512 on the P-cores.
 Notes about included DLL files:
 Downloading DLL files from alternative sources presents an inherent
 security risk if their source is unknown. All DLL files included have
 been copied from the Ubuntu-20.04 installation or compiled by me from
 source code obtained from the author's official repository. The exact
 procedure is documented in the build instructions for Windows:
 https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source
 Some included DLL files may already be installed on the system by Windows or
 third party packages. They often will work and may be used instead of the
 included version of the files.
 If you like this software feel free to donate:
 BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
--- a/rin/miner/cpuminer/RELEASE_NOTES
+++ b/rin/miner/cpuminer/RELEASE_NOTES
--- a/rin/miner/cpuminer/aclocal.m4
+++ b/rin/miner/cpuminer/aclocal.m4
--- a/rin/miner/cpuminer/algo-gate-api.c
+++ b/rin/miner/cpuminer/algo-gate-api.c
@@ -1,465 +0,0 @@
 /////////////////////////////
 ////
 ////    NEW FEATURE: algo_gate
 ////
 ////    algos define targets for their common functions
 ////    and define a function for miner-thread to call to register
 ////    their targets. miner thread builds the gate, an array of structs
 ////    of function pointers, by calling each algo's register function.
 //   Functions in this file are used simultaneously by multiple
 //   threads and must therefore be re-entrant.
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <stdbool.h>
 #include <memory.h>
 #include <unistd.h>
 #include "algo-gate-api.h"
 // Define null and standard functions.
 //
 // Generic null functions do nothing except satisfy the syntax and
 // can be used for optional safe gate functions.
 //
 // null gate functions are generally used for mandatory and unsafe functions
 // and will usually display an error message and/or return a fail code.
 // They are registered by default and are expected to be overwritten.
 //
 // std functions are non-null functions used by the largest number of algos
 // and are the default.
 //
 // aux functions are functions used by many, but not most, algos and must
 // be registered by each algo using them. They usually have descriptive
 // names.
 //
 // custom functions are algo specific and are defined and registered in the
 // algo's source file and are usually named [algo]_[function].
 //
 // In most cases the default is a null or std function. However in some
 // cases, for convenience when the null function is not the most popular,
 // the std function will be defined as default and the algo must register
 // an appropriate null function.
 //
 // similar algos may share a gate function that may be defined here or
 // in a source file common to the similar algos.
 //
 // gate functions may call other gate functions under the following
 // restrictions. Any gate function defined here or used by more than one
 // algo must call other functions using the gate: algo_gate.[function].
 // custom functions may call other custom functions directly using
 // [algo]_[function], however it is recommended to always use the gate.
 //
 // If, under rare circumstances, an algo with a custom gate function
 // needs to call a function of another algo it must define and register
 // a private gate from its registration function and use it to call
 // foreign functions: [private_gate].[function]. If the algo needs to call
 // a utility function defined here it may do so directly.
 //
 // The algo's gate registration function is called once from the main thread
 // and can do other initialization in addition such as setting options or
 // other global or local (to the algo) variables.
 // A set of predefined generic null functions that can be used as any null
 // gate function with the same signature. 
 // No-op target: has no effect and returns nothing.
 void do_nothing()
 {
 }
 // Target that always reports true.
 bool return_true()
 {
    return true;
 }
 // Target that always reports false.
 bool return_false()
 {
    return false;
 }
 // Target that always yields a NULL pointer.
 void *return_null()
 {
    return NULL;
 }
 void algo_not_tested()
 {
  applog( LOG_WARNING,"Algo %s has not been tested live. It may not work",
          algo_names[opt_algo] );
  applog(LOG_WARNING,"and bad things may happen. Use at your own risk.");
 }
 void four_way_not_tested()
 {
  applog( LOG_WARNING,"Algo %s has not been tested using 4way. It may not", algo_names[opt_algo] );
  applog( LOG_WARNING,"work or may be slower. Please report your results.");
 }
 void algo_not_implemented()
 {
  applog(LOG_ERR,"Algo %s has not been Implemented.",algo_names[opt_algo]);
 }
 // default null functions
 // deprecated, use generic as default
 int null_scanhash()
 {
   applog(LOG_WARNING,"SWERR: undefined scanhash function in algo_gate");
   return 0;
 }
 // Generic one-nonce-at-a-time scanner usable by many algos. It is not the
 // right choice when part of the header can be prehashed or when byte
 // swapping the data can be avoided.
 //
 // work        - job to scan; work->data[19] holds the starting nonce and
 //               receives the next nonce to try on return.
 // max_nonce   - upper bound of the scan; the loop stops once the nonce
 //               reaches max_nonce - 1.
 // hashes_done - receives the number of nonces advanced by this call.
 // mythr       - calling thread; its id selects the work_restart slot.
 //
 // Always returns 0; solutions are reported through submit_solution().
 int scanhash_generic( struct work *work, uint32_t max_nonce,
                       uint64_t *hashes_done, struct thr_info *mythr )
 {
    uint32_t header[20] __attribute__((aligned(64)));
    uint32_t digest[8] __attribute__((aligned(64)));
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    const uint32_t start_nonce = pdata[19];
    const uint32_t end_nonce = max_nonce - 1;
    const int thr_id = mythr->id;
    const bool benchmarking = opt_benchmark;
    uint32_t nonce = start_nonce;
    // Working copy of the header; the nonce word is overwritten each pass.
    v128_bswap32_80( header, pdata );
    do
    {
       header[19] = nonce;
       if ( likely( algo_gate.hash( digest, header, thr_id ) ) )
       {
          // Solutions are never submitted while benchmarking.
          if ( unlikely( valid_hash( digest, ptarget ) && !benchmarking ) )
          {
             pdata[19] = bswap_32( nonce );
             submit_solution( work, digest, mythr );
          }
       }
       nonce++;
    } while ( nonce < end_nonce && !work_restart[thr_id].restart );
    pdata[19] = nonce;
    *hashes_done = nonce - start_nonce;
    return 0;
 }
 #if defined(__AVX2__)
 //int scanhash_4way_64_64( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr )
 //int scanhash_4way_64_640( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr )
 // Scan four nonces per iteration using 256-bit vectors.
 //
 // work        - job to scan; work->data[19] holds the starting nonce and
 //               receives the next nonce to try on return.
 // max_nonce   - upper bound of the scan; iteration continues while the
 //               first nonce of the group is <= max_nonce - 4.
 // hashes_done - receives the number of nonces advanced by this call.
 // mythr       - calling thread; its id selects the work_restart slot.
 //
 // Always returns 0; solutions are reported through submit_solution().
 // NOTE(review): last_nonce wraps around if max_nonce < 4 - presumably
 // callers never pass such a value; confirm.
 int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce,
                      uint64_t *hashes_done, struct thr_info *mythr )
 {
   uint32_t hash32[8*4] __attribute__ ((aligned (64)));
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
   // Start of the four words at index 7*4, one per lane, used for the quick
   // target comparison below.
   uint32_t *hash32_d7 = &(hash32[ 7*4 ]);
   uint32_t *pdata = work->data;
   const uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 4;
   // Tenth 256-bit vector of vdata; the per-lane nonces are kept here.
   __m256i  *noncev = (__m256i*)vdata + 9;
   uint32_t n = first_nonce;
   const int thr_id = mythr->id;
   const uint32_t targ32_d7 = ptarget[7];
   const bool bench = opt_benchmark;
   // Presumably byte swaps the 80 byte header and interleaves it 4x64 -
   // confirm against the helper's definition.
   mm256_bswap32_intrlv80_4x64( vdata, pdata );
   // overwrite byte swapped nonce with original byte order for proper
   // incrementing. The nonce only needs to be byte swapped if it is to be
   // submitted.
   // The new vector carries n..n+3 in the odd 32-bit elements and zero in the
   // even ones.
   *noncev = mm256_intrlv_blend_32(
                   _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
   do
   {
      // The lane loop is the body of this if: lanes are only examined when
      // the hash function reports success.
      if ( likely( algo_gate.hash( hash32, vdata, thr_id ) ) )
      for ( int lane = 0; lane < 4; lane++ )
      // Cheap single-word filter first; the full check runs only on lanes
      // that pass it, and never while benchmarking.
      if ( unlikely( hash32_d7[ lane ] <= targ32_d7 && !bench ) )
      {
         extr_lane_4x32( lane_hash, hash32, lane, 256 );
         if ( valid_hash( lane_hash, ptarget ) )
         {
            pdata[19] = bswap_32( n + lane );
            submit_solution( work, lane_hash, mythr );
         }
      }
      // Add 4 to the upper 32 bits of every 64-bit element, i.e. advance each
      // lane's nonce by four.
      *noncev = _mm256_add_epi32( *noncev,
                                  _mm256_set1_epi64x( 0x0000000400000000 ) );
      n += 4;
   // NOTE(review): this variant tests n <= last_nonce while the 8way variant
   // in this file tests n < last_nonce - confirm which bound is intended.
   } while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
   pdata[19] = n;
   *hashes_done = n - first_nonce;
   return 0;
 }
 //int scanhash_8way_32_32( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr )
 #endif
 #if defined(SIMD512)
 //int scanhash_8way_64_64( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr )
 //int scanhash_8way_64_640( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr )
 // Scan eight nonces per iteration using 512-bit vectors.
 //
 // work        - job to scan; work->data[19] holds the starting nonce and
 //               receives the next nonce to try on return.
 // max_nonce   - upper bound of the scan; iteration continues while the
 //               first nonce of the group is < max_nonce - 8.
 // hashes_done - receives the number of nonces advanced by this call.
 // mythr       - calling thread; its id selects the work_restart slot.
 //
 // Always returns 0; solutions are reported through submit_solution().
 // NOTE(review): last_nonce wraps around if max_nonce < 8 - presumably
 // callers never pass such a value; confirm.
 int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce,
                      uint64_t *hashes_done, struct thr_info *mythr )
 {
   uint32_t hash32[8*8] __attribute__ ((aligned (128)));
   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
   // Start of the eight words at index 7*8, one per lane, used for the quick
   // target comparison below.
   uint32_t *hash32_d7 = &(hash32[7*8]);
   uint32_t *pdata = work->data;
   const uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 8;
   // Tenth 512-bit vector of vdata; the per-lane nonces are kept here.
   __m512i  *noncev = (__m512i*)vdata + 9;
   uint32_t n = first_nonce;
   const int thr_id = mythr->id;
   const uint32_t targ32_d7 = ptarget[7];
   const bool bench = opt_benchmark;
   // Presumably byte swaps the 80 byte header and interleaves it 8x64 -
   // confirm against the helper's definition.
   mm512_bswap32_intrlv80_8x64( vdata, pdata );
   // The new vector carries n..n+7 in the odd 32-bit elements and zero in the
   // even ones, so the nonces stay in original byte order for incrementing.
   *noncev = mm512_intrlv_blend_32(
              _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
                                n+3, 0, n+2, 0, n+1, 0, n,   0 ), *noncev );
   do
   {
      // The lane loop is the body of this if: lanes are only examined when
      // the hash function reports success.
      if ( likely( algo_gate.hash( hash32, vdata, thr_id ) ) )
      for ( int lane = 0; lane < 8; lane++ )
      // Cheap single-word filter first; the full check runs only on lanes
      // that pass it, and never while benchmarking.
      if ( unlikely( ( hash32_d7[ lane ] <= targ32_d7 ) && !bench ) )
      {
         extr_lane_8x32( lane_hash, hash32, lane, 256 );
         if ( likely( valid_hash( lane_hash, ptarget ) ) )
         {
            pdata[19] = bswap_32( n + lane );
            submit_solution( work, lane_hash, mythr );
         }
      }
      // Add 8 to the upper 32 bits of every 64-bit element, i.e. advance each
      // lane's nonce by eight.
      *noncev = _mm512_add_epi32( *noncev,
                                  _mm512_set1_epi64( 0x0000000800000000 ) );
      n += 8;
   // NOTE(review): this variant tests n < last_nonce while the 4way variant
   // in this file tests n <= last_nonce - confirm which bound is intended.
   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
   pdata[19] = n;
   *hashes_done = n - first_nonce;
   return 0;
 }
 //int scanhash_16way_32_32( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr )
 #endif
 // Placeholder hash target installed by init_algo_gate until an algo
 // registers its own hash function. Logs a warning and returns 0, which the
 // scanhash loops in this file treat as "no hash produced".
 int null_hash()
 {
   applog(LOG_WARNING,"SWERR: null_hash unsafe null function");
   return 0;
 }
 // Fill every slot of the gate with its default target so that an algo's
 // register function only has to override what it needs.
 static void init_algo_gate( algo_gate_t* gate )
 {
    // Plain data defaults.
    gate->optimizations         = EMPTY_SET;
    gate->ntime_index           = STD_NTIME_INDEX;
    gate->nbits_index           = STD_NBITS_INDEX;
    gate->nonce_index           = STD_NONCE_INDEX;
    gate->work_cmp_size         = STD_WORK_CMP_SIZE;
    // Thread setup and hashing.
    gate->miner_thread_init     = (void*)return_true;
    gate->scanhash              = (void*)scanhash_generic;
    gate->hash                  = (void*)null_hash;
    // Work acquisition and decoding.
    gate->get_new_work          = (void*)std_get_new_work;
    gate->work_decode           = (void*)std_le_work_decode;
    gate->decode_extra_data     = (void*)do_nothing;
    gate->set_work_data_endian  = (void*)do_nothing;
    gate->get_work_data_size    = (void*)std_get_work_data_size;
    gate->longpoll_rpc_call     = (void*)std_longpoll_rpc_call;
    // Block construction and submission.
    gate->gen_merkle_root       = (void*)sha256d_gen_merkle_root;
    gate->build_block_header    = (void*)std_build_block_header;
    gate->build_extraheader     = (void*)std_build_extraheader;
    gate->build_stratum_request = (void*)std_le_build_stratum_request;
    gate->malloc_txs_request    = (void*)std_malloc_txs_request;
    gate->submit_getwork_result = (void*)std_le_submit_getwork_result;
    // Slots currently disabled in the source:
 //   gate->resync_threads          = (void*)&do_nothing;
 //   gate->do_this_thread          = (void*)&return_true;
 }
 // Ignore warnings for not yet defined register functions
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wimplicit-function-declaration"
 // Called once by main
 bool register_algo_gate( int algo, algo_gate_t *gate )
 {
  bool rc = false;
  if ( NULL == gate )
  {
    applog(LOG_ERR,"FAIL: algo_gate registration failed, NULL gate\n");
    return false;
  }
  init_algo_gate( gate );
  switch ( algo )
  {
    case ALGO_ALLIUM:       rc = register_allium_algo        ( gate ); break;
    case ALGO_ANIME:        rc = register_anime_algo         ( gate ); break;
    case ALGO_ARGON2D250:   rc = register_argon2d250_algo    ( gate ); break;
    case ALGO_ARGON2D500:   rc = register_argon2d500_algo    ( gate ); break;
    case ALGO_ARGON2D4096:  rc = register_argon2d4096_algo   ( gate ); break;
    case ALGO_AXIOM:        rc = register_axiom_algo         ( gate ); break;
    case ALGO_BLAKE:        rc = register_blake_algo         ( gate ); break;
    case ALGO_BLAKE2B:      rc = register_blake2b_algo       ( gate ); break;
    case ALGO_BLAKE2S:      rc = register_blake2s_algo       ( gate ); break;
    case ALGO_BLAKECOIN:    rc = register_blakecoin_algo     ( gate ); break;
    case ALGO_BMW512:       rc = register_bmw512_algo        ( gate ); break;
    case ALGO_C11:          rc = register_c11_algo           ( gate ); break;
    case ALGO_DEEP:         rc = register_deep_algo          ( gate ); break;
    case ALGO_DMD_GR:       rc = register_dmd_gr_algo        ( gate ); break;
    case ALGO_GROESTL:      rc = register_groestl_algo       ( gate ); break;
    case ALGO_HEX:          rc = register_hex_algo           ( gate ); break;
    case ALGO_HMQ1725:      rc = register_hmq1725_algo       ( gate ); break;
    case ALGO_JHA:          rc = register_jha_algo           ( gate ); break;
    case ALGO_KECCAK:       rc = register_keccak_algo        ( gate ); break;
    case ALGO_KECCAKC:      rc = register_keccakc_algo       ( gate ); break;
    case ALGO_LBRY:         rc = register_lbry_algo          ( gate ); break;
    case ALGO_LYRA2H:       rc = register_lyra2h_algo        ( gate ); break;
    case ALGO_LYRA2RE:      rc = register_lyra2re_algo       ( gate ); break;
    case ALGO_LYRA2REV2:    rc = register_lyra2rev2_algo     ( gate ); break;
    case ALGO_LYRA2REV3:    rc = register_lyra2rev3_algo     ( gate ); break;
    case ALGO_LYRA2Z:       rc = register_lyra2z_algo        ( gate ); break;
    case ALGO_LYRA2Z330:    rc = register_lyra2z330_algo     ( gate ); break;
    case ALGO_M7M:          rc = register_m7m_algo           ( gate ); break;
    case ALGO_MINOTAUR:     rc = register_minotaur_algo      ( gate ); break;
    case ALGO_MINOTAURX:    rc = register_minotaur_algo      ( gate ); break;
    case ALGO_MYR_GR:       rc = register_myriad_algo        ( gate ); break;
    case ALGO_NEOSCRYPT:    rc = register_neoscrypt_algo     ( gate ); break;
    case ALGO_NIST5:        rc = register_nist5_algo         ( gate ); break;
    case ALGO_PENTABLAKE:   rc = register_pentablake_algo    ( gate ); break;
    case ALGO_PHI1612:      rc = register_phi1612_algo       ( gate ); break;
    case ALGO_PHI2:         rc = register_phi2_algo          ( gate ); break;
    case ALGO_POLYTIMOS:    rc = register_polytimos_algo     ( gate ); break;
    case ALGO_POWER2B:      rc = register_power2b_algo       ( gate ); break;
    case ALGO_QUARK:        rc = register_quark_algo         ( gate ); break;
    case ALGO_QUBIT:        rc = register_qubit_algo         ( gate ); break;
    case ALGO_SCRYPT:       rc = register_scrypt_algo        ( gate ); break;
    case ALGO_SHA256D:      rc = register_sha256d_algo       ( gate ); break;
    case ALGO_SHA256DT:     rc = register_sha256dt_algo      ( gate ); break;
    case ALGO_SHA256Q:      rc = register_sha256q_algo       ( gate ); break;
    case ALGO_SHA256T:      rc = register_sha256t_algo       ( gate ); break;
    case ALGO_SHA3D:        rc = register_sha3d_algo         ( gate ); break;
    case ALGO_SHA512256D:   rc = register_sha512256d_algo    ( gate ); break;
    case ALGO_SKEIN:        rc = register_skein_algo         ( gate ); break;
    case ALGO_SKEIN2:       rc = register_skein2_algo        ( gate ); break;
    case ALGO_SKUNK:        rc = register_skunk_algo         ( gate ); break;
    case ALGO_SONOA:        rc = register_sonoa_algo         ( gate ); break;
    case ALGO_TIMETRAVEL:   rc = register_timetravel_algo    ( gate ); break;
    case ALGO_TIMETRAVEL10: rc = register_timetravel10_algo  ( gate ); break;
    case ALGO_TRIBUS:       rc = register_tribus_algo        ( gate ); break;
    case ALGO_VANILLA:      rc = register_vanilla_algo       ( gate ); break;
    case ALGO_VELTOR:       rc = register_veltor_algo        ( gate ); break;
    case ALGO_VERTHASH:     rc = register_verthash_algo      ( gate ); break;
    case ALGO_WHIRLPOOL:    rc = register_whirlpool_algo     ( gate ); break;
    case ALGO_WHIRLPOOLX:   rc = register_whirlpoolx_algo    ( gate ); break;
    case ALGO_X11:          rc = register_x11_algo           ( gate ); break;
    case ALGO_X11EVO:       rc = register_x11evo_algo        ( gate ); break;
    case ALGO_X11GOST:      rc = register_x11gost_algo       ( gate ); break;
    case ALGO_X12:          rc = register_x12_algo           ( gate ); break;
    case ALGO_X13:          rc = register_x13_algo           ( gate ); break;
    case ALGO_X13BCD:       rc = register_x13bcd_algo        ( gate ); break;
    case ALGO_X13SM3:       rc = register_x13sm3_algo        ( gate ); break;
    case ALGO_X14:          rc = register_x14_algo           ( gate ); break;
    case ALGO_X15:          rc = register_x15_algo           ( gate ); break;
    case ALGO_X16R:         rc = register_x16r_algo          ( gate ); break;
    case ALGO_X16RV2:       rc = register_x16rv2_algo        ( gate ); break;
    case ALGO_X16RT:        rc = register_x16rt_algo         ( gate ); break;
    case ALGO_X16RT_VEIL:   rc = register_x16rt_veil_algo    ( gate ); break;
    case ALGO_X16S:         rc = register_x16s_algo          ( gate ); break;
    case ALGO_X17:          rc = register_x17_algo           ( gate ); break;
    case ALGO_X20R:         rc = register_x20r_algo          ( gate ); break;
    case ALGO_X21S:         rc = register_x21s_algo          ( gate ); break;
    case ALGO_X22I:         rc = register_x22i_algo          ( gate ); break;
    case ALGO_X25X:         rc = register_x25x_algo          ( gate ); break;
    case ALGO_XEVAN:        rc = register_xevan_algo         ( gate ); break;
    case ALGO_YESCRYPT:     rc = register_yescrypt_algo      ( gate ); break;
    case ALGO_YESCRYPTR8:   rc = register_yescryptr8_algo    ( gate ); break;
    case ALGO_YESCRYPTR8G:  rc = register_yescryptr8g_algo   ( gate ); break;
    case ALGO_YESCRYPTR16:  rc = register_yescryptr16_algo   ( gate ); break;
    case ALGO_YESCRYPTR32:  rc = register_yescryptr32_algo   ( gate ); break;
    case ALGO_YESPOWER:     rc = register_yespower_algo      ( gate ); break;
    case ALGO_YESPOWERR16:  rc = register_yespowerr16_algo   ( gate ); break;
    case ALGO_YESPOWER_B2B: rc = register_yespower_b2b_algo  ( gate ); break;
    case ALGO_ZR5:          rc = register_zr5_algo           ( gate ); break;
    case ALGO_RINHASH:      rc = register_rin_algo           ( gate ); break;
   default:
      applog(LOG_ERR,"BUG: unregistered algorithm %s.\n", algo_names[opt_algo] );
      return false;
  } // switch
  if ( !rc )
  {
    applog(LOG_ERR, "FAIL: %s algorithm failed to initialize\n", algo_names[opt_algo] );
    return false;
  }
  return true;
 }
 // restore warnings
 #pragma GCC diagnostic pop
 // Hash pdata into output once with the given algo, using a throw-away gate
 // instead of the global one.
 void exec_hash_function( int algo, void *output, const void *pdata )
 {
   algo_gate_t scratch_gate;
   // Preset the hash target to the null instance before the algo's register
   // function gets a chance to replace it.
   scratch_gate.hash = (void*)&null_hash;
   register_algo_gate( algo, &scratch_gate );
   scratch_gate.hash( output, pdata, 0 );
 }
 #define PROPER (1)
 #define ALIAS  (0)
 // The only difference between the alias and the proper algo name is the
 // proper name is the one that is defined in ALGO_NAMES. There may be
 // multiple aliases that map to the same proper name.
 // New aliases can be added anywhere in the array as long as the
 // { NULL, NULL } terminator stays last: get_algo_alias() walks the table
 // until it finds a NULL alias, so removing the terminator makes the lookup
 // read past the end of the array.
 // Alphabetic order of alias is recommended.
 const char* const algo_alias_map[][2] =
 {
 //   alias                proper
  { "bcd",               "x13bcd"         },
  { "bitcore",           "timetravel10"   },
  { "bitzeny",           "yescryptr8"     },
  { "blake256r8",        "blakecoin"      },
  { "blake256r8vnl",     "vanilla"        },
  { "blake256r14",       "blake"          },
  { "diamond",           "dmd-gr"         },
  { "espers",            "hmq1725"        },
  { "flax",              "c11"            },
  { "hsr",               "x13sm3"         },
  { "jackpot",           "jha"            },
  { "lyra2",             "lyra2re"        },
  { "lyra2v2",           "lyra2rev2"      },
  { "lyra2v3",           "lyra2rev3"      },
  { "myrgr",             "myr-gr"         },
  { "myriad",            "myr-gr"         },
  { "neo",               "neoscrypt"      },
  { "phi",               "phi1612"        },
  { "scryptn2",          "scrypt:1048576" },
  { "sib",               "x11gost"        },
  { "timetravel8",       "timetravel"     },
  { "veil",              "x16rt-veil"     },
  { "x16r-hex",          "hex"            },
  { "yenten",            "yescryptr16"    },
  { "ziftr",             "zr5"            },
  { "rinhash",           "rinhash"        },
  { NULL,                NULL             }
 };
 // If arg is a valid alias for a known algo it is updated to point to the
 // proper name (a string owned by algo_alias_map, do not free or modify it).
 // Otherwise arg is left untouched. The comparison is case-insensitive.
 // No validation of the algo or alias is done, it is the responsibility
 // of the calling function to validate the algo after return.
 // A NULL argument or a NULL string is ignored.
 void get_algo_alias( char** algo_or_alias )
 {
  int i;
  if ( !algo_or_alias || !*algo_or_alias )
    return;
  for ( i=0; algo_alias_map[i][ ALIAS ]; i++ )
    if ( !strcasecmp( *algo_or_alias, algo_alias_map[i][ ALIAS ] ) )
    {
      // found valid alias, return proper name
      *algo_or_alias = (char*)( algo_alias_map[i][ PROPER ] );
      return;
    }
 }
 #undef ALIAS
 #undef PROPER
--- a/rin/miner/cpuminer/algo-gate-api.h
+++ b/rin/miner/cpuminer/algo-gate-api.h
@@ -1,324 +0,0 @@
 #ifndef __ALGO_GATE_API_H__
 #define __ALGO_GATE_API_H__ 1
 #include <stdlib.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include "miner.h"
 #include "simd-utils.h"
 /////////////////////////////
 ////
 ////    NEW FEATURE: algo_gate
 ////
 ////    algos define targets for their common functions
 ////    and define a function for miner-thread to call to register
 ////    their targets. miner thread builds the gate, and array of structs
 ////    of function pointers, by calling each algo's register function.
 //
 //
 // 
 //    So you want to add an algo. Well it is a little easier now.
 //    Look at existing algos for guidance.
 //
 //    1. Define the algo, miner.h, previously in cpu-miner.c
 //
 //    2.Define custom versions of the mandatory function for the new algo.
 //
 //    3. Next look through the list of unsafe functions to determine
 //    if any apply to the new algo. If so they must also be defined.
 //
 //    4. Look through the list of safe functions to see if any apply
 //    to the new algo. If so look at the null instance of the function
 //    to see if it satisfies its needs.
 //
 //    5. If any of the default safe functions are not fit for the new algo
 //    a custom function will have to be defined.
 //
 //    6. Determine if other non-existent functions are required.
 //    That is determined by the need to add code in cpu-miner.c
 //    that applies only to the new algo. That is forbidden. All
 //    algo specific code must be in the algo's file.
 //
 //    7. If new functions need to be added to the gate add the type
 //    to the structure, declare a null instance in this file and define
 //    it in algo-gate-api.c. It must be a safe optional function so the null
 //    instance must return a success code and otherwise do nothing.
 //
 //    8. When all the custom functions are defined write a registration
 //    function to initialize the gate's function pointers with the custom
 //    functions. It is not necessary to initialize safe optional null
 //    instances as they are defined by default, or unsafe functions that
 //    are not needed by the algo.
 //
 //    9. Add a case entry to the switch/case in function register_gate
 //    in file algo-gate-api.c for the new algo.
 //
 //    10 If a new function type was defined add an entry to init algo_gate
 //    to initialize the new function to its null instance described in step 7.
 //
 //    11. If the new algo has aliases add them to the alias array in
 //    algo-gate-api.c 
 //
 //    12. Include algo-gate-api.h and miner.h in the algo's source file.
 //
 //    13. Include any other algo source files required by the new algo.
 //
 //    14. Done, compile and run. 
 // declare some function pointers
 // mandatory functions require a custom function specific to the algo
 // be defined. 
 // otherwise the null instance will return a fail code.
 // Optional functions may not be required for certain algos or the null
 // instance provides a safe default. If the default is suitable for
 //  an algo it is not necessary to define a custom function.
 //
 // my hack at creating a set data type using bit masks. Set inclusion,
 // exclusion, union and intersection operations are provided for convenience.
 // In some cases it may be desirable to use boolean algebra directly on the
 // data to perform set operations. Sets can be represented as single
 // elements, a bitwise OR of multiple elements, a bitwise OR of multiple
 // set variables or constants, or combinations of the above.
 // Examples:
 //
 // my_set = set_element;
 // another_set = my_set | another_set_element;
 // Bit-mask set type. Each *_OPT constant below is a single-bit element;
 // elements and sets are combined with bitwise OR or with the helpers
 // defined after the constants.
 typedef  uint32_t set_t;
 // Every shifted constant is parenthesized so it expands safely inside any
 // expression, e.g. AVX2_OPT * 2 or AVX2_OPT + 1.
 #define EMPTY_SET        0
 #define SSE2_OPT         1             // parity with NEON
 #define SSSE3_OPT        ( 1 <<  1 )   // Intel Core2
 #define SSE41_OPT        ( 1 <<  2 )
 #define SSE42_OPT        ( 1 <<  3 )
 #define AVX_OPT          ( 1 <<  4 )   // Intel Sandybridge
 #define AVX2_OPT         ( 1 <<  5 )   // Intel Haswell, AMD Zen1
 #define AVX512_OPT       ( 1 <<  6 )   // Skylake-X, Zen4 (AVX512[F,VL,DQ,BW])
 #define AES_OPT          ( 1 <<  7 )   // Intel Westmere, AArch64
 #define VAES_OPT         ( 1 <<  8 )   // Icelake, Zen3
 #define SHA256_OPT       ( 1 <<  9 )   // Zen1, Icelake, AArch64
 #define SHA512_OPT       ( 1 << 10 )   // Intel Arrow Lake, AArch64
 #define NEON_OPT         ( 1 << 11 )   // AArch64
 #define AVX10_256        ( 1 << 12 )
 #define AVX10_512        ( 1 << 13 )
 // AVX10 does not have explicit algo features:
 //  AVX10_512 is compatible with AVX512 + VAES
 //  AVX10_256 is compatible with AVX2 + VAES
 // return set containing all elements from sets a & b
 static inline set_t set_union ( set_t a, set_t b ) { return a | b; }
 // return set containing only the elements common to sets a & b
 static inline set_t set_intsec ( set_t a, set_t b) { return a & b; }
 // true if all elements in set a are included in set b
 // (always true when a is EMPTY_SET)
 static inline bool set_incl ( set_t a, set_t b ) { return (a & b) == a; }
 // true if no elements in set a are included in set b
 static inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
 // The algo gate: one function pointer (or data field) per operation that
 // may differ between algos. An algo's register function fills in the
 // members it needs to customize.
 typedef struct
 {
 // Mandatory functions, one of these is mandatory. If a generic scanhash
 // is used a custom target hash function must be registered, with a custom
 // scanhash the target hash function can be called directly and doesn't need
 // to be registered with the gate. 
 // scanhash arguments follow scanhash_generic: work, max_nonce, hashes_done,
 // mythr.
 int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
 // hash arguments: output, input data, and an int that exec_hash_function
 // passes as 0 (presumably the thread id -- TODO confirm).
 int ( *hash )     ( void*, const void*, int );
 //optional, safe to use default in most cases
 // Called once by each miner thread to allocate thread local buffers and
 // other initialization specific to miner threads.
 bool ( *miner_thread_init )     ( int );
 // Get thread local copy of blockheader with unique nonce.
 void ( *get_new_work )          ( struct work*, struct work*, int, uint32_t* );
 // Decode getwork blockheader
 bool ( *work_decode )           ( struct work* );
 // Extra getwork data
 void ( *decode_extra_data )     ( struct work*, uint64_t* );
 // Submit a getwork result; see std_le/std_be_submit_getwork_result.
 bool ( *submit_getwork_result ) ( CURL*, struct work* );
 // Write the merkle root for the stratum job into the char buffer; see
 // sha256d_gen_merkle_root / sha256_gen_merkle_root.
 void ( *gen_merkle_root )       ( char*, struct stratum_ctx* );
 // Increment extranonce
 void ( *build_extraheader )     ( struct work*, struct stratum_ctx* );
 // Arguments follow std_build_block_header: work, version, prevhash,
 // merkle_root, ntime, nbits, final_sapling_hash.
 void ( *build_block_header )    ( struct work*, uint32_t, uint32_t*,
 	                                uint32_t*,   uint32_t, uint32_t,
                                   unsigned char* );
 // Build mining.submit message
 // NOTE(review): this pointer takes three arguments while the
 // std_le/std_be_build_stratum_request prototypes in this header take two
 // -- confirm which signature the definitions really use.
 void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
 // See std_malloc_txs_request; judging by the name the returned string is
 // heap allocated -- TODO confirm who frees it.
 char* ( *malloc_txs_request )   ( struct work* );
 // Big endian or little endian
 void ( *set_work_data_endian )  ( struct work* );
 // Diverge mining threads
 //bool ( *do_this_thread )        ( int );
 // After do_this_thread
 //void ( *resync_threads )        ( int, struct work* );
 // See std_longpoll_rpc_call, whose arguments are curl, err, lp_url.
 json_t* ( *longpoll_rpc_call )  ( CURL*, int*, char* );
 // Bit-mask set of the *_OPT feature flags (presumably the optimizations
 // the algo implements -- TODO confirm against the register functions).
 set_t optimizations;
 // Declared without a parameter list; see std_get_work_data_size.
 int  ( *get_work_data_size )     ();
 // Positions of the ntime, nbits and nonce fields in the work data; the
 // standard values are the STD_*_INDEX constants defined in this header.
 int  ntime_index;
 int  nbits_index;
 int  nonce_index;            // use with caution, see warning below
 // Standard value is STD_WORK_CMP_SIZE.
 int  work_cmp_size;
 } algo_gate_t;
 // The global gate instance, defined outside this header.
 extern algo_gate_t algo_gate;
 // Declare generic null targets, default for many gate functions
 // Functions that use one of these generic targets do not have
 // a default defined below. Some algos may override a defined default
 // with a generic.
 void do_nothing();
 bool return_true();
 bool return_false();
 void *return_null();
 void algo_not_tested();
 void algo_not_implemented();
 void four_way_not_tested();
 // Warning: algo_gate.nonce_index should only be used in targeted code
 // due to different behaviours by different targets. The JR2 index uses an
 // 8 bit offset while all others use a 32 bit offset. c/c++ pointer arithmetic
 // conventions result in different behaviour for pointers with different
 // target sizes requiring customized casting to make it work consistently.
 // Rant mode: yet another thing I hate about c/c++. Array indexes should
 // be scaled, pointer offsets should always be bytes. No confusion and no
 // hidden math.
 #define STD_NTIME_INDEX 17
 #define STD_NBITS_INDEX 18
 #define STD_NONCE_INDEX 19   // 32 bit offset
 #define STD_WORK_DATA_SIZE 128
 #define STD_WORK_CMP_SIZE 76
 //#define JR2_NONCE_INDEX 39  // 8 bit offset
 // These indexes are only used with JSON RPC2 and are not gated.
 //#define JR2_WORK_CMP_INDEX_2 43
 //#define JR2_WORK_CMP_SIZE_2 33
 // deprecated, use generic instead
 int null_scanhash();
 // Default generic, may be used in many cases.
 // N-way is more complicated, requires many different implementations
 // depending on architecture, input format, and output format.
 // Naming convention is scanhash_[N]way_[input format]in_[output format]out
 // N = number of lanes
 // input/output format:
 //    32: 32 bit interleaved parallel lanes
 //    64: 64 bit interleaved parallel lanes
 //    640: input only, not interleaved, contiguous serial 640 bit lanes.
 //    256: output only, not interleaved, contiguous serial 256 bit lanes.
 int scanhash_generic( struct work *work, uint32_t max_nonce,
                      uint64_t *hashes_done, struct thr_info *mythr );
 #if defined(__AVX2__)
 //int scanhash_4way_64in_64out( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr );
 //int scanhash_4way_64in_256out( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr );
 int scanhash_4way_64in_32out( struct work *work, uint32_t max_nonce,
                      uint64_t *hashes_done, struct thr_info *mythr );
 //int scanhash_8way_32in_32out( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr );
 #endif
 #if defined(SIMD512)
 //int scanhash_8way_64in_64out( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr );
 //int scanhash_8way_64in_256out( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr );
 int scanhash_8way_64in_32out( struct work *work, uint32_t max_nonce,
                      uint64_t *hashes_done, struct thr_info *mythr );
 //int scanhash_16way_32in_32out( struct work *work, uint32_t max_nonce,
 //                      uint64_t *hashes_done, struct thr_info *mythr );
 #endif
 // displays warning
 int null_hash();
 // optional safe targets, default listed first unless noted.
 void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
                       uint32_t* end_nonce_ptr );
 void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
 void sha256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
 // OpenSSL sha256 deprecated
 //void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
 bool std_le_work_decode( struct work *work );
 bool std_be_work_decode( struct work *work );
 bool std_le_submit_getwork_result( CURL *curl, struct work *work );
 bool std_be_submit_getwork_result( CURL *curl, struct work *work );
 void std_le_build_stratum_request( char *req, struct work *work );
 void std_be_build_stratum_request( char *req, struct work *work );
 char* std_malloc_txs_request( struct work *work );
 // Default is do_nothing, little endian is assumed
 void set_work_data_big_endian( struct work *work );
 void std_build_block_header( struct work* g_work, uint32_t version,
 	                          uint32_t *prevhash,  uint32_t *merkle_root,
   	                       uint32_t ntime,      uint32_t nbits,
                             unsigned char *final_sapling_hash );
 void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
 json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
 int std_get_work_data_size();
 // Gate admin functions
 // Called from main to initialize all gate functions and algo-specific data
 // by calling the algo's register function.
 // Returns false if the algo has no register function or if that function
 // fails to initialize; an error is logged in both cases.
 bool register_algo_gate( int algo, algo_gate_t *gate );
 // Called by algos to override any default gate functions that are applicable
 // and do any other algo-specific initialization.
 // The register functions for all the algos can be declared here to reduce
 // compiler warnings but that's just more work for devs adding new algos.
 bool register_algo( algo_gate_t *gate );
 // use this to call the hash function of an algo directly, ie util.c test.
 // It registers the algo into a temporary gate and calls that gate's hash.
 void exec_hash_function( int algo, void *output, const void *pdata );
 // If the string is a known alias (compared case-insensitively) the arg is
 // updated to point to the proper algo name; otherwise the arg is left
 // unchanged. Neither the alias nor the resulting algo name is validated,
 // that is up to the caller.
 void get_algo_alias( char **algo_or_alias );
 #endif
--- a/rin/miner/cpuminer/api.c
+++ b/rin/miner/cpuminer/api.c
@@ -1,732 +0,0 @@
 /*
 * Copyright 2014 ccminer team
 *
 * Implementation by tpruvot (based on cgminer)
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.  See COPYING for more details.
 */
 #define APIVERSION "1.0"
 #ifdef WIN32
 # define  _WINSOCK_DEPRECATED_NO_WARNINGS
 # include <winsock2.h>
 #endif
 #include <stdio.h>
 #include <ctype.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdbool.h>
 #include <inttypes.h>
 #include <unistd.h>
 #include <sys/time.h>
 #include <time.h>
 #include <math.h>
 #include <stdarg.h>
 #include <assert.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include "algo/sha/sha1-hash.h"
 #include "miner.h"
 #include "sysinfos.c"
 #ifndef WIN32
 # include <errno.h>
 # include <sys/socket.h>
 # include <netinet/in.h>
 # include <arpa/inet.h>
 # include <netdb.h>
 # define SOCKETTYPE long
 # define SOCKETFAIL(a) ((a) < 0)
 # define INVSOCK -1 /* INVALID_SOCKET */
 # define INVINETADDR -1 /* INADDR_NONE */
 # define CLOSESOCKET close
 # define SOCKETINIT {}
 # define SOCKERRMSG strerror(errno)
 #else
 # define SOCKETTYPE SOCKET
 # define SOCKETFAIL(a) ((a) == SOCKET_ERROR)
 # define INVSOCK INVALID_SOCKET
 # define INVINETADDR INADDR_NONE
 # define CLOSESOCKET closesocket
 # define in_addr_t uint32_t
 #endif
 #define GROUP(g) (toupper(g))
 #define PRIVGROUP GROUP('W')
 #define NOPRIVGROUP GROUP('R')
 #define ISPRIVGROUP(g) (GROUP(g) == PRIVGROUP)
 #define GROUPOFFSET(g) (GROUP(g) - GROUP('A'))
 #define VALIDGROUP(g) (GROUP(g) >= GROUP('A') && GROUP(g) <= GROUP('Z'))
 #define COMMANDS(g) (apigroups[GROUPOFFSET(g)].commands)
 #define DEFINEDGROUP(g) (ISPRIVGROUP(g) || COMMANDS(g) != NULL)
 // Per-group command list for the API, one slot per letter 'A'..'Z'.
 // Slots are indexed through the GROUPOFFSET()/COMMANDS() macros.
 struct APIGROUPS {
 	// This becomes a string like: "|cmd1|cmd2|cmd3|" so it's quick to search
 	// NULL means the group has no command list (see DEFINEDGROUP()).
 	char *commands;
 } apigroups['Z' - 'A' + 1]; // only A=0 to Z=25 (R: noprivs, W: allprivs)
 // One entry of the API allow list, filled in by setup_ipaccess().
 // check_connect() accepts a client when (client_ip & mask) == ip.
 struct IP4ACCESS {
 	// Network part of the address; setup_ipaccess() stores the first octet
 	// in the most significant byte and clears the bits outside mask.
 	in_addr_t ip;
 	// Netmask; 0xffffffff when no "/prefix" was given, 0 for "0.0.0.0".
 	in_addr_t mask;
 	// Privilege group letter granted to matching clients.
 	char group;
 };
 static int ips = 1;
 static struct IP4ACCESS *ipaccess = NULL;
 // Socket data buffers
 #define MYBUFSIZ	16384
 #define SOCK_REC_BUFSZ	1024
 // Socket is on 127.0.0.1
 #define QUEUE	10
 #define ALLIP4 "0.0.0.0"
 static const char *localaddr = "127.0.0.1";
 static const char *UNAVAILABLE = " - API will not be available";
 static char *buffer = NULL;
 static time_t startup = 0;
 static int bye = 0;
 extern char *opt_api_allow;
 extern int opt_api_listen; /* port */
 extern int opt_api_remote;
 extern double global_hashrate;
 //extern uint32_t accepted_count;
 //extern uint32_t rejected_count;
 //extern uint32_t solved_count;
 #define cpu_threads opt_n_threads
 #define USE_MONITORING
 extern float cpu_temp(int);
 extern uint32_t cpu_clock(int);
 //extern int cpu_fanpercent(void);
 /***************************************************************/
 static void cpustatus(int thr_id)
 {
   if ( thr_id >= 0 && thr_id < opt_n_threads )
   {
 //      struct cpu_info *cpu = &thr_info[thr_id].cpu;
      char buf[512]; *buf = '\0';
      char units[4] = {0};
      double hashrate = thr_hashrates[thr_id];
      scale_hash_for_display ( &hashrate, units );
      snprintf( buf, sizeof(buf), "CPU=%d;%sH/s=%.2f|", thr_id, units,
                hashrate );
      // append to buffer
      strcat( buffer, buf );
   }
 }
 /*****************************************************************************/
 /**
 * Returns miner global infos
 */
 static char *getsummary( char *params )
 {
   char algo[64]; *algo = '\0';
   time_t ts = time(NULL);
   double uptime = difftime(ts, startup);
   double accps = (60.0 * accepted_share_count) / (uptime ? uptime : 1.0);
   double diff = net_diff > 0. ? net_diff : stratum_diff;
   char diff_str[16];
   double hrate = (double)global_hashrate;
   struct cpu_info cpu = { 0 };
 #ifdef USE_MONITORING
   cpu.has_monitoring = true;
   cpu.cpu_temp = cpu_temp(0);
   cpu.cpu_fan = cpu_fanpercent();
   cpu.cpu_clock = cpu_clock(0);
 #endif
   get_currentalgo(algo, sizeof(algo));
   // if diff is integer don't display decimals
   if ( diff == trunc( diff ) )
       sprintf( diff_str, "%.0f", diff);
   else
       sprintf( diff_str, "%.6f", diff);
   *buffer = '\0';
   sprintf( buffer,
 	  "NAME=%s;VER=%s;API=%s;"
          "ALGO=%s;CPUS=%d;URL=%s;"
          "HS=%.2f;KHS=%.2f;ACC=%d;REJ=%d;SOL=%d;"
          "ACCMN=%.3f;DIFF=%s;TEMP=%.1f;FAN=%d;FREQ=%d;"
          "UPTIME=%.0f;TS=%u|",
           PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
           algo, opt_n_threads, short_url,
 	   hrate, hrate/1000.0, accepted_share_count, rejected_share_count,
 		                                      solved_block_count,
           accps, diff_str, cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
 	   uptime, (uint32_t) ts);
   return buffer;
 }
 /**
 * Builds the per-thread report: clears the shared reply buffer, then lets
 * cpustatus() append one entry for every miner thread. params is not used.
 */
 static char *getthreads(char *params)
 {
 	int thr = 0;
 	buffer[0] = '\0';
 	while (thr < opt_n_threads) {
 		cpustatus(thr);
 		thr++;
 	}
 	return buffer;
 }
 /**
 * Tells whether remote control commands may be executed, i.e. whether
 * opt_api_remote is greater than zero.
 */
 static bool check_remote_access(void)
 {
 	if (opt_api_remote > 0)
 		return true;
 	return false;
 }
 /**
 * Remote command: switch to another pool url (same syntax as --url).
 * Examples:
 * seturl|stratum+tcp://XeVrkPrWB7pDbdFLfKhF1Z3xpqhsx6wkH3:X@stratum+tcp://mine.xpool.ca:1131|
 * seturl|stratum+tcp://Danila.1:X@pool.ipominer.com:3335|
 *
 * The reply is "ok|", or an empty string when remote control is disabled.
 */
 extern bool stratum_need_reset;
 static char *remote_seturl(char *params)
 {
 	buffer[0] = '\0';
 	if (check_remote_access()) {
 		// hand the url to the option parser, then request a stratum reset
 		parse_arg('o', params);
 		stratum_need_reset = true;
 		strcpy(buffer, "ok|");
 	}
 	return buffer;
 }
 /**
 * Remote command: ask the miner to quit by raising the bye flag.
 * The reply is "bye|", or an empty string when remote control is disabled.
 */
 static char *remote_quit(char *params)
 {
 	buffer[0] = '\0';
 	if (check_remote_access()) {
 		bye = 1;
 		strcpy(buffer, "bye|");
 	}
 	return buffer;
 }
 // Forward declaration: gethelp() is referenced by the table below and
 // itself walks that table, so it is defined after it.
 static char *gethelp(char *params);
 // API command table: maps a command name to its handler. Every handler
 // receives the command parameters and returns the reply string.
 struct CMDS {
 	const char *name;
 	char *(*func)(char *);
 } cmds[] = {
 	{ "summary", getsummary },
 	{ "threads", getthreads },
 	/* remote functions */
 	{ "seturl", remote_seturl },
 	{ "quit",    remote_quit },
 	/* keep it the last */
 	/* gethelp() lists every entry except the last one */
 	{ "help",    gethelp },
 };
 // Number of entries in cmds[], "help" included.
 #define CMDMAX ARRAY_SIZE(cmds)
 /**
 * Lists the available commands, one name per line, closed by '|'.
 * The last table entry ("help" itself) is left out. params is not used.
 */
 static char *gethelp(char *params)
 {
 	char *cursor = buffer;
 	int idx = 0;
 	cursor[0] = '\0';
 	while (idx < CMDMAX-1) {
 		cursor += sprintf(cursor, "%s\n", cmds[idx].name);
 		idx++;
 	}
 	sprintf(cursor, "|");
 	return buffer;
 }
 /**
 * Sends a reply string, terminating NUL included, on socket c.
 * A NULL result is sent as an empty string (a single NUL byte).
 * Returns what send() returned; a failure is not handled here.
 */
 static int send_result(SOCKETTYPE c, char *result)
 {
 	const char *payload = result ? result : "";
 	int nbytes = (int) strlen(payload) + 1;
 	return (int) send(c, payload, nbytes, 0);
 }
 /* ---- Base64 Encoding/Decoding Table --- */
 static const char table64[]=
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 /**
 * Base64-encodes insize bytes of indata into outptr, with '=' padding.
 *
 * outptr is zeroed over its whole outlen bytes first and the result is
 * always NUL terminated. If the encoded text does not fit, only the leading
 * 4-character groups that fit (leaving room for the NUL) are written.
 *
 * Returns the number of characters written, not counting the NUL.
 * Returns 0 when outptr is NULL, outlen is 0, or there is nothing to encode.
 */
 static size_t base64_encode(const uchar *indata, size_t insize, char *outptr, size_t outlen)
 {
 	size_t len = 0;
 	if (outptr == NULL || outlen == 0)
 		return 0;
 	memset(outptr, 0, outlen);
 	if (indata == NULL)
 		return 0;
 	while (insize > 0) {
 		uchar ibuf[3] = { 0, 0, 0 };
 		int inputparts = 0;
 		/* a group needs 4 characters, plus one for the final NUL */
 		if (len + 4 >= outlen)
 			break;
 		/* take up to 3 input bytes, missing ones stay zero */
 		while (inputparts < 3 && insize > 0) {
 			ibuf[inputparts++] = *indata++;
 			insize--;
 		}
 		outptr[len]     = table64[(ibuf[0] & 0xFC) >> 2];
 		outptr[len + 1] = table64[((ibuf[0] & 0x03) << 4) | ((ibuf[1] & 0xF0) >> 4)];
 		/* characters that carry no input bits become padding */
 		outptr[len + 2] = (inputparts > 1)
 			? table64[((ibuf[1] & 0x0F) << 2) | ((ibuf[2] & 0xC0) >> 6)]
 			: '=';
 		outptr[len + 3] = (inputparts > 2) ? table64[ibuf[2] & 0x3F] : '=';
 		len += 4;
 	}
 	outptr[len] = '\0';
 	return len;
 }
 //#include "compat/curl-for-windows/openssl/openssl/crypto/sha/sha.h"
 /* websocket handshake (tested in Chrome)
 *
 * Answers a WebSocket upgrade request on socket c: sends the
 * "101 Switching Protocol" header whose accept key is derived from
 * clientkey, immediately followed by result wrapped in one text frame.
 * A NULL result is sent as an empty frame.
 *
 * Returns 0 once the reply was handed to send(), -1 if clientkey is NULL or
 * too long for the key buffer, or if memory could not be allocated.
 */
 static int websocket_handshake(SOCKETTYPE c, char *result, char *clientkey)
 {
 	char answer[256];
 	char inpkey[128] = { 0 };
 	char seckey[64];
 	uchar sha1[20];
 	int n;
 	if (clientkey == NULL)
 		return -1;
 	if (result == NULL)
 		result = "";
 	if (opt_protocol)
 		applog(LOG_DEBUG, "clientkey: %s", clientkey);
 	// clientkey is presumably taken from the peer's request, so its length
 	// cannot be trusted: refuse a key that does not fit instead of
 	// overflowing inpkey.
 	n = snprintf(inpkey, sizeof(inpkey),
 		"%s258EAFA5-E914-47DA-95CA-C5AB0DC85B11", clientkey);
 	if (n < 0 || (size_t) n >= sizeof(inpkey))
 		return -1;
 	// SHA-1 test from rfc, returns in base64 "s3pPLMBiTxaQ9kYGzzhZRbK+xOo="
 	//sprintf(inpkey, "dGhlIHNhbXBsZSBub25jZQ==258EAFA5-E914-47DA-95CA-C5AB0DC85B11");
 	sph_sha1_full( sha1, inpkey, strlen(inpkey) );
 	base64_encode(sha1, 20, seckey, sizeof(seckey));
 	n = snprintf(answer, sizeof(answer),
 		"HTTP/1.1 101 Switching Protocol\r\n"
 		"Upgrade: WebSocket\r\nConnection: Upgrade\r\n"
 		"Sec-WebSocket-Accept: %s\r\n"
 		"Sec-WebSocket-Protocol: text\r\n"
 		"\r\n", seckey);
 	if (n < 0 || (size_t) n >= sizeof(answer))
 		return -1;
 	// data result as tcp frame
 	uchar hd[10] = { 0 };
 	hd[0] = 129; // 0x1 text frame (FIN + opcode)
 	uint64_t datalen = (uint64_t) strlen(result);
 	uint8_t frames = 2;
 	// the payload length is encoded in 1, 1+2 or 1+8 bytes, big endian
 	if (datalen <= 125) {
 		hd[1] = (uchar) (datalen);
 	} else if (datalen <= 65535) {
 		hd[1] = (uchar) 126;
 		hd[2] = (uchar) (datalen >> 8);
 		hd[3] = (uchar) (datalen);
 		frames = 4;
 	} else {
 		hd[1] = (uchar) 127;
 		hd[2] = (uchar) (datalen >> 56);
 		hd[3] = (uchar) (datalen >> 48);
 		hd[4] = (uchar) (datalen >> 40);
 		hd[5] = (uchar) (datalen >> 32);
 		hd[6] = (uchar) (datalen >> 24);
 		hd[7] = (uchar) (datalen >> 16);
 		hd[8] = (uchar) (datalen >> 8);
 		hd[9] = (uchar) (datalen);
 		frames = 10;
 	}
 	size_t handlen = strlen(answer);
 	size_t total = handlen + frames + (size_t) datalen + 1;
 	uchar *data = (uchar*) calloc(1, total);
 	if (data == NULL)
 		return -1;
 	uchar *p = data;
 	// HTTP header 101
 	memcpy(p, answer, handlen);
 	p += handlen;
 	// WebSocket Frame - Header + Data
 	memcpy(p, hd, frames);
 	memcpy(p + frames, result, (size_t)datalen);
 	// the zero byte left at the end by calloc is sent too, as before
 	send(c, (const char*)data, (int) total, 0);
 	free(data);
 	return 0;
 }
 /*
 * N.B. IP4 addresses are by Definition 32bit big endian on all platforms
 */
 static void setup_ipaccess()
 {
 	char *buf = NULL, *ptr, *comma, *slash, *dot;
 	int ipcount, mask, octet, i;
 	char group;
 	buf = (char*) calloc(1, strlen(opt_api_allow) + 1);
 	if (unlikely(!buf))
 		proper_exit(1);//, "Failed to malloc ipaccess buf");
 	strcpy(buf, opt_api_allow);
 	ipcount = 1;
 	ptr = buf;
 	while (*ptr) if (*(ptr++) == ',')
 		ipcount++;
 	// possibly more than needed, but never less
 	ipaccess = (struct IP4ACCESS *) calloc(ipcount, sizeof(struct IP4ACCESS));
 	if (unlikely(!ipaccess))
 		proper_exit(1);//, "Failed to calloc ipaccess");
 	ips = 0;
 	ptr = buf;
 	while (ptr && *ptr) {
 		while (*ptr == ' ' || *ptr == '\t')
 			ptr++;
 		if (*ptr == ',') {
 			ptr++;
 			continue;
 		}
 		comma = strchr(ptr, ',');
 		if (comma)
 			*(comma++) = '\0';
 		group = NOPRIVGROUP;
 		if (isalpha(*ptr) && *(ptr+1) == ':') {
 			if (DEFINEDGROUP(*ptr))
 				group = GROUP(*ptr);
 			ptr += 2;
 		}
 		ipaccess[ips].group = group;
 		if (strcmp(ptr, ALLIP4) == 0)
 			ipaccess[ips].ip = ipaccess[ips].mask = 0;
 		else
 		{
 			slash = strchr(ptr, '/');
 			if (!slash)
 				ipaccess[ips].mask = 0xffffffff;
 			else {
 				*(slash++) = '\0';
 				mask = atoi(slash);
 				if (mask < 1 || mask > 32)
 					goto popipo; // skip invalid/zero
 				ipaccess[ips].mask = 0;
 				while (mask-- >= 0) {
 					octet = 1 << (mask % 8);
 					ipaccess[ips].mask |= (octet << (24 - (8 * (mask >> 3))));
 				}
 			}
 			ipaccess[ips].ip = 0; // missing default to '.0'
 			for (i = 0; ptr && (i < 4); i++) {
 				dot = strchr(ptr, '.');
 				if (dot)
 					*(dot++) = '\0';
 				octet = atoi(ptr);
 				if (octet < 0 || octet > 0xff)
 					goto popipo; // skip invalid
 				ipaccess[ips].ip |= (octet << (24 - (i * 8)));
 				ptr = dot;
 			}
 			ipaccess[ips].ip &= ipaccess[ips].mask;
 		}
 		ips++;
 popipo:
 		ptr = comma;
 	}
 	free(buf);
 }
 /**
 * Decides whether the client described by cli may use the API.
 * *connectaddr receives the client address as text (from inet_ntoa) and
 * *group the privilege group of the first matching allow-list entry, or the
 * no-privilege group otherwise.
 * Without an allow list only the local address is accepted.
 */
 static bool check_connect(struct sockaddr_in *cli, char **connectaddr, char *group)
 {
 	*connectaddr = inet_ntoa(cli->sin_addr);
 	*group = NOPRIVGROUP;
 	if (!opt_api_allow)
 		return strcmp(*connectaddr, localaddr) == 0;
 	int client_ip = htonl(cli->sin_addr.s_addr);
 	for (int idx = 0; idx < ips; idx++) {
 		if ((client_ip & ipaccess[idx].mask) != ipaccess[idx].ip)
 			continue;
 		// first matching entry wins
 		*group = ipaccess[idx].group;
 		return true;
 	}
 	return false;
 }
 /*
  * Run the API listener until `bye` becomes non-zero or accept() fails.
  *
  * Steps: create a TCP socket, bind it to opt_api_allow:opt_api_listen
  * (retrying the bind for about a minute), listen, then serve one request
  * per accepted connection. A request is either a plain "command|params"
  * line or an HTTP "GET /command/params" request; when the latter carries a
  * Sec-WebSocket-Key header the reply goes through websocket_handshake(),
  * otherwise through send_result().
  *
  * Every exit path releases the listening socket, the heap cell holding it
  * and the global reply `buffer`; every accepted client socket is closed
  * before the next accept().
  */
 static void api()
 {
 	const char *addr = opt_api_allow;
 	unsigned short port = (unsigned short) opt_api_listen; // 4048
 	char buf[MYBUFSIZ];
 	int c, n, bound;
 	char *connectaddr;
 	const char *binderror = "";
 	char group;
 	time_t bindstart;
 	struct sockaddr_in serv;
 	struct sockaddr_in cli;
 	uint32_t clisiz;
 	bool addrok = false;
 	char *result;
 	char *params;
 	int i;
 	SOCKETTYPE *apisock;
 	// A zero listen port means the API is disabled; only the log line
 	// depends on the debug flag, not the decision to stop.
 	if (!opt_api_listen) {
 		if (opt_debug)
 			applog(LOG_DEBUG, "API disabled");
 		return;
 	}
 	if (opt_api_allow) {
 		setup_ipaccess();
 		// NOTE(review): the warning says the API is not running, yet
 		// execution continues; with an empty allow list check_connect()
 		// rejects every client. Confirm whether a return is wanted here.
 		if (ips == 0) {
 			applog(LOG_WARNING, "API not running (no valid IPs specified)%s", UNAVAILABLE);
 		}
 	}
 	apisock = (SOCKETTYPE*) calloc(1, sizeof(*apisock));
 	if (apisock == NULL) {
 		applog(LOG_ERR, "API initialisation failed (%s)%s", strerror(errno), UNAVAILABLE);
 		return;
 	}
 	*apisock = INVSOCK;
 	sleep(1);
 	*apisock = socket(AF_INET, SOCK_STREAM, 0);
 	if (*apisock == INVSOCK) {
 		applog(LOG_ERR, "API initialisation failed (%s)%s", strerror(errno), UNAVAILABLE);
 		free(apisock);
 		return;
 	}
 	memset(&serv, 0, sizeof(serv));
 	serv.sin_family = AF_INET;
 	// inet_addr() must not be handed a NULL string; treat a missing bind
 	// address the same way as an unparsable one.
 	if (addr != NULL)
 		serv.sin_addr.s_addr = inet_addr(addr);
 	if (addr == NULL || serv.sin_addr.s_addr == (in_addr_t)INVINETADDR) {
 		applog(LOG_ERR, "API initialisation 2 failed (%s)%s", strerror(errno), UNAVAILABLE);
 		CLOSESOCKET(*apisock);
 		free(apisock);
 		return;
 	}
 	serv.sin_port = htons(port);
 #ifndef WIN32
 	// On linux with SO_REUSEADDR, bind will get the port if the previous
 	// socket is closed (even if it is still in TIME_WAIT) but fail if
 	// another program has it open - which is what we want
 	int optval = 1;
 	// If it doesn't work, we don't really care - just show a debug message
 	if (SOCKETFAIL(setsockopt(*apisock, SOL_SOCKET, SO_REUSEADDR, (void *)(&optval), sizeof(optval))))
 		applog(LOG_DEBUG, "API setsockopt SO_REUSEADDR failed (ignored): %s", SOCKERRMSG);
 #else
 	// On windows a 2nd program can bind to a port>1024 already in use unless
 	// SO_EXCLUSIVEADDRUSE is used - however then the bind to a closed port
 	// in TIME_WAIT will fail until the timeout - so we leave the options alone
 #endif
 	// try for 1 minute ... in case the old one hasn't completely gone yet
 	bound = 0;
 	bindstart = time(NULL);
 	while (bound == 0) {
 		if (bind(*apisock, (struct sockaddr *)(&serv), sizeof(serv)) < 0) {
 			binderror = strerror(errno);
 			if ((time(NULL) - bindstart) > 61)
 				break;
 			else {
 				if (!opt_quiet || opt_debug)
 					applog(LOG_WARNING, "API bind to port %d failed - trying again in 20sec", port);
 				sleep(20);
 			}
 		}
 		else
 			bound = 1;
 	}
 	if (bound == 0) {
 		applog(LOG_WARNING, "API bind to port %d failed (%s)%s", port, binderror, UNAVAILABLE);
 		CLOSESOCKET(*apisock);
 		free(apisock);
 		return;
 	}
 	if (SOCKETFAIL(listen(*apisock, QUEUE))) {
 		applog(LOG_ERR, "API initialisation 3 failed (%s)%s", strerror(errno), UNAVAILABLE);
 		CLOSESOCKET(*apisock);
 		free(apisock);
 		return;
 	}
 	buffer = (char *) calloc(1, MYBUFSIZ + 1);
 	if (buffer == NULL) {
 		applog(LOG_ERR, "API initialisation 3 failed (%s)%s", strerror(errno), UNAVAILABLE);
 		CLOSESOCKET(*apisock);
 		free(apisock);
 		return;
 	}
 	while (bye == 0) {
 		clisiz = sizeof(cli);
 		if (SOCKETFAIL(c = accept((SOCKETTYPE)*apisock, (struct sockaddr *)(&cli), &clisiz))) {
 			applog(LOG_ERR, "API failed (%s)%s", strerror(errno), UNAVAILABLE);
 			CLOSESOCKET(*apisock);
 			free(apisock);
 			free(buffer);
 			return;
 		}
 		addrok = check_connect(&cli, &connectaddr, &group);
 		if (opt_debug && opt_protocol)
 			applog(LOG_DEBUG, "API: connection from %s - %s",
 				connectaddr, addrok ? "Accepted" : "Ignored");
 		if (addrok) {
 			bool fail;
 			char *wskey = NULL;
 			// NOTE(review): assumes SOCK_REC_BUFSZ < MYBUFSIZ so that
 			// buf[n] below stays inside buf - confirm against the defines.
 			n = recv(c, &buf[0], SOCK_REC_BUFSZ, 0);
 			fail = SOCKETFAIL(n);
 			if (fail)
 				buf[0] = '\0';
 			else if (n > 0 && buf[n-1] == '\n') {
 				/* telnet compat \r\n */
 				buf[n-1] = '\0'; n--;
 				if (n > 0 && buf[n-1] == '\r')
 					buf[n-1] = '\0';
 			}
 			if (n >= 0)
 				buf[n] = '\0';
 			if (!fail) {
 				char *msg = NULL;
 				/* Websocket requests compat. */
 				if ((msg = strstr(buf, "GET /")) && strlen(msg) > 5) {
 					char cmd[256] = { 0 };
 					// Field width keeps a long request path from
 					// overrunning cmd (255 chars + terminator).
 					sscanf(&msg[5], "%255s\n", cmd);
 					// "command/params/..." -> "command|params"
 					params = strchr(cmd, '/');
 					if (params)
 						*(params++) = '|';
 					params = strchr(cmd, '/');
 					if (params)
 						*(params++) = '\0';
 					wskey = strstr(msg, "Sec-WebSocket-Key");
 					if (wskey) {
 						char *eol = strchr(wskey, '\r');
 						if (eol) *eol = '\0';
 						// A header without ':' is malformed: fall back to a
 						// plain reply instead of dereferencing NULL + 1.
 						wskey = strchr(wskey, ':');
 						if (wskey) {
 							wskey++;
 							while ((*wskey) == ' ') wskey++; // ltrim
 						}
 					}
 					n = sprintf(buf, "%s", cmd);
 				}
 				params = strchr(buf, '|');
 				if (params != NULL)
 					*(params++) = '\0';
 				if (opt_debug && opt_protocol && n > 0)
 					applog(LOG_DEBUG, "API: exec command %s(%s)", buf, params ? params : "");
 				for (i = 0; i < CMDMAX; i++) {
 					if (strcmp(buf, cmds[i].name) == 0) {
 						if (params && strlen(params)) {
 							// remove possible trailing |
 							if (params[strlen(params) - 1] == '|')
 								params[strlen(params) - 1] = '\0';
 						}
 						result = (cmds[i].func)(params);
 						if (wskey) {
 							websocket_handshake(c, result, wskey);
 							break;
 						}
 						send_result(c, result);
 						break;
 					}
 				}
 			}
 		}
 		// Close the client socket on every path (rejected peer, failed
 		// recv, served request) so descriptors are not leaked.
 		CLOSESOCKET(c);
 	}
 	CLOSESOCKET(*apisock);
 	free(apisock);
 	free(buffer);
 }
 /* external access */
 /*
  * Thread entry point for the API listener.
  *
  * userdata - the struct thr_info of this thread
  *
  * Stores the current time in `startup`, runs api() until it returns,
  * freezes the thread's queue and, when `bye` is set, calls proper_exit(1).
  * Always returns NULL.
  */
 void *api_thread(void *userdata)
 {
 	struct thr_info *self = (struct thr_info*)userdata;
 	startup = time(NULL);
 	api();
 	tq_freeze(self->q);
 	if (!bye)
 		return NULL;
 	// quit command
 	proper_exit(1);
 	return NULL;
 }
--- a/rin/miner/cpuminer/armbuild-all.sh
+++ b/rin/miner/cpuminer/armbuild-all.sh
@@ -1,57 +0,0 @@
 #!/bin/bash
 #
 # This script is not intended for users, it is only used for compile testing
 # during development. However the information contained may provide compilation
 # tips to users.
 # Remove binaries left by earlier runs. -f keeps rm quiet and successful when a
 # file is missing; stderr is silenced as well because rm reports errors there.
 rm -f cpuminer cpuminer-armv9-crypto-sha3 cpuminer-armv9-crypto cpuminer-armv9 cpuminer-armv8.5-crypto-sha3-sve2 cpuminer-armv8.4-crypto-sha3 cpuminer-armv8 cpuminer-armv8-crypto cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2-sha cpuminer-avx2-sha-vaes cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 cpuminer-zen4 cpuminer-alderlake cpuminer-x64 > /dev/null 2>&1
 # armv9 needs gcc-13
 # -march=armv9-a includes SVE2 but no crypto
 # -march=armv9-a+crypto adds AES & SHA2 but not SHA512
 make distclean || echo clean
 rm -f config.status
 ./autogen.sh || echo done
 CFLAGS="-O3 -march=armv9-a+crypto+sha3 -Wall -flax-vector-conversions" ./configure  --with-curl
 make -j $(nproc)
 mv cpuminer cpuminer-armv9-crypto-sha3
 make clean || echo clean
 CFLAGS="-O3 -march=armv9-a+crypto -Wall -flax-vector-conversions" ./configure  --with-curl
 make -j $(nproc)
 mv cpuminer cpuminer-armv9-crypto
 make clean || echo clean
 CFLAGS="-O3 -march=armv9-a -Wall -flax-vector-conversions" ./configure  --with-curl
 make -j $(nproc)
 mv cpuminer cpuminer-armv9
 # SVE2 available in armv8.5
 make clean || echo clean
 CFLAGS="-O3 -march=armv8.5-a+crypto+sha3+sve2 -Wall -flax-vector-conversions" ./configure  --with-curl
 make -j $(nproc)
 mv cpuminer cpuminer-armv8.5-crypto-sha3-sve2
 # SHA3 available in armv8.4
 make clean || echo clean
 CFLAGS="-O3 -march=armv8.4-a+crypto+sha3 -Wall -flax-vector-conversions" ./configure  --with-curl
 make -j $(nproc)
 mv cpuminer cpuminer-armv8.4-crypto-sha3
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=armv8-a+crypto -Wall -flax-vector-conversions" ./configure  --with-curl
 make -j $(nproc)
 mv cpuminer cpuminer-armv8-crypto
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=armv8-a -Wall -flax-vector-conversions" ./configure  --with-curl
 make -j $(nproc)
 mv cpuminer cpuminer-armv8
 # Native to host CPU; the result stays named "cpuminer"
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=native -Wall -flax-vector-conversions" ./configure  --with-curl
 make -j $(nproc)
--- a/rin/miner/cpuminer/autogen.sh
+++ b/rin/miner/cpuminer/autogen.sh
@@ -1,13 +0,0 @@
 #!/bin/sh
 # Regenerate the autotools build files by running aclocal, autoheader,
 # automake and autoconf in that order.
 # You need autoconf 2.5x, preferably 2.57 or later
 # You need automake 1.7 or later. 1.6 might work.
 # Stop at the first command that fails.
 set -e
 aclocal
 autoheader
 automake --foreign --add-missing --force-missing --copy
 # automake --gnu --add-missing --copy
 autoconf
--- a/rin/miner/cpuminer/bitcoin-config.h
+++ b/rin/miner/cpuminer/bitcoin-config.h
@@ -1,375 +0,0 @@
 /* src/config/bitcoin-config.h.  Generated from bitcoin-config.h.in by configure.  */
 /* src/config/bitcoin-config.h.in.  Generated from configure.ac by autoheader.  */
 #ifndef BITCOIN_CONFIG_H
 #define BITCOIN_CONFIG_H
 /* Define if building universal (internal helper macro) */
 /* #undef AC_APPLE_UNIVERSAL_BUILD */
 /* Version Build */
 #define CLIENT_VERSION_BUILD 0
 /* Version is release */
 #define CLIENT_VERSION_IS_RELEASE true
 /* Major version */
 #define CLIENT_VERSION_MAJOR 0
 /* Minor version */
 #define CLIENT_VERSION_MINOR 11
 /* Build revision */
 #define CLIENT_VERSION_REVISION 2
 /* Copyright year */
 #define COPYRIGHT_YEAR 2015
 /* Define to 1 to enable wallet functions */
 #define ENABLE_WALLET 1
 /* parameter and return value type for __fdelt_chk */
 /* #undef FDELT_TYPE */
 /* define if the Boost library is available */
 #define HAVE_BOOST /**/
 /* define if the Boost::Chrono library is available */
 #define HAVE_BOOST_CHRONO /**/
 /* define if the Boost::Filesystem library is available */
 #define HAVE_BOOST_FILESYSTEM /**/
 /* define if the Boost::PROGRAM_OPTIONS library is available */
 #define HAVE_BOOST_PROGRAM_OPTIONS /**/
 /* define if the Boost::System library is available */
 #define HAVE_BOOST_SYSTEM /**/
 /* define if the Boost::Thread library is available */
 #define HAVE_BOOST_THREAD /**/
 /* define if the Boost::Unit_Test_Framework library is available */
 #define HAVE_BOOST_UNIT_TEST_FRAMEWORK /**/
 /* Define to 1 if you have the <byteswap.h> header file. */
 #define HAVE_BYTESWAP_H 1
 /* Define this symbol if the consensus lib has been built */
 #define HAVE_CONSENSUS_LIB 1
 /* Define to 1 if you have the declaration of `be16toh', and to 0 if you
   don't. */
 #define HAVE_DECL_BE16TOH 1
 /* Define to 1 if you have the declaration of `be32toh', and to 0 if you
   don't. */
 #define HAVE_DECL_BE32TOH 1
 /* Define to 1 if you have the declaration of `be64toh', and to 0 if you
   don't. */
 #define HAVE_DECL_BE64TOH 1
 /* Define to 1 if you have the declaration of `bswap_16', and to 0 if you
   don't. */
 #define HAVE_DECL_BSWAP_16 1
 /* Define to 1 if you have the declaration of `bswap_32', and to 0 if you
   don't. */
 #define HAVE_DECL_BSWAP_32 1
 /* Define to 1 if you have the declaration of `bswap_64', and to 0 if you
   don't. */
 #define HAVE_DECL_BSWAP_64 1
 /* Define to 1 if you have the declaration of `htobe16', and to 0 if you
   don't. */
 #define HAVE_DECL_HTOBE16 1
 /* Define to 1 if you have the declaration of `htobe32', and to 0 if you
   don't. */
 #define HAVE_DECL_HTOBE32 1
 /* Define to 1 if you have the declaration of `htobe64', and to 0 if you
   don't. */
 #define HAVE_DECL_HTOBE64 1
 /* Define to 1 if you have the declaration of `htole16', and to 0 if you
   don't. */
 #define HAVE_DECL_HTOLE16 1
 /* Define to 1 if you have the declaration of `htole32', and to 0 if you
   don't. */
 #define HAVE_DECL_HTOLE32 1
 /* Define to 1 if you have the declaration of `htole64', and to 0 if you
   don't. */
 #define HAVE_DECL_HTOLE64 1
 /* Define to 1 if you have the declaration of `le16toh', and to 0 if you
   don't. */
 #define HAVE_DECL_LE16TOH 1
 /* Define to 1 if you have the declaration of `le32toh', and to 0 if you
   don't. */
 #define HAVE_DECL_LE32TOH 1
 /* Define to 1 if you have the declaration of `le64toh', and to 0 if you
   don't. */
 #define HAVE_DECL_LE64TOH 1
 /* Define to 1 if you have the declaration of `strerror_r', and to 0 if you
   don't. */
 #define HAVE_DECL_STRERROR_R 1
 /* Define to 1 if you have the declaration of `strnlen', and to 0 if you
   don't. */
 #define HAVE_DECL_STRNLEN 1
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #define HAVE_DLFCN_H 1
 /* Define to 1 if you have the <endian.h> header file. */
 #define HAVE_ENDIAN_H 1
 /* Define to 1 if the system has the `dllexport' function attribute */
 /* #undef HAVE_FUNC_ATTRIBUTE_DLLEXPORT */
 /* Define to 1 if the system has the `dllimport' function attribute */
 /* #undef HAVE_FUNC_ATTRIBUTE_DLLIMPORT */
 /* Define to 1 if the system has the `visibility' function attribute */
 #define HAVE_FUNC_ATTRIBUTE_VISIBILITY 1
 /* Define this symbol if you have getaddrinfo_a */
 #define HAVE_GETADDRINFO_A 1
 /* Define this symbol if you have inet_pton */
 #define HAVE_INET_PTON 1
 /* Define to 1 if you have the <inttypes.h> header file. */
 #define HAVE_INTTYPES_H 1
 /* Define to 1 if you have the `advapi32' library (-ladvapi32). */
 /* #undef HAVE_LIBADVAPI32 */
 /* Define to 1 if you have the `comctl32' library (-lcomctl32). */
 /* #undef HAVE_LIBCOMCTL32 */
 /* Define to 1 if you have the `comdlg32' library (-lcomdlg32). */
 /* #undef HAVE_LIBCOMDLG32 */
 /* Define to 1 if you have the `crypt32' library (-lcrypt32). */
 /* #undef HAVE_LIBCRYPT32 */
 /* Define to 1 if you have the `crypto' library (-lcrypto). */
 #define HAVE_LIBCRYPTO 1
 /* Define to 1 if you have the `gdi32' library (-lgdi32). */
 /* #undef HAVE_LIBGDI32 */
 /* Define to 1 if you have the `imm32' library (-limm32). */
 /* #undef HAVE_LIBIMM32 */
 /* Define to 1 if you have the `iphlpapi' library (-liphlpapi). */
 /* #undef HAVE_LIBIPHLPAPI */
 /* Define to 1 if you have the `jpeg ' library (-ljpeg ). */
 /* #undef HAVE_LIBJPEG_ */
 /* Define to 1 if you have the `kernel32' library (-lkernel32). */
 /* #undef HAVE_LIBKERNEL32 */
 /* Define to 1 if you have the `mingwthrd' library (-lmingwthrd). */
 /* #undef HAVE_LIBMINGWTHRD */
 /* Define to 1 if you have the `mswsock' library (-lmswsock). */
 /* #undef HAVE_LIBMSWSOCK */
 /* Define to 1 if you have the `ole32' library (-lole32). */
 /* #undef HAVE_LIBOLE32 */
 /* Define to 1 if you have the `oleaut32' library (-loleaut32). */
 /* #undef HAVE_LIBOLEAUT32 */
 /* Define to 1 if you have the `png ' library (-lpng ). */
 /* #undef HAVE_LIBPNG_ */
 /* Define to 1 if you have the `rpcrt4' library (-lrpcrt4). */
 /* #undef HAVE_LIBRPCRT4 */
 /* Define to 1 if you have the `shell32' library (-lshell32). */
 /* #undef HAVE_LIBSHELL32 */
 /* Define to 1 if you have the `shlwapi' library (-lshlwapi). */
 /* #undef HAVE_LIBSHLWAPI */
 /* Define to 1 if you have the `ssp' library (-lssp). */
 /* #undef HAVE_LIBSSP */
 /* Define to 1 if you have the `user32' library (-luser32). */
 /* #undef HAVE_LIBUSER32 */
 /* Define to 1 if you have the `uuid' library (-luuid). */
 /* #undef HAVE_LIBUUID */
 /* Define to 1 if you have the `winmm' library (-lwinmm). */
 /* #undef HAVE_LIBWINMM */
 /* Define to 1 if you have the `winspool' library (-lwinspool). */
 /* #undef HAVE_LIBWINSPOOL */
 /* Define to 1 if you have the `ws2_32' library (-lws2_32). */
 /* #undef HAVE_LIBWS2_32 */
 /* Define to 1 if you have the `z ' library (-lz ). */
 /* #undef HAVE_LIBZ_ */
 /* Define to 1 if you have the <memory.h> header file. */
 #define HAVE_MEMORY_H 1
 /* Define to 1 if you have the <miniupnpc/miniupnpc.h> header file. */
 #define HAVE_MINIUPNPC_MINIUPNPC_H 1
 /* Define to 1 if you have the <miniupnpc/miniwget.h> header file. */
 #define HAVE_MINIUPNPC_MINIWGET_H 1
 /* Define to 1 if you have the <miniupnpc/upnpcommands.h> header file. */
 #define HAVE_MINIUPNPC_UPNPCOMMANDS_H 1
 /* Define to 1 if you have the <miniupnpc/upnperrors.h> header file. */
 #define HAVE_MINIUPNPC_UPNPERRORS_H 1
 /* Define this symbol if you have MSG_NOSIGNAL */
 #define HAVE_MSG_NOSIGNAL 1
 /* Define if you have POSIX threads libraries and header files. */
 #define HAVE_PTHREAD 1
 /* Have PTHREAD_PRIO_INHERIT. */
 #define HAVE_PTHREAD_PRIO_INHERIT 1
 /* Define to 1 if you have the <stdint.h> header file. */
 #define HAVE_STDINT_H 1
 /* Define to 1 if you have the <stdio.h> header file. */
 #define HAVE_STDIO_H 1
 /* Define to 1 if you have the <stdlib.h> header file. */
 #define HAVE_STDLIB_H 1
 /* Define to 1 if you have the `strerror_r' function. */
 #define HAVE_STRERROR_R 1
 /* Define to 1 if you have the <strings.h> header file. */
 #define HAVE_STRINGS_H 1
 /* Define to 1 if you have the <string.h> header file. */
 #define HAVE_STRING_H 1
 /* Define to 1 if you have the <sys/endian.h> header file. */
 /* #undef HAVE_SYS_ENDIAN_H */
 /* Define to 1 if you have the <sys/prctl.h> header file. */
 #define HAVE_SYS_PRCTL_H 1
 /* Define to 1 if you have the <sys/select.h> header file. */
 #define HAVE_SYS_SELECT_H 1
 /* Define to 1 if you have the <sys/stat.h> header file. */
 #define HAVE_SYS_STAT_H 1
 /* Define to 1 if you have the <sys/types.h> header file. */
 #define HAVE_SYS_TYPES_H 1
 /* Define to 1 if you have the <unistd.h> header file. */
 #define HAVE_UNISTD_H 1
 /* Define if the visibility attribute is supported. */
 #define HAVE_VISIBILITY_ATTRIBUTE 1
 /* Define this symbol if boost sleep works */
 /* #undef HAVE_WORKING_BOOST_SLEEP */
 /* Define this symbol if boost sleep_for works */
 #define HAVE_WORKING_BOOST_SLEEP_FOR 1
 /* Define to the sub-directory in which libtool stores uninstalled libraries.
   */
 #define LT_OBJDIR ".libs/"
 /* Define to the address where bug reports for this package should be sent. */
 //#define PACKAGE_BUGREPORT "https://github.com/FreeTrade/HOdlcoin/issues"
 /* Define to the full name of this package. */
 //#define PACKAGE_NAME "HOdlcoin Core"
 /* Define to the full name and version of this package. */
 //#define PACKAGE_STRING "HOdlcoin Core 0.11.2"
 /* Define to the one symbol short name of this package. */
 //#define PACKAGE_TARNAME "hodlcoin"
 /* Define to the home page for this package. */
 //#define PACKAGE_URL ""
 /* Define to the version of this package. */
 //#define PACKAGE_VERSION "0.11.2"
 /* Define to necessary symbol if this constant uses a non-standard name on
   your system. */
 /* #undef PTHREAD_CREATE_JOINABLE */
 /* Define this symbol if the qt platform is cocoa */
 /* #undef QT_QPA_PLATFORM_COCOA */
 /* Define this symbol if the qt platform is windows */
 /* #undef QT_QPA_PLATFORM_WINDOWS */
 /* Define this symbol if the qt platform is xcb */
 /* #undef QT_QPA_PLATFORM_XCB */
 /* Define this symbol if qt plugins are static */
 /* #undef QT_STATICPLUGIN */
 /* Define to 1 if you have the ANSI C header files. */
 #define STDC_HEADERS 1
 /* Define to 1 if strerror_r returns char *. */
 #define STRERROR_R_CHAR_P 1
 /* Define if dbus support should be compiled in */
 /* #undef USE_DBUS */
 /* Define if QR support should be compiled in */
 /* #undef USE_QRCODE */
 /* UPnP support not compiled if undefined, otherwise value (0 or 1) determines
   default state */
 #define USE_UPNP 0
 /* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
   significant byte first (like Motorola and SPARC, unlike Intel). */
 #if defined AC_APPLE_UNIVERSAL_BUILD
 # if defined __BIG_ENDIAN__
 #  define WORDS_BIGENDIAN 1
 # endif
 #else
 # ifndef WORDS_BIGENDIAN
 /* #  undef WORDS_BIGENDIAN */
 # endif
 #endif
 /* Enable large inode numbers on Mac OS X 10.5.  */
 #ifndef _DARWIN_USE_64_BIT_INODE
 # define _DARWIN_USE_64_BIT_INODE 1
 #endif
 /* Number of bits in a file offset, on hosts where this is settable. */
 /* #undef _FILE_OFFSET_BITS */
 /* Define for large files, on AIX-style hosts. */
 /* #undef _LARGE_FILES */
 #endif //BITCOIN_CONFIG_H
--- a/rin/miner/cpuminer/build-allarch.sh
+++ b/rin/miner/cpuminer/build-allarch.sh
@@ -1,181 +0,0 @@
 #!/bin/bash
 #
 # This script is not intended for users, it is only used for compile testing
 # during development. However the information contained may provide compilation
 # tips to users.
 # Remove binaries left by earlier runs. -f keeps rm quiet and successful when a
 # file is missing; stderr is silenced as well because rm reports errors there.
 rm -f cpuminer-arrowlake* cpuminer-graniterapids* cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 cpuminer-zen4 cpuminer-zen5 cpuminer-alderlake cpuminer-x64 cpuminer-armv8 cpuminer-armv8-aes cpuminer-armv8-sha2 cpuminer-armv8-aes-sha2 > /dev/null 2>&1
 # AVX512 SHA VAES: Intel Core Icelake, Rocketlake
 make distclean || echo clean
 rm -f config.status
 ./autogen.sh || echo done
 CFLAGS="-O3 -march=icelake-client -Wall" ./configure --with-curl
 # Rocketlake needs gcc-11
 #CFLAGS="-O3 -march=rocketlake -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx512-sha-vaes
 # Intel Core Alderlake: AVX2 SHA VAES, needs gcc-12
 #make clean || echo clean
 #rm -f config.status
 #CFLAGS="-O3 -march=alderlake -Wall" ./configure --with-curl
 #make -j 8
 #strip -s cpuminer
 #mv cpuminer cpuminer-alderlake
 # Intel Core Arrowlake-s: AVX2 SHA512 VAES, needs gcc-14
 # Arrowlake-s includes SHA512, Arrowlake does not?
 #make clean || echo clean
 #rm -f config.status
 #CFLAGS="-O3 -march=arrowlake-s -Wall" ./configure --with-curl
 #make -j 8
 #strip -s cpuminer
 #mv cpuminer cpuminer-arrowlake-s
 # Intel Core Graniterapids: AVX512, SHA256, VAES, needs gcc-14
 # Apparently Graniterapids will not include AVX10, SHA512 or APX,
 # wait for Diamondrapids & gcc-15.
 #make clean || echo clean
 #rm -f config.status
 #CFLAGS="-O3 -march=graniterapids -Wall" ./configure --with-curl
 #make -j 8
 #strip -s cpuminer
 #mv cpuminer cpuminer-graniterapids
 # Force AVX10-256
 #make clean || echo clean
 #rm -f config.status
 #CFLAGS="-O3 -march=arrowlake-s -mavx10.1-256 -Wall" ./configure --with-curl
 #make -j 8
 #strip -s cpuminer
 #mv cpuminer cpuminer-avx10-256
 # Force SHA512 AVX10-512
 #make clean || echo clean
 #rm -f config.status
 #CFLAGS="-O3 -march=graniterapids -msha512 -mavx10.1-512 -Wall" ./configure --with-curl
 #make -j 8
 #strip -s cpuminer
 #mv cpuminer cpuminer-avx10-512
 # Zen5: AVX512 SHA VAES, requires gcc-14.
 #make clean || echo clean
 #rm -f config.status
 #CFLAGS="-O3 -march=znver5 -Wall" ./configure --with-curl
 #make -j $(nproc)
 #strip -s cpuminer
 #mv cpuminer cpuminer-zen5
 # Zen4: AVX512 SHA VAES
 make clean || echo clean
 rm -f config.status
 # znver3 needs gcc-11, znver4 needs gcc-12.3.
 #CFLAGS="-O3 -march=znver4 -Wall" ./configure --with-curl
 # Incomplete list of Zen4 AVX512 extensions, but it includes all extensions used by cpuminer.
 CFLAGS="-O3 -march=znver3 -mavx512f -mavx512cd -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq -Wall" ./configure --with-curl
 #CFLAGS="-O3 -march=znver2 -mvaes -mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-zen4
 # Zen3 AVX2 SHA VAES
 make clean || echo clean
 rm -f config.status
 #CFLAGS="-O3 -march=znver2 -mvaes" ./configure --with-curl
 CFLAGS="-O3 -march=znver3 -fno-common " ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-zen3
 # AVX512 AES: Intel Core HEDT Skylake-X, Cascadelake
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=skylake-avx512 -maes -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx512
 # AVX2 SHA VAES: generic, zen3, alderlake...arrowlake
 make clean || echo done
 rm -f config.status
 # vaes doesn't include aes
 CFLAGS="-O3 -maes -mavx2 -msha -mvaes -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx2-sha-vaes
 # AVX2 SHA AES: AMD Zen1
 make clean || echo done
 rm -f config.status
 #CFLAGS="-O3 -march=znver1 -maes -Wall" ./configure --with-curl
 CFLAGS="-O3 -maes -mavx2 -msha -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx2-sha
 # AVX2 AES: Intel Haswell..Cometlake
 make clean || echo clean
 rm -f config.status
 # GCC 9 doesn't include AES with core-avx2
 CFLAGS="-O3 -march=core-avx2 -maes -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx2
 # AVX AES: Intel Sandybridge, Ivybridge
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=corei7-avx -maes -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-avx
 # SSE4.2 AES: Intel Westmere, most Pentium & Celeron
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=westmere -maes -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-aes-sse42
 # SSE4.2: Intel Nehalem
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=corei7 -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-sse42
 # SSSE3: Intel Core2
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=core2 -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-ssse3
 # SSE2
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -msse2 -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-sse2
 # X86_64
 make clean || echo clean
 rm -f config.status
 CFLAGS="-O3 -march=x86-64 -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
 mv cpuminer cpuminer-x64
 # Native to host CPU
 make clean || echo done
 rm -f config.status
 CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
 make -j $(nproc)
 strip -s cpuminer
--- a/rin/miner/cpuminer/build-armv8.sh
+++ b/rin/miner/cpuminer/build-armv8.sh
@@ -1,15 +0,0 @@
 #!/bin/bash
 # Linux build
 # Configures for the aarch64-cortexa76-elf host/target from an
 # x86_64-pc-linux-gnu build machine, then builds and strips cpuminer.
 # Start from a clean tree; a failing distclean only prints "clean".
 make distclean || echo clean
 rm -f config.status
 # A failing autogen.sh only prints "done"; the script continues regardless.
 ./autogen.sh || echo done
 CFLAGS="-O2 -march=armv8-a+crypto+sha2+aes -Wall -flax-vector-conversions" ./configure  --with-curl  --host=aarch64-cortexa76-elf --build=x86_64-pc-linux-gnu --target=aarch64-cortexa76-elf
 # Alternative configure line without the --host/--build/--target options:
 #CFLAGS="-O2 -march=armv8-a+crypto+sha2+aes -Wall -flax-vector-conversions" ./configure  --with-curl
 # One make job per processor reported by nproc.
 make -j $(nproc)
 strip -s cpuminer
--- a/rin/miner/cpuminer/build-avx2.sh
+++ b/rin/miner/cpuminer/build-avx2.sh
@@ -1,9 +0,0 @@
 #!/bin/sh
 # Linux build
 # Builds cpuminer with -march=haswell -maes.
 # Start from a clean tree; a failing distclean only prints "clean".
 make distclean || echo clean
 rm -f config.status
 # A failing autogen.sh only prints "done"; the script continues regardless.
 ./autogen.sh || echo done
 CFLAGS="-O3 -march=haswell -maes -Wall" ./configure --with-curl
 # One make job per processor reported by nproc.
 make -j $(nproc)
--- a/rin/miner/cpuminer/build.sh
+++ b/rin/miner/cpuminer/build.sh
@@ -1,9 +0,0 @@
 #!/bin/sh
 # Builds cpuminer with -march=native, i.e. tuned for the CPU of the build host.
 # Start from a clean tree; a failing distclean only prints "clean".
 make distclean || echo clean
 rm -f config.status
 # A failing autogen.sh only prints "done"; the script continues regardless.
 ./autogen.sh || echo done
 CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
 # One make job per processor reported by nproc.
 make -j $(nproc)
 # Stripping the binary is left disabled here.
 #strip -s cpuminer
--- a/rin/miner/cpuminer/clean-all.sh
+++ b/rin/miner/cpuminer/clean-all.sh
@@ -1,9 +0,0 @@
 #!/bin/sh
 #
 # make clean and rm all the targeted executables.
 # -f keeps rm quiet and successful when a binary is missing; stderr is
 # silenced as well because that is where rm reports errors.
 rm -f cpuminer-avx10* cpuminer-arrowlake* cpuminer-graniterapids* cpuminer-avx512-sha-vaes cpuminer-alderlake cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-avx2-sha cpuminer-sse42 cpuminer-ssse3 cpuminer-avx2-sha-vaes cpuminer-zen3 cpuminer-zen4 cpuminer-x64 cpuminer-armv9 cpuminer-armv9-crypto cpuminer-armv9-crypto-sha3 cpuminer-armv8.4-crypto-sha3 cpuminer-armv8.5-crypto-sha3-sve2 cpuminer-armv8-crypto cpuminer-armv8 > /dev/null 2>&1
 rm -f cpuminer-avx512-sha-vaes.exe cpuminer-avx512-sha.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-avx2-sha.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-avx2-sha-vaes.exe cpuminer-zen3.exe cpuminer-zen4.exe cpuminer-x64.exe > /dev/null 2>&1
 make distclean > /dev/null
--- a/rin/miner/cpuminer/compat.h
+++ b/rin/miner/cpuminer/compat.h
@@ -1,96 +0,0 @@
 #ifndef __COMPAT_H__
 #define __COMPAT_H__
 #ifdef WIN32
 #if _WIN32_WINNT>=0x0601    // Windows 7
 #define WINDOWS_CPU_GROUPS_ENABLED 1
 #endif
 #include <windows.h>
 #include <time.h>
 #ifndef localtime_r
 #define localtime_r(src, dst) localtime_s(dst, src)
 #endif
 #define sleep(secs) Sleep((secs) * 1000)
 enum {
 	PRIO_PROCESS		= 0,
 };
 extern int opt_priority;
 /*
  * Windows stand-in for setpriority(). All three arguments are ignored as
  * inputs: the level applied to the current thread is chosen from the global
  * opt_priority (1..5 map to increasingly higher thread priorities, any other
  * value selects THREAD_PRIORITY_IDLE).
  * Returns 0 when SetThreadPriority() succeeds and -1 when it fails.
  */
 static __inline int setpriority(int which, int who, int prio)
 {
 	/* Thread priority for opt_priority 1..5, indexed by opt_priority - 1. */
 	static const int level_map[] = {
 		THREAD_PRIORITY_BELOW_NORMAL,
 		THREAD_PRIORITY_NORMAL,
 		THREAD_PRIORITY_ABOVE_NORMAL,
 		THREAD_PRIORITY_HIGHEST,
 		THREAD_PRIORITY_TIME_CRITICAL
 	};
 	if (opt_priority >= 1 && opt_priority <= 5)
 		prio = level_map[opt_priority - 1];
 	else
 		prio = THREAD_PRIORITY_IDLE;
 	return -!SetThreadPriority(GetCurrentThread(), prio);
 }
 #ifdef _MSC_VER
 #define snprintf(...) _snprintf(__VA_ARGS__)
 #define strdup(...) _strdup(__VA_ARGS__)
 #define strncasecmp(x,y,z) _strnicmp(x,y,z)
 #define strcasecmp(x,y) _stricmp(x,y)
 #define __func__ __FUNCTION__
 #define __thread __declspec(thread)
 #define _ALIGN(x) __declspec(align(x))
 typedef int ssize_t;
 #include <stdlib.h>
 // This static var is made to be compatible with linux/mingw (no free on string result)
 // This is not thread safe but we only use that once on process start
 static char dirname_buffer[_MAX_PATH] = { 0 };
 /*
  * MSVC replacement for dirname(): splits `file` with _splitpath_s() and
  * returns drive + directory, without one trailing backslash, in the shared
  * static dirname_buffer. The caller must not free the result, and each call
  * overwrites the previous one.
  */
 static __inline char * dirname(char *file) {
 	char drive_part[_MAX_DRIVE] = { 0 };
 	char dir_part[_MAX_DIR] = { 0 };
 	char name_part[_MAX_FNAME], ext_part[_MAX_EXT];
 	size_t dir_len;
 	_splitpath_s(file, drive_part, _MAX_DRIVE, dir_part, _MAX_DIR, name_part, _MAX_FNAME, ext_part, _MAX_EXT);
 	/* Drop a single trailing backslash from the directory component. */
 	dir_len = strlen(dir_part);
 	if (dir_len > 0 && dir_part[dir_len - 1] == '\\')
 		dir_part[dir_len - 1] = '\0';
 	sprintf(dirname_buffer, "%s%s", drive_part, dir_part);
 	return dirname_buffer;
 }
 #endif
 #endif /* WIN32 */
 #ifndef _MSC_VER
 #define _ALIGN(x) __attribute__ ((aligned(x)))
 #endif
 #undef unlikely
 #undef likely
 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
 #define unlikely(expr) (__builtin_expect(!!(expr), 0))
 #define likely(expr) (__builtin_expect(!!(expr), 1))
 #else
 #define unlikely(expr) (expr)
 #define likely(expr) (expr)
 #endif
 #ifndef WIN32
 #define MAX_PATH PATH_MAX
 #endif
 #endif /* __COMPAT_H__ */
--- a/rin/miner/cpuminer/config-template.json
+++ b/rin/miner/cpuminer/config-template.json
@@ -1,22 +0,0 @@
 {
 	"_comment" : "Any long-format command line argument ",
 	"_comment" : "may be used in this JSON configuration file",
   "_comment" : "Additional arguments may be added to the command line.",
   "_comment" : "Usage: cpuminer -c myconfig.json [additional arguments]",
   "_comment" : "Required arguments, replace dummy values",
 	"url" : "stratum+tcp://example.com:3333",
 	"user" : "read.pool.instructions",
 	"pass" : "x.often.works",
 	"algo" : "algo",
   "_comment" : "Often used optional arguments with default values selected.",
   "_comment" : "Change values, add or delete arguments as desired.",
   "threads" : 0,
 	"cpu-affinity" : -1,
   "api-bind" : "127.0.0.1:4048",
 	"benchmark" : false,
 	"quiet" : false
 }
--- a/rin/miner/cpuminer/configure
+++ b/rin/miner/cpuminer/configure
--- a/rin/miner/cpuminer/configure.ac
+++ b/rin/miner/cpuminer/configure.ac
@@ -1,139 +0,0 @@
 AC_INIT([cpuminer-opt], [25.3])
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
 AC_CONFIG_SRCDIR([cpu-miner.c])
 AC_CONFIG_HEADERS([cpuminer-config.h])
 AC_CONFIG_MACRO_DIR([m4])
 AM_INIT_AUTOMAKE([foreign subdir-objects])
 dnl Make sure anyone changing configure.ac/Makefile.am has a clue
 AM_MAINTAINER_MODE
 dnl Checks for programs
 AC_PROG_CC_C99
 AC_PROG_GCC_TRADITIONAL
 AM_PROG_CC_C_O
 AM_PROG_AS
 AC_PROG_RANLIB
 AC_PROG_CXX
 dnl Checks for header files
 AC_HEADER_STDC
 AC_CHECK_HEADERS([sys/endian.h sys/param.h syslog.h])
 # sys/sysctl.h requires sys/types.h on FreeBSD
 # sys/sysctl.h requires sys/param.h on OpenBSD
 AC_CHECK_HEADERS([sys/sysctl.h], [], [],
 [#include <sys/types.h>
 #ifdef HAVE_SYS_PARAM_H
 #include <sys/param.h>
 #endif
 ])
 AC_CHECK_DECLS([be32dec, le32dec, be32enc, le32enc, le16dec, le16enc], [], [],
 [AC_INCLUDES_DEFAULT
 #ifdef HAVE_SYS_ENDIAN_H
 #include <sys/endian.h>
 #endif
 ])
 AC_FUNC_ALLOCA
 AC_CHECK_FUNCS([getopt_long])
 case $target in
  x86_64-*-*|amd64-*-*)
    have_x86_64=true
    ;;
  aarch64*-*-*|arm64*-*-*)
    have_arm64=true
    ;;
  powerpc*-*-*)
    have_ppc=true
    ;;
 esac
 PTHREAD_FLAGS="-pthread"
 WS2_LIBS=""
 case $target in
  *-*-mingw*)
    have_win32=true
    PTHREAD_FLAGS=""
    WS2_LIBS="-lws2_32"
    ;;
  *-apple-*)
    have_apple=true
    ;;
 esac
 AC_ARG_ENABLE([assembly],
  AS_HELP_STRING([--disable-assembly], [disable assembly-language routines]))
 if test x$enable_assembly != xno; then
  AC_DEFINE([USE_ASM], [1], [Define to 1 if assembly routines are wanted.])
 fi
 # jansson test fails on Linux/Mingw, handled in Makefile.am.
 AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
 AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS="-lpthread",
  AC_CHECK_LIB([pthreadGC2], [pthread_create], PTHREAD_LIBS="-lpthreadGC2",
    AC_CHECK_LIB([pthreadGC1], [pthread_create], PTHREAD_LIBS="-lpthreadGC1",
      AC_CHECK_LIB([pthreadGC], [pthread_create], PTHREAD_LIBS="-lpthreadGC"
 ))))
 AC_MSG_CHECKING(whether __uint128_t is supported)
 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([static __uint128_t i = 100;])],
      AC_DEFINE(USE_INT128, 1, [Define if __uint128_t is available])
      AC_MSG_RESULT(yes)
    ,
      AC_MSG_RESULT(no)
 )
 # allow if <var> in Makefile.am
 AM_CONDITIONAL([WANT_JANSSON], [test x$request_jansson = xtrue])
 AM_CONDITIONAL([HAVE_WINDOWS], [test x$have_win32 = xtrue])
 AM_CONDITIONAL([USE_ASM], [test x$enable_assembly != xno])
 AM_CONDITIONAL([ARCH_x86_64], [test x$have_x86_64 = xtrue])
 AM_CONDITIONAL([ARCH_ARM64], [test x$have_arm64 = xtrue])
 AM_CONDITIONAL([MINGW], [test "x$OS" = "xWindows_NT"])
 AM_CONDITIONAL([HAVE_APPLE], [test x$have_apple = xtrue])
 # libcurl install path (for mingw : --with-curl=/usr/local)
 AC_ARG_WITH([curl],
   [  --with-curl=PATH         prefix where curl is installed [default=/usr]])
 if test -n "$with_curl" ; then
   LIBCURL_CFLAGS="$LIBCURL_CFLAGS -I$with_curl/include"
   LIBCURL_CPPFLAGS="$LIBCURL_CPPFLAGS -I$with_curl/include"
   LIBCURL_LDFLAGS="-L$with_curl/lib $LIBCURL_LDFLAGS"
   LIBCURL="-lcurl -lz"
 fi
 CFLAGS="$CFLAGS $LIBCURL_CFLAGS"
 CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS"
 LDFLAGS="$LDFLAGS $LIBCURL_LDFLAGS"
 # AC_CHECK_LIB([curl], [curl_multi_timeout],
 #    have_libcurl=yes,
 #    have_libcurl=no AC_MSG_ERROR([curl library required])
 # )
 # LIBCURL_CHECK_CONFIG([yes], 7.15, curlconfig=yes, curlconfig=no)
 AC_SUBST(LIBCURL)
 AC_SUBST(LIBCURL_CFLAGS)
 AC_SUBST(LIBCURL_CPPFLAGS)
 # AC_SUBST(LIBCURL_LDFLAGS)
 AC_SUBST(JANSSON_LIBS)
 AC_SUBST(PTHREAD_FLAGS)
 AC_SUBST(PTHREAD_LIBS)
 AC_SUBST(WS2_LIBS)
 AC_CONFIG_FILES([
 	Makefile
 	compat/Makefile
 	compat/jansson/Makefile
 	])
 AC_OUTPUT
--- a/rin/miner/cpuminer/configure~
+++ b/rin/miner/cpuminer/configure~
--- a/rin/miner/cpuminer/cpu-miner.c
+++ b/rin/miner/cpuminer/cpu-miner.c
--- a/rin/miner/cpuminer/cpuminer-opt-rin
+++ b/rin/miner/cpuminer/cpuminer-opt-rin
--- a/rin/miner/cpuminer/cpuminer.1
+++ b/rin/miner/cpuminer/cpuminer.1
@@ -1,225 +0,0 @@
 .TH CPUMINER 1 "May 2014" "cpuminer 2.4"
 .SH NAME
 cpuminer \- CPU miner for Bitcoin and Litecoin
 .SH SYNOPSIS
 .B cpuminer
 [\fIOPTION\fR]...
 .SH DESCRIPTION
 .B cpuminer
 is a multi-threaded CPU miner for Bitcoin, Litecoin and other cryptocurrencies.
 It supports the getwork and getblocktemplate (BIP 22) methods,
 as well as the Stratum mining protocol.
 .PP
 In its normal mode of operation, \fBcpuminer\fR connects to a mining server
 (specified with the \fB\-o\fR option), receives work from it and starts hashing.
 As soon as a solution is found, it is submitted to the same mining server,
 which can accept or reject it.
 When using getwork or getblocktemplate,
 \fBcpuminer\fR can take advantage of long polling, if the server supports it;
 in any case, fresh work is fetched as needed.
 When using the Stratum protocol this is not possible,
 and the server is responsible for sending fresh work at least every minute;
 if it fails to do so,
 \fBcpuminer\fR may drop the connection and try reconnecting again.
 .PP
 By default, \fBcpuminer\fR writes all its messages to standard error.
 On systems that have a syslog, the \fB\-\-syslog\fR option can be used
 to write to it instead.
 .PP
 On start, the nice value of all miner threads is set to 19.
 On Linux, the scheduling policy is also changed to SCHED_IDLE,
 or to SCHED_BATCH if that fails.
 On multiprocessor systems, \fBcpuminer\fR
 automatically sets the CPU affinity of miner threads
 if the number of threads is a multiple of the number of processors.
 .SH EXAMPLES
 To connect to a Litecoin mining pool that provides a Stratum server
 at example.com on port 3333, authenticating as worker "foo" with password "bar":
 .PP
 .nf
 .RS
 cpuminer \-o stratum+tcp://example.com:3333 \-O foo:bar
 .RE
 .fi
 .PP
 To mine to a local Bitcoin testnet instance running on port 18332,
 authenticating with username "rpcuser" and password "rpcpass":
 .PP
 .nf
 .RS
 cpuminer \-a sha256d \-o http://localhost:18332 \-O rpcuser:rpcpass \\
 	\-\-coinbase\-addr=mpXwg4jMtRhuSpVq4xS3HFHmCmWp9NyGKt
 .RE
 .fi
 .PP
 To connect to a Litecoin P2Pool node running on my.server on port 9327,
 mining in the background and having output sent to the syslog facility,
 omitting the per-thread hashmeter output:
 .PP
 .nf
 .RS
 cpuminer \-BSq \-o http://my.server:9327
 .RE
 .fi
 .SH OPTIONS
 .TP
 \fB\-a\fR, \fB\-\-algo\fR=\fIALGORITHM\fR
 Set the hashing algorithm to use.
 Default is scrypt.
 Possible values are:
 .RS 11
 .TP 10
 .B scrypt
 scrypt(1024, 1, 1) (used by Litecoin)
 .TP
 .B scrypt:\fIN\fR
 scrypt(\fIN\fR, 1, 1) (\fIN\fR must be a power of 2 greater than 1)
 .TP
 .B sha256d
 SHA-256d (used by Bitcoin)
 .RE
 .TP
 \fB\-\-benchmark\fR
 Run in offline benchmark mode.
 .TP
 \fB\-B\fR, \fB\-\-background\fR
 Run in the background as a daemon.
 .TP
 \fB\-\-cert\fR=\fIFILE\fR
 Set an SSL certificate to use with the mining server.
 Only supported when using the HTTPS protocol.
 .TP
 \fB\-\-coinbase\-addr\fR=\fIADDRESS\fR
 Set a payout address for solo mining.
 This is only used in getblocktemplate mode,
 and only if the server does not provide a coinbase transaction.
 .TP
 \fB\-\-coinbase\-sig\fR=\fITEXT\fR
 Set a string to be included in the coinbase (if allowed by the server).
 This is only used in getblocktemplate mode.
 .TP
 \fB\-c\fR, \fB\-\-config\fR=\fIFILE\fR
 Load options from a configuration file.
 \fIFILE\fR must contain a JSON object
 mapping long options to their arguments (as strings),
 or to \fBtrue\fR if no argument is required.
 Sample configuration file:
 .nf
 	{
 		"url": "stratum+tcp://example.com:3333",
 		"userpass": "foo:bar",
 		"retry-pause": "10",
 		"quiet": true
 	}
 .fi
 .TP
 \fB\-D\fR, \fB\-\-debug\fR
 Enable debug output.
 .TP
 \fB\-h\fR, \fB\-\-help\fR
 Print a help message and exit.
 .TP
 \fB\-\-no\-gbt\fR
 Do not use the getblocktemplate RPC method.
 .TP
 \fB\-\-no\-getwork\fR
 Do not use the getwork RPC method.
 .TP
 \fB\-\-no\-longpoll\fR
 Do not use long polling.
 .TP
 \fB\-\-no\-redirect\fR
 Ignore requests from the server to switch to a different URL.
 .TP
 \fB\-\-no\-stratum\fR
 Do not switch to Stratum, even if the server advertises support for it.
 .TP
 \fB\-o\fR, \fB\-\-url\fR=[\fISCHEME\fR://][\fIUSERNAME\fR[:\fIPASSWORD\fR]@]\fIHOST\fR:\fIPORT\fR[/\fIPATH\fR]
 Set the URL of the mining server to connect to.
 Supported schemes are \fBhttp\fR, \fBhttps\fR and \fBstratum+tcp\fR.
 If no scheme is specified, http is assumed.
 Specifying a \fIPATH\fR is only supported for HTTP and HTTPS.
 Specifying credentials has the same effect as using the \fB\-O\fR option.
 By default, on HTTP and HTTPS,
 the miner tries to use the getblocktemplate RPC method,
 and falls back to using getwork if getblocktemplate is unavailable.
 This behavior can be modified by using the \fB\-\-no\-gbt\fR
 and \fB\-\-no\-getwork\fR options.
 .TP
 \fB\-O\fR, \fB\-\-userpass\fR=\fIUSERNAME\fR:\fIPASSWORD\fR
 Set the credentials to use for connecting to the mining server.
 Any value previously set with \fB\-u\fR or \fB\-p\fR is discarded.
 .TP
 \fB\-p\fR, \fB\-\-pass\fR=\fIPASSWORD\fR
 Set the password to use for connecting to the mining server.
 Any password previously set with \fB\-O\fR is discarded.
 .TP
 \fB\-P\fR, \fB\-\-protocol\-dump\fR
 Enable output of all protocol-level activities.
 .TP
 \fB\-q\fR, \fB\-\-quiet\fR
 Disable per-thread hashmeter output.
 .TP
 \fB\-r\fR, \fB\-\-retries\fR=\fIN\fR
 Set the maximum number of times to retry if a network call fails.
 If not specified, the miner will retry indefinitely.
 .TP
 \fB\-R\fR, \fB\-\-retry\-pause\fR=\fISECONDS\fR
 Set how long to wait between retries. Default is 30 seconds.
 .TP
 \fB\-s\fR, \fB\-\-scantime\fR=\fISECONDS\fR
 Set an upper bound on the time the miner can go without fetching fresh work.
 This setting has no effect in Stratum mode or when long polling is activated.
 Default is 5 seconds.
 .TP
 \fB\-S\fR, \fB\-\-syslog\fR
 Log to the syslog facility instead of standard error.
 .TP
 \fB\-t\fR, \fB\-\-threads\fR=\fIN\fR
 Set the number of miner threads.
 If not specified, the miner will try to detect the number of available processors
 and use that.
 .TP
 \fB\-T\fR, \fB\-\-timeout\fR=\fISECONDS\fR
 Set a timeout for long polling.
 .TP
 \fB\-u\fR, \fB\-\-user\fR=\fIUSERNAME\fR
 Set the username to use for connecting to the mining server.
 Any username previously set with \fB\-O\fR is discarded.
 .TP
 \fB\-V\fR, \fB\-\-version\fR
 Display version information and quit.
 .TP
 \fB\-x\fR, \fB\-\-proxy\fR=[\fISCHEME\fR://][\fIUSERNAME\fR:\fIPASSWORD\fR@]\fIHOST\fR:\fIPORT\fR
 Connect to the mining server through a proxy.
 Supported schemes are: \fBhttp\fR, \fBsocks4\fR, \fBsocks5\fR.
 Since libcurl 7.18.0, the following are also supported:
 \fBsocks4a\fR, \fBsocks5h\fR (SOCKS5 with remote name resolving).
 If no scheme is specified, the proxy is treated as an HTTP proxy.
 .SH ENVIRONMENT
 The following environment variables can be specified in lower case or upper case;
 the lower-case version has precedence. \fBhttp_proxy\fR is an exception
 as it is only available in lower case.
 .PP
 .RS
 .TP
 \fBhttp_proxy\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR
 Sets the proxy server to use for HTTP.
 .TP
 \fBHTTPS_PROXY\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR
 Sets the proxy server to use for HTTPS.
 .TP
 \fBALL_PROXY\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR
 Sets the proxy server to use if no protocol-specific proxy is set.
 .RE
 .PP
 Using an environment variable to set the proxy has the same effect as
 using the \fB\-x\fR option.
 .SH AUTHOR
 This variant is maintained by tpruvot@github.
 Most of the code in the current version of cpuminer was written by
 Pooler <pooler@litecoinpool.org> with contributions from others.
 The original minerd was written by Jeff Garzik <jeff@garzik.org>.
--- a/rin/miner/cpuminer/dummy.cpp
+++ b/rin/miner/cpuminer/dummy.cpp
@@ -1,6 +0,0 @@
 // This file exists to force the use of g++ as the linker, which in turn
 // links the math library when math.h is included. gcc will not
 // automatically link math. Without this file linking will fail for m7m.c.
 // Linking math manually, so that gcc can do the linking, works on Linux
 // but segfaults on Windows. Until that is solved this file must continue
 // to exist.
--- a/rin/miner/cpuminer/elist.h
+++ b/rin/miner/cpuminer/elist.h
@@ -1,254 +0,0 @@
 #ifndef _LINUX_LIST_H
 #define _LINUX_LIST_H
 /*
 * Simple doubly linked list implementation.
 *
 * Some of the internal functions ("__xxx") are useful when
 * manipulating whole lists rather than single entries, as
 * sometimes we already know the next/prev entries and we can
 * generate better code by using them directly rather than
 * using the generic single-entry routines.
 */
 /* Link node embedded in every structure that is kept on a list. */
 struct list_head {
 	struct list_head *next;	/* successor in the circular chain */
 	struct list_head *prev;	/* predecessor in the circular chain */
 };
 /* Static initializer for a list head: an empty list points back at itself. */
 #define LIST_HEAD_INIT(name) { &(name), &(name) }
 /* Define a list head variable called @name, initialized to the empty list. */
 #define LIST_HEAD(name) \
 	struct list_head name = LIST_HEAD_INIT(name)
 /*
  * Run-time initializer: turn @ptr into an empty list.
  * @ptr is evaluated exactly once, so an argument with side effects
  * (for example "p++") is safe.
  */
 #define INIT_LIST_HEAD(ptr) do { \
 	struct list_head *init_head_ = (ptr); \
 	init_head_->next = init_head_; \
 	init_head_->prev = init_head_; \
 } while (0)
 /*
  * Link @nlh into the chain between @prev and @next.
  *
  * Internal helper: the caller must already know that @prev and @next
  * are consecutive entries of the same list.
  */
 static inline void __list_add(struct list_head *nlh,
 			      struct list_head *prev,
 			      struct list_head *next)
 {
 	/* Attach the new node to its successor ... */
 	nlh->next = next;
 	next->prev = nlh;
 	/* ... and then to its predecessor. */
 	prev->next = nlh;
 	nlh->prev = prev;
 }
 /**
  * list_add - insert an entry at the front of a list
  * @nlh: entry to insert
  * @head: list head the entry is placed directly after
  *
  * Repeated use yields last-in, first-out ordering, i.e. a stack.
  */
 static inline void list_add(struct list_head *nlh, struct list_head *head)
 {
 	struct list_head *old_first = head->next;
 	__list_add(nlh, head, old_first);
 }
 /**
  * list_add_tail - insert an entry at the back of a list
  * @nlh: entry to insert
  * @head: list head the entry is placed directly before
  *
  * Repeated use yields first-in, first-out ordering, i.e. a queue.
  */
 static inline void list_add_tail(struct list_head *nlh, struct list_head *head)
 {
 	struct list_head *old_last = head->prev;
 	__list_add(nlh, old_last, head);
 }
 /*
  * Close the gap between @prev and @next so that they become neighbours,
  * which unlinks whatever sat between them.
  *
  * Internal helper: the caller must already know both neighbours.
  */
 static inline void __list_del(struct list_head *prev, struct list_head *next)
 {
 	prev->next = next;
 	next->prev = prev;
 }
 /**
  * list_del - remove an entry from its list
  * @entry: the element to unlink.
  *
  * Afterwards both links of @entry are NULL, so list_empty() on @entry
  * does not return true; use list_del_init() if the entry is reused as a list.
  */
 static inline void list_del(struct list_head *entry)
 {
 	struct list_head *before = entry->prev;
 	struct list_head *after = entry->next;
 	__list_del(before, after);
 	entry->prev = NULL;
 	entry->next = NULL;
 }
 /**
  * list_del_init - remove an entry from its list and make it an empty list
  * @entry: the element to unlink; afterwards it points at itself.
  */
 static inline void list_del_init(struct list_head *entry)
 {
 	struct list_head *before = entry->prev;
 	struct list_head *after = entry->next;
 	__list_del(before, after);
 	INIT_LIST_HEAD(entry);
 }
 /**
  * list_move - unlink an entry and re-insert it at the front of a list
  * @list: the entry to move
  * @head: the head that will directly precede the entry
  */
 static inline void list_move(struct list_head *list, struct list_head *head)
 {
 	struct list_head *before = list->prev;
 	struct list_head *after = list->next;
 	__list_del(before, after);
 	list_add(list, head);
 }
 /**
  * list_move_tail - unlink an entry and re-insert it at the back of a list
  * @list: the entry to move
  * @head: the head that will directly follow the entry
  */
 static inline void list_move_tail(struct list_head *list,
 				  struct list_head *head)
 {
 	struct list_head *before = list->prev;
 	struct list_head *after = list->next;
 	__list_del(before, after);
 	list_add_tail(list, head);
 }
 /**
 * list_empty - tests whether a list is empty
 * @head: the list to test.
 */
 static inline int list_empty(struct list_head *head)
 {
 	return head->next == head;
 }
 /*
  * Insert every entry of @list directly after @head.
  *
  * Internal helper: @list must not be empty, and its own head is left
  * untouched (still pointing at the moved entries).
  */
 static inline void __list_splice(struct list_head *list,
 				 struct list_head *head)
 {
 	/* Capture all three boundary nodes before rewriting any link. */
 	struct list_head *src_first = list->next;
 	struct list_head *src_last = list->prev;
 	struct list_head *dst_next = head->next;
 	/* Front seam: head <-> first spliced entry. */
 	src_first->prev = head;
 	head->next = src_first;
 	/* Back seam: last spliced entry <-> former first entry of @head. */
 	src_last->next = dst_next;
 	dst_next->prev = src_last;
 }
 /**
  * list_splice - join two lists
  * @list: the list whose entries are added.
  * @head: the position in the destination list after which they are added.
  *
  * Does nothing when @list is empty. The head of @list is not reinitialised.
  */
 static inline void list_splice(struct list_head *list, struct list_head *head)
 {
 	if (list_empty(list))
 		return;
 	__list_splice(list, head);
 }
 /**
  * list_splice_init - join two lists and reset the donor list
  * @list: the list whose entries are added.
  * @head: the position in the destination list after which they are added.
  *
  * When @list is not empty its entries are moved and @list is reinitialised
  * as an empty list; an empty @list is left as it is.
  */
 static inline void list_splice_init(struct list_head *list,
 				    struct list_head *head)
 {
 	if (list_empty(list))
 		return;
 	__list_splice(list, head);
 	INIT_LIST_HEAD(list);
 }
 /**
  * list_entry - recover the structure that contains a list node
  * @ptr:	pointer to the embedded &struct list_head.
  * @type:	type of the containing structure.
  * @member:	name of the list node field inside @type.
  *
  * Works by subtracting the byte offset of @member from @ptr.
  */
 #define list_entry(ptr, type, member) \
 	((type *)((char *)(ptr) - (unsigned long)&(((type *)0)->member)))
 /**
  * list_for_each - walk a list from front to back
  * @pos:	a &struct list_head pointer used as the cursor.
  * @head:	the head of the list to walk; it is not visited itself.
  *
  * The current node must not be unlinked inside the loop body;
  * use list_for_each_safe() for that.
  */
 #define list_for_each(pos, head) \
 	for (pos = (head)->next; \
 	     pos != (head); \
 	     pos = pos->next)
 /**
  * list_for_each_prev - walk a list from back to front
  * @pos:	a &struct list_head pointer used as the cursor.
  * @head:	the head of the list to walk; it is not visited itself.
  */
 #define list_for_each_prev(pos, head) \
 	for (pos = (head)->prev; \
 	     pos != (head); \
 	     pos = pos->prev)
 /**
  * list_for_each_safe - walk a list while allowing removal of the current node
  * @pos:	a &struct list_head pointer used as the cursor.
  * @n:		a second &struct list_head pointer holding the next node.
  * @head:	the head of the list to walk; it is not visited itself.
  *
  * The successor is saved in @n before the body runs, so the body may
  * unlink @pos.
  */
 #define list_for_each_safe(pos, n, head) \
 	for (pos = (head)->next, n = pos->next; \
 	     pos != (head); \
 	     pos = n, n = pos->next)
 /**
  * list_for_each_entry - walk the structures that are linked on a list
  * @pos:	a pointer to @type used as the cursor.
  * @head:	the head of the list to walk.
  * @member:	name of the list node field inside @type.
  * @type:	type of the containing structure (passed explicitly).
  */
 #define list_for_each_entry(pos, head, member, type)			\
 	for (pos = list_entry((head)->next, type, member);		\
 	     &pos->member != (head);					\
 	     pos = list_entry(pos->member.next, type, member))
 /**
  * list_for_each_entry_safe - walk linked structures, allowing removal
  * @pos:	a pointer to @type used as the cursor.
  * @n:		a second pointer to @type holding the next structure.
  * @head:	the head of the list to walk.
  * @member:	name of the list node field inside @type.
  * @type:	type of the containing structure (passed explicitly).
  *
  * The successor is saved in @n before the body runs, so the body may
  * unlink the current entry.
  */
 #define list_for_each_entry_safe(pos, n, head, member, type)		\
 	for (pos = list_entry((head)->next, type, member),		\
 	     n = list_entry(pos->member.next, type, member);		\
 	     &pos->member != (head);					\
 	     pos = n, n = list_entry(n->member.next, type, member))
 /**
  * list_for_each_entry_continue - resume walking linked structures
  * @pos:        a pointer to @type; iteration starts at the entry AFTER it.
  * @head:       the head of the list being walked.
  * @member:     name of the list node field inside @type.
  * @type:       type of the containing structure (passed explicitly).
  *
  * Earlier versions called prefetch() on each step, but this header never
  * defines it, so any use of the macro failed to compile. The hint is
  * dropped; the order of iteration is unchanged.
  */
 #define list_for_each_entry_continue(pos, head, member, type)		\
 	for (pos = list_entry(pos->member.next, type, member);		\
 	     &pos->member != (head);					\
 	     pos = list_entry(pos->member.next, type, member))
 #endif
--- a/rin/miner/cpuminer/libbrotlicommon.dll
+++ b/rin/miner/cpuminer/libbrotlicommon.dll
--- a/rin/miner/cpuminer/libbrotlidec.dll
+++ b/rin/miner/cpuminer/libbrotlidec.dll
--- a/rin/miner/cpuminer/libcrypto-3-x64.dll
+++ b/rin/miner/cpuminer/libcrypto-3-x64.dll
--- a/rin/miner/cpuminer/libcurl-4.dll
+++ b/rin/miner/cpuminer/libcurl-4.dll
--- a/rin/miner/cpuminer/libgcc_s_seh-1.dll
+++ b/rin/miner/cpuminer/libgcc_s_seh-1.dll
--- a/rin/miner/cpuminer/libgmp-10.dll
+++ b/rin/miner/cpuminer/libgmp-10.dll
--- a/rin/miner/cpuminer/libiconv-2.dll
+++ b/rin/miner/cpuminer/libiconv-2.dll
--- a/rin/miner/cpuminer/libidn2-0.dll
+++ b/rin/miner/cpuminer/libidn2-0.dll
--- a/rin/miner/cpuminer/libintl-8.dll
+++ b/rin/miner/cpuminer/libintl-8.dll
--- a/rin/miner/cpuminer/libnghttp2-14.dll
+++ b/rin/miner/cpuminer/libnghttp2-14.dll
--- a/rin/miner/cpuminer/libpsl-5.dll
+++ b/rin/miner/cpuminer/libpsl-5.dll
--- a/rin/miner/cpuminer/libssh2-1.dll
+++ b/rin/miner/cpuminer/libssh2-1.dll
--- a/rin/miner/cpuminer/libssl-3-x64.dll
+++ b/rin/miner/cpuminer/libssl-3-x64.dll
--- a/rin/miner/cpuminer/libunistring-5.dll
+++ b/rin/miner/cpuminer/libunistring-5.dll
--- a/rin/miner/cpuminer/libwinpthread-1.dll
+++ b/rin/miner/cpuminer/libwinpthread-1.dll
--- a/rin/miner/cpuminer/libzstd.dll
+++ b/rin/miner/cpuminer/libzstd.dll
--- a/rin/miner/cpuminer/malloc-huge.c
+++ b/rin/miner/cpuminer/malloc-huge.c
@@ -1,36 +0,0 @@
 #include "malloc-huge.h"
 #include "miner.h"
 #define HUGEPAGE_SIZE_2M  (2 * 1024 * 1024)
 // Try to obtain an anonymous, private mapping backed by huge pages.
 // Returns NULL when the platform lacks MAP_HUGETLB or MAP_ANON, when the
 // request is smaller than HUGEPAGE_MIN_ALLOC, or when mmap() fails.
 // The requested size is rounded up to a multiple of HUGEPAGE_SIZE_2M.
 void *malloc_hugepages( size_t size )
 {
 #if defined(MAP_HUGETLB) && defined(MAP_ANON)
   // Small requests are refused.
   if ( size < HUGEPAGE_MIN_ALLOC )
      return NULL;
   int mmap_flags = MAP_HUGETLB | MAP_ANON | MAP_PRIVATE;
   #ifdef MAP_NOCORE
   mmap_flags |= MAP_NOCORE;
   #endif
   // Round the length up to the next 2 MiB boundary.
   const size_t align_mask = (size_t)HUGEPAGE_SIZE_2M - 1;
   const size_t rounded = ( size + align_mask ) & ~align_mask;
   void *mem = mmap( NULL, rounded, PROT_READ | PROT_WRITE, mmap_flags, -1, 0 );
   return ( mem == MAP_FAILED ) ? NULL : mem;
 #else
   // No huge page support available at compile time.
   return NULL;
 #endif
 }
--- a/rin/miner/cpuminer/malloc-huge.h
+++ b/rin/miner/cpuminer/malloc-huge.h
@@ -1,24 +0,0 @@
 #if !(defined(MALLOC_HUGE__))
 #define MALLOC_HUGE__
 #include <errno.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #ifdef __unix__
 #include <sys/mman.h>
 #endif
 #if defined(MAP_HUGETLB)
 // Minimum block size 6 MiB to use huge pages; malloc_hugepages() returns
 // NULL for smaller requests. Only defined when the platform headers
 // provide MAP_HUGETLB.
 #define HUGEPAGE_MIN_ALLOC    (6 * 1024 * 1024)
 #endif
 // Attempt to allocate memory backed by 2 MiB pages, returns NULL on failure.
 // The implementation rounds size up to a multiple of 2 MiB.
 // NOTE(review): the block is obtained with mmap(), so it presumably has to
 // be released with munmap() rather than free() - confirm against callers.
 void *malloc_hugepages( size_t size );
 #endif
--- a/rin/miner/cpuminer/miner-coin.bat
+++ b/rin/miner/cpuminer/miner-coin.bat
@@ -1,3 +0,0 @@
 :: Runs cpuminer.exe with the rinhash algorithm against the pool given by -o,
 :: using the -u value as the user. Whenever the miner exits, control jumps
 :: back to the label below and it is started again, indefinitely.
 :start
 cpuminer.exe -a rinhash  -o stratum+tcps://stratum-eu.rplant.xyz:17148 -u rin1qvj0yyt9phvled9kxflju3p687a4s7kareglpk5.dd
 goto start
--- a/rin/miner/cpuminer/miner.h
+++ b/rin/miner/cpuminer/miner.h
--- a/rin/miner/cpuminer/nomacro.pl
+++ b/rin/miner/cpuminer/nomacro.pl
@@ -1,47 +0,0 @@
 #!/usr/bin/perl
 # Copyright 2012 pooler@litecoinpool.org
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the Free
 # Software Foundation; either version 2 of the License, or (at your option)
 # any later version.  See COPYING for more details.
 #
 # nomacro.pl - convert assembler macros to C preprocessor macros.
 use strict;
 # For every assembler source (*.S) in the current directory: keep the
 # original as "<file>.orig" and regenerate "<file>" with each .macro/.endm
 # block turned into a C preprocessor #define.
 #
 # Any failure to rename or open a file aborts the run. Continuing would let
 # the '>' open truncate the only copy of the source.
 foreach my $f (<*.S>) {
 	rename $f, "$f.orig"
 		or die "cannot rename $f to $f.orig: $!\n";
 	open my $fin, '<', "$f.orig"
 		or die "cannot read $f.orig: $!\n";
 	open my $fout, '>', $f
 		or die "cannot write $f: $!\n";
 	my $inmacro = 0;
 	my %macros = ();
 	while (<$fin>) {
 		# ".macro NAME args" opens a macro: emit the #define header line.
 		if (m/^\.macro\s+([_0-9A-Z]+)(?:\s*)(.*)$/i) {
 			print $fout "#define $1($2) \\\n";
 			$macros{$1} = 1;
 			$inmacro = 1;
 			next;
 		}
 		# ".endm" closes it: the empty line ends the continuation chain.
 		if (m/^\.endm/) {
 			print $fout "\n";
 			$inmacro = 0;
 			next;
 		}
 		# Rewrite invocations "NAME args" of known macros as "NAME(args)".
 		for my $m (keys %macros) {
 			s/^([ \t]*)($m)(?:[ \t]+([^#\n]*))?([;\n])/$1$2($3)$4/;
 		}
 		if ($inmacro) {
 			if (m/^\s*#if/) {
 				# Drop the whole conditional block. Stop at end of file
 				# if the closing #endif is missing instead of looping
 				# forever on undef.
 				while (!m/^\s*#endif/) {
 					$_ = <$fin>;
 					last unless defined $_;
 				}
 				next;
 			}
 			# Skip blank lines inside a macro body.
 			next if (m/^\s*$/);
 			# Strip backslashes, then end the line with "; \" so the
 			# #define continues onto the next line.
 			s/\\//g;
 			s/$/; \\/;
 		}
 		print $fout $_;
 	}
 	close $fout
 		or die "cannot finish writing $f: $!\n";
 	close $fin;
 }
--- a/rin/miner/cpuminer/simd-utils.h
+++ b/rin/miner/cpuminer/simd-utils.h
@@ -1,233 +0,0 @@
 #if !defined(SIMD_UTILS_H__)
 #define SIMD_UTILS_H__ 1
 //////////////////////////////////////////////////////////////////////
 //
 //             SIMD utilities
 //
 //    Not to be confused with the hashing function of the same name. This
 //    is about Single Instruction Multiple Data programming using CPU
 //    features such as SSE and AVX.
 //
 //    This header is the entry point to a suite of macros and functions
 //    to perform basic operations on vectors that are useful in crypto
 //    mining. Some of these functions have native CPU support for scalar
 //    data but not for vectors. The main categories are bit rotation
 //    and endian byte swapping
 //
 //    This suite supports some operations on regular 64 bit integers
 //    as well as 128 bit integers available on recent versions of Linux
 //    and GCC.
 //
 //    It also supports various vector sizes on CPUs that meet the minimum
 //    requirements.
 //
 //    The minimum for any real work is a 64 bit CPU with SSE2,
 //    i.e. the Intel Core 2.
 //
 //    Following are the minimum requirements for each vector size. There
 //    is no significant 64 bit vectorization therefore SSE2 is the practical
 //    minimum for using this code.
 //
 //    SSE2:   128 bit vectors  (64 bit CPUs only, such as Intel Core2)
 //    AVX2:   256 bit vectors  (Starting with Intel Haswell and AMD Ryzen)
 //    AVX512: 512 bit vectors  (Starting with SkylakeX)
 //    AVX10:  when available will supersede AVX512 and will bring AVX512
 //        features, except 512 bit vectors, to Intel's Ecores. It needs to be
 //        enabled manually when the relevant GCC macros are known.
 //
 //    Most functions are available at the stated levels but in rare cases
 //    a higher level feature may be required with no compatible alternative.
 //    Some SSE2 functions have versions optimized for higher feature levels
 //    such as SSSE3 or SSE4.1 that will be used automatically on capable
 //    CPUs.
 //
 //    Strict alignment of data is required: 16 bytes for 128 bit vectors,
 //    32 bytes for 256 bit vectors and 64 bytes for 512 bit vectors. 64 byte
 //    alignment is recommended in all cases for best cache alignment.
 //
 //    All functions are defined with type agnostic pointers (void*) arguments
 //    and are cast or aliased as the appropriate type. This adds convenience
 //    for the applications but also adds responsibility to ensure adequate data
 //    alignment.
 //
 //    An attempt was made to make the names as similar as possible to
 //    Intel's intrinsic function format. Most variations are to avoid
 //    confusion with actual Intel intrinsics, brevity, and clarity.
 //
 //    The main differences are:
 //
 //   - the leading underscore "_" is dropped from the prefix of vector function
 //     macros.
 //   - "mm128" is used as the 128 bit prefix to be consistent with mm256 & mm512 and
 //     to avoid the ambiguity of "mm" which is also used for 64 bit MMX
 //     intrinsics.
 //   - the element size does not include additional type specifiers
 //      like "epi".
 //   - there is a subset of some functions for scalar data. They may have
 //     no prefix nor vec-size, just one size, the size of the data.
 //   - Some integer functions are also defined which use a similar notation.
 //   
 //    Function names follow this pattern:
 //
 //         [prefix]_[op][vsize]_[esize]
 //
 //    Prefix: usually the size of the returned vector.
 //    Following are some examples:
 //
 //    u64:  unsigned 64 bit integer function
 //    i128: signed 128 bit integer function (rarely used)
 //    m128: 128 bit vector identifier (deprecated)
 //    mm128: 128 bit vector function
 //
 //    op: describes the operation of the function or names the data
 //        identifier.
 //
 //    esize: optional, element size of operation
 //
 //    vsize: optional, lane size used when a function operates on elements
 //           within lanes of a larger vector.
 //
 //    Ex: mm256_shuflr128_32 rotates each 128 bit lane of a 256 bit vector
 //        right by 32 bits.
 // 
 //  New architecture agnostic syntax to support multiple architectures.
 //  currently only used for 128 bit vectors.
 //
 //         [prefix]_[op][esize]
 //
 //  Abbreviated when no vsize, space is removed between op & esize.
 //
 //  Ex:  v128_add32 gets remapped to the appropriate architecture intrinsic.
 //
 //  New type specification includes element size because it's significant on
 //  AArch64. For x86_64 they're all mapped to v128_t. On arm the default is
 //  v128u32_t.
 //
 //   v128_t, v128u64_t, v128u32_t.
 //
 //  [prefix] is changed to "v128" or size specific for typedef.
 //
 // Vector constants
 //
 // Vector constants are a big problem because they technically don't exist.
 // All vectors used as constants either reside in memory or must be generated
 // at run time at significant cost. The cost of generating a constant
 // increases non-linearly with the number of vector elements. A 4 element
 // vector costs between 7 and 11 clocks to generate, an 8 element vector
 // is 15-25 clocks. There are also additional clocks due to data dependency
 // stalls.
 //
 // Vector constants are often used as control indexes for permute, blend, etc,
 // where generating the index can be over 90% of the operation. This is
 // where the problem occurs. An instruction that only requires 1 to 3
 // clocks needs many times more just to build the index argument.
 //
 // There is very little a programmer can do to avoid the worst case scenarios.
 // Smaller integers can be merged to form 64 bit integers, and vectors with
 // repeated elements can be generated more efficiently but they have limited
 // benefit and limited application.
 //
 // If a vector constant is to be used repeatedly it is better to define a local
 // variable to generate the constant only once.
 //
 //////////////////////////////////////////////////////////////////////////
 #include <inttypes.h>
 #include <memory.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stddef.h>
 // AVX512 macros are not a reliable indicator of 512 bit vector capability
 // because they get defined with AVX10_1_256 which doesn't support 512 bit.
 // EVEX512 is also unreliable as it can also be defined when 512b is not
 // available.
 // Use AVX10_1_512 for 512b & AVX10_1_256 for 256b whenever AVX10 is present.
 // Use AVX512 macros only without AVX10.
 /*
 // Test for macros
 #ifdef __AVX10_1__
 #warning "__AVX10_1__"
 #endif
 #ifdef __AVX10_1_256__
 #warning "__AVX10_1_256__"
 #endif
 #ifdef __AVX10_1_512__
 #warning "__AVX10_1_512__"
 #endif
 #ifdef __EVEX256__
 #warning "__EVEX256__"
 #endif
 #ifdef __EVEX512__
 #warning "__EVEX512__"
 #endif
 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
 #warning "AVX512"
 #endif
 */
 // Feature selection macros derived from the compiler's predefined macros.
 // SIMD512: Use 512, 256 & 128 bit vectors, AVX512VBMI is not included and
 // must be tested separately.
 // VL256: Include AVX512VL instructions for 256 & 128 bit vectors.
 // VBMI: Include AVX512VBMI instructions for supported vector lengths.
 //
 // With AVX10: VL256 and VBMI are always set, SIMD512 only when
 // __AVX10_1_512__ is also defined.
 // Without AVX10 but with AVX512 F, VL, DQ and BW all present: VL256 and
 // SIMD512 are set, VBMI only when __AVX512VBMI__ is defined.
 // Otherwise none of the three macros is defined.
 #if defined(__AVX10_1__)
  #define VL256 1
  #define VBMI 1
  #if defined(__AVX10_1_512__)
    #define SIMD512 1
  #endif
 #elif defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
  #define VL256 1
  #define SIMD512 1
  #if defined(__AVX512VBMI__)
    #define VBMI 1
  #endif
 #endif
 /*
 #if defined(SIMD512)
 #warning "SIMD512"
 #endif
 #if defined(VBMI)
 #warning "VBMI"
 #endif
 #if defined(VL256)
 #warning "VL256"
 #endif
 */
 #if defined(__x86_64__)
 #include <x86intrin.h>
 #elif defined(__aarch64__)
 #include <arm_neon.h>
 #endif
 #include "simd-utils/simd-int.h"
 // x86_64 SSE2 128 bit vectors
 #include "simd-utils/simd-128.h"
 // x86_64 AVX2 256 bit vectors
 #include "simd-utils/simd-256.h"
 // x86_64 AVX512 512 bit vectors
 #include "simd-utils/simd-512.h"
 // aarch64 neon 128 bit vectors
 #include "simd-utils/simd-neon.h"
 #include "simd-utils/intrlv.h"
 #endif  // SIMD_UTILS_H__
--- a/rin/miner/cpuminer/sysinfos.c
+++ b/rin/miner/cpuminer/sysinfos.c
--- a/rin/miner/cpuminer/util.c
+++ b/rin/miner/cpuminer/util.c
--- a/rin/miner/cpuminer/verthash-help.txt
+++ b/rin/miner/cpuminer/verthash-help.txt
@@ -1,80 +0,0 @@
 Quickstart:
 ----------
 First time mining verthash or don't have a Verthash data file:
 --algo verthash --verify --url ...
 Verthash data file already exists:
 --algo verthash --data-file /path/to/verthash.dat --url ...
 Background:
 ----------
 Verthash algorithm requires a data file for hashing. This file is
 static, portable, and only needs to be created once.
 A Verthash data file created by VerthashMiner can also be used by cpuminer-opt
 and used simultaneously by both miners.
 Due to its size (>1GB) it is recommended that one data file be created and
 stored in a permanent location accessible to any miner that wants to use it.
 New command line options:
 ------------------------
 cpuminer-opt adds two new command line options for verthash. The names
 and some behaviour have changed from VerthashMiner.
 --data-file /path/to/verthash.dat
  default when not used is verthash.dat in current working directory.  
 --verify
  verify integrity of file specified by --data-file, or if not specified
  the default data file if it exists, or create a default file and verify it
  if one does not yet exist. Data file verification is disabled by default.
 Detailed usage:
 --------------
 If a data file already exists it can be selected using the --data-file
 option to specify the path and name of the file.
 --algo verthash --data-file /path/to/verthash.dat --url ...
 If the --data-file option is not used the default is to use 'verthash.dat'
 from the current working directory.
 If no data file exists it can be created by using the --verify option
 without the --data-file option. If the default data file is not found in
 the current directory it will be created.
 --algo verthash --verify --url ...
 Data file creation can take up to 30 minutes on a spinning hard drive. 
 Once created the new data file will be verified and used immediately
 if a valid url and user were included on the command line.
 A default data file can be created by omitting the url option. That will
 either verify an existing default data file or create one and verify it,
 then exit.
 --algo verthash --verify
 A data file will never be created if --data-file is specified. The miner
 will exit with an error if the file is not found in the specified location.
 This is to avoid accidentally creating an unwanted data file due to a typo.
 After creation the data file can be moved to a more convenient location and
 referenced by --data-file, or left where it is and used by default without the
 --data-file option.
 Data file verification takes a few seconds and is disabled by default.
 VerthashMiner enables data file verification by default and has an option to
 disable it.
 The --verify option is intended primarily to create a new file. It's 
 not necessary or useful to verify a file every time the miner is started.
--- a/rin/miner/cpuminer/winbuild-cross.sh
+++ b/rin/miner/cpuminer/winbuild-cross.sh
@@ -1,131 +0,0 @@
 #!/bin/bash
 #
 # Script for building Windows binaries release package using mingw.
 # Requires a custom mingw environment, not intended for users.
 #
 # Compiles Windows EXE files for selected CPU architectures, copies them
 # as well as some DLLs that aren't available in most Windows environments
 # into a release folder ready to be zipped and uploaded.
 # define some local variables
 export LOCAL_LIB="$HOME/usr/lib"
 export CONFIGURE_ARGS="--with-curl=$LOCAL_LIB/curl --host=x86_64-w64-mingw32"
 export MINGW_LIB="/usr/x86_64-w64-mingw32/lib"
 # set correct gcc version
 export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
 # used by GCC
 export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs"
 export DEFAULT_CFLAGS="-maes -O3 -Wall"
 export DEFAULT_CFLAGS_OLD="-O3 -Wall"
 # make link to local gmp header file.
 ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
 # make release directory and copy selected DLLs.
 rm -rf release > /dev/null
 mkdir release
 cp README.txt release/
 cp README.md release/
 cp RELEASE_NOTES release/
 cp verthash-help.txt release/
 cp $MINGW_LIB/zlib1.dll release/
 cp $MINGW_LIB/libwinpthread-1.dll release/
 cp $GCC_MINGW_LIB/libstdc++-6.dll release/
 cp $GCC_MINGW_LIB/libgcc_s_seh-1.dll release/
 cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/
 # Start building...
 # AVX512 SHA VAES: Intel Core Icelake, Rocketlake
 ./clean-all.sh || echo clean
 rm -f config.status
 ./autogen.sh || echo done
 CFLAGS="-march=icelake-client $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
 make -j 8
 strip -s cpuminer.exe
 mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
 # AVX512 AES: Intel Core HEDT Skylake-X, Cascadelake
 make clean || echo clean
 rm -f config.status
 CFLAGS="-march=skylake-avx512 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
 make -j 8
 strip -s cpuminer.exe
 mv cpuminer.exe release/cpuminer-avx512.exe
 # AVX2 SHA VAES: Intel Alderlake, AMD Zen3
 make clean || echo done
 rm -f config.status
 CFLAGS="-mavx2 -msha -mvaes $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
 make -j 8
 strip -s cpuminer.exe
 mv cpuminer.exe release/cpuminer-avx2-sha-vaes.exe
 # AVX2 AES SHA: AMD Zen1
 make clean || echo clean
 rm -f config.status
 CFLAGS="-march=znver1 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
 make -j 8
 strip -s cpuminer.exe
 mv cpuminer.exe release/cpuminer-avx2-sha.exe
 # AVX2 AES: Intel Core Haswell, Skylake, Kabylake, Coffeelake, Cometlake
 make clean || echo clean
 rm -f config.status
 CFLAGS="-march=core-avx2 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
 make -j 8
 strip -s cpuminer.exe
 mv cpuminer.exe release/cpuminer-avx2.exe
 # AVX AES: Intel Sandybridge, Ivybridge
 make clean || echo clean
 rm -f config.status
 CFLAGS="-march=corei7-avx -maes $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS 
 make -j 8
 strip -s cpuminer.exe
 mv cpuminer.exe release/cpuminer-avx.exe
 # SSE4.2 AES: Intel Westmere
 make clean || echo clean
 rm -f config.status
 CFLAGS="-march=westmere -maes $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
 make -j 8
 strip -s cpuminer.exe
 mv cpuminer.exe release/cpuminer-aes-sse42.exe
 # Nehalem SSE4.2
 #make clean || echo clean
 #rm -f config.status
 #CFLAGS="$DEFAULT_CFLAGS_OLD -march=corei7" ./configure $CONFIGURE_ARGS
 #make 
 #strip -s cpuminer.exe
 #mv cpuminer.exe release/cpuminer-sse42.exe
 # Core2 SSSE3
 #make clean || echo clean
 #rm -f config.status
 #CFLAGS="$DEFAULT_CFLAGS_OLD -march=core2" ./configure $CONFIGURE_ARGS
 #make 
 #strip -s cpuminer.exe
 #mv cpuminer.exe release/cpuminer-ssse3.exe
 #make clean || echo clean
 # Generic SSE2
 make clean || echo clean
 rm -f config.status
 CFLAGS="-msse2 $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
 make -j 8
 strip -s cpuminer.exe
 mv cpuminer.exe release/cpuminer-sse2.exe
 #make clean || echo clean
 # Native with CPU groups enabled
 #make clean || echo clean
 #rm -f config.status
 #CFLAGS="-march=native $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
 #make -j 8
 #strip -s cpuminer.exe
--- a/rin/miner/cpuminer/zlib1.dll
+++ b/rin/miner/cpuminer/zlib1.dll
--- a/rin/miner/gpu/RinHash-cuda/CMakeLists.txt
+++ b/rin/miner/gpu/RinHash-cuda/CMakeLists.txt
@@ -0,0 +1,52 @@
 cmake_minimum_required(VERSION 3.18)
 project(RinHashCUDA LANGUAGES CXX CUDA)
 # Set C++ standard
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CUDA_STANDARD 11)
 # Find CUDA
 find_package(CUDA REQUIRED)
 # Set CUDA architectures
 set(CMAKE_CUDA_ARCHITECTURES "50;52;60;61;70;75;80;86")
 # Include directories
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 # Source files
 set(CUDA_SOURCES
    rinhash.cu
    sha3-256.cu
 )
 set(HEADERS
    rinhash_device.cuh
    argon2d_device.cuh
    blake3_device.cuh
    blaze3_cpu.cuh
 )
 # Create executable
 add_executable(rinhash-cuda-miner ${CUDA_SOURCES} ${HEADERS})
 # Set CUDA properties
 set_target_properties(rinhash-cuda-miner PROPERTIES
    CUDA_RUNTIME_LIBRARY Shared
 )
 # Link CUDA libraries
 target_link_libraries(rinhash-cuda-miner 
    ${CUDA_LIBRARIES}
    ${CUDA_CUDART_LIBRARY}
 )
 # Compiler options
 # The same CUDA optimization flag was applied in both branches of the former
 # if(MSVC)/else() block, so it is set unconditionally.
 target_compile_options(rinhash-cuda-miner PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-O3>)
 # Install target
 install(TARGETS rinhash-cuda-miner DESTINATION bin)
--- a/rin/miner/gpu/RinHash-cuda/LICENSE
+++ b/rin/miner/gpu/RinHash-cuda/LICENSE
@@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2025 Rin coin
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/rin/miner/gpu/RinHash-cuda/Makefile
+++ b/rin/miner/gpu/RinHash-cuda/Makefile
@@ -0,0 +1,40 @@
 # RinHash CUDA Miner Makefile
 # CUDA implementation of RinHash algorithm for GPU mining
 # Compiler and flags
 NVCC = nvcc
 CUDA_ARCH = -arch=sm_50 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86
 NVCC_FLAGS = -O3 -std=c++11 -Xcompiler -fPIC
 INCLUDES = -I.
 LIBS = -lcuda -lcudart
 # Source files
 CUDA_SOURCES = rinhash.cu sha3-256.cu
 HEADERS = rinhash_device.cuh argon2d_device.cuh blake3_device.cuh blaze3_cpu.cuh
 # Output executable
 TARGET = rinhash-cuda-miner.exe
 # Build targets
 all: $(TARGET)
 $(TARGET): $(CUDA_SOURCES) $(HEADERS)
 	$(NVCC) $(NVCC_FLAGS) $(CUDA_ARCH) $(INCLUDES) $(CUDA_SOURCES) -o $(TARGET) $(LIBS)
 # Clean build artifacts
 clean:
 	del /Q $(TARGET) *.obj 2>nul || true
 # Install target (copy to main directory)
 install: $(TARGET)
 	copy $(TARGET) ..\..\$(TARGET)
 # Debug build
 debug: NVCC_FLAGS += -g -G -DDEBUG
 debug: $(TARGET)
 # Test run
 test: $(TARGET)
 	.\$(TARGET) --help
 .PHONY: all clean install debug test
--- a/rin/miner/gpu/RinHash-cuda/README.md
+++ b/rin/miner/gpu/RinHash-cuda/README.md
@@ -0,0 +1,26 @@
 # RinHash CUDA Implementation
 🚀 High-performance GPU implementation of RinHash – an ASIC-resistant hashing algorithm designed for RinCoin mining.
 ## 🔧 Algorithm Overview
 RinHash is a custom Proof-of-Work algorithm designed to resist ASICs by combining three cryptographic hash functions:
 1. **BLAKE3** – Fast and modern hashing.
 2. **Argon2d** – Memory-hard password hashing (64KB, 2 iterations).
 3. **SHA3-256** – Secure final hash.
 The final output is a 32-byte SHA3-256 digest of the Argon2d result, which itself is applied to the BLAKE3 hash of the input block header.
 ---
 ## 💻 CUDA Implementation
 This repository contains a full GPU-based implementation of RinHash, ported to CUDA for use in high-efficiency miners. Key features include:
 - Full GPU parallelization of BLAKE3, Argon2d, and SHA3-256
 - Memory-hard Argon2d executed entirely on device memory
 - Batch processing support for multiple nonces
 - Matching hash output with official CPU implementation
 ---
--- a/rin/miner/gpu/RinHash-cuda/RinHashCUDA.sln
+++ b/rin/miner/gpu/RinHash-cuda/RinHashCUDA.sln
@@ -0,0 +1,24 @@
 Microsoft Visual Studio Solution File, Format Version 12.00
 # Visual Studio Version 17
 VisualStudioVersion = 17.0.31903.59
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "RinHashCUDA", "RinHashCUDA.vcxproj", "{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|x64 = Debug|x64
 		Release|x64 = Release|x64
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|x64.ActiveCfg = Debug|x64
 		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|x64.Build.0 = Debug|x64
 		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|x64.ActiveCfg = Release|x64
 		{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|x64.Build.0 = Release|x64
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {12345678-1234-5678-9ABC-DEF123456789}
 	EndGlobalSection
 EndGlobal
--- a/rin/miner/gpu/RinHash-cuda/RinHashCUDA.vcxproj
+++ b/rin/miner/gpu/RinHash-cuda/RinHashCUDA.vcxproj
@@ -0,0 +1,114 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <VCProjectVersion>16.0</VCProjectVersion>
    <ProjectGuid>{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}</ProjectGuid>
    <Keyword>Win32Proj</Keyword>
    <RootNamespace>RinHashCUDA</RootNamespace>
    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="Shared">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
    <OutDir>$(SolutionDir)bin\$(Platform)\$(Configuration)\</OutDir>
    <IntDir>$(SolutionDir)obj\$(Platform)\$(Configuration)\</IntDir>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <LinkIncremental>false</LinkIncremental>
    <OutDir>$(SolutionDir)bin\$(Platform)\$(Configuration)\</OutDir>
    <IntDir>$(SolutionDir)obj\$(Platform)\$(Configuration)\</IntDir>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <AdditionalIncludeDirectories>$(CUDA_PATH)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <AdditionalLibraryDirectories>$(CUDA_PATH)\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
      <AdditionalDependencies>cudart.lib;cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <AdditionalIncludeDirectories>$(CUDA_PATH)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <AdditionalLibraryDirectories>$(CUDA_PATH)\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
      <AdditionalDependencies>cudart.lib;cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClCompile Include="rinhash.cu">
      <FileType>CUDA C/C++</FileType>
    </ClCompile>
    <ClCompile Include="sha3-256.cu">
      <FileType>CUDA C/C++</FileType>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="argon2d_device.cuh" />
    <ClInclude Include="blake3_device.cuh" />
    <ClInclude Include="blaze3_cpu.cuh" />
    <ClInclude Include="rinhash_device.cuh" />
  </ItemGroup>
  <ItemGroup>
    <None Include="build-cuda.bat" />
    <None Include="CMakeLists.txt" />
    <None Include="Makefile" />
    <None Include="README.md" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 12.5.targets" />
  </ImportGroup>
 </Project>
--- a/rin/miner/gpu/RinHash-cuda/RinHashCUDA.vcxproj.filters
+++ b/rin/miner/gpu/RinHash-cuda/RinHashCUDA.vcxproj.filters
@@ -0,0 +1,56 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup>
    <Filter Include="Source Files">
      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D38A0280}</UniqueIdentifier>
      <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
    </Filter>
    <Filter Include="Header Files">
      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
      <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
    </Filter>
    <Filter Include="CUDA Files">
      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
      <Extensions>cu;cuh</Extensions>
    </Filter>
    <Filter Include="Build Files">
      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD02}</UniqueIdentifier>
    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="rinhash.cu">
      <Filter>CUDA Files</Filter>
    </ClCompile>
    <ClCompile Include="sha3-256.cu">
      <Filter>CUDA Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="argon2d_device.cuh">
      <Filter>CUDA Files</Filter>
    </ClInclude>
    <ClInclude Include="blake3_device.cuh">
      <Filter>CUDA Files</Filter>
    </ClInclude>
    <ClInclude Include="blaze3_cpu.cuh">
      <Filter>CUDA Files</Filter>
    </ClInclude>
    <ClInclude Include="rinhash_device.cuh">
      <Filter>CUDA Files</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <None Include="build-cuda.bat">
      <Filter>Build Files</Filter>
    </None>
    <None Include="CMakeLists.txt">
      <Filter>Build Files</Filter>
    </None>
    <None Include="Makefile">
      <Filter>Build Files</Filter>
    </None>
    <None Include="README.md">
      <Filter>Build Files</Filter>
    </None>
  </ItemGroup>
 </Project>
--- a/rin/miner/gpu/RinHash-cuda/argon2d_device.cuh
+++ b/rin/miner/gpu/RinHash-cuda/argon2d_device.cuh
@@ -0,0 +1,918 @@
 #include <cuda_runtime.h>
 #include <device_launch_parameters.h>
 #include <stdint.h>
 #include <string.h>
 #include <stdio.h>
 //=== Argon2 constants ===//
 // Block size in bytes, and the same block measured in 8-, 16- and 32-byte units.
 #define ARGON2_BLOCK_SIZE 1024
 #define ARGON2_QWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 8)
 #define ARGON2_OWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 16)
 #define ARGON2_HWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 32)
 #define ARGON2_SYNC_POINTS 4
 #define ARGON2_PREHASH_DIGEST_LENGTH 64
 #define ARGON2_PREHASH_SEED_LENGTH 72
 // Argon2 version numbers (0x10 and 0x13).
 #define ARGON2_VERSION_10 0x10
 #define ARGON2_VERSION_13 0x13
 #define ARGON2_ADDRESSES_IN_BLOCK 128
 //=== Blake2b constants ===//
 // Sizes are in bytes; BLAKE2B_ROUNDS is the number of rounds run by blake2b_compress.
 #define BLAKE2B_BLOCKBYTES 128
 #define BLAKE2B_OUTBYTES 64
 #define BLAKE2B_KEYBYTES 64
 #define BLAKE2B_SALTBYTES 16
 #define BLAKE2B_PERSONALBYTES 16
 #define BLAKE2B_ROUNDS 12
 //=== Structure definitions ===//
 // One Argon2 memory block: ARGON2_BLOCK_SIZE (1024) bytes held as
 // ARGON2_QWORDS_IN_BLOCK (128) 64-bit words, declared with 64-byte alignment.
 typedef struct __align__(64) block_ {
    uint64_t v[ARGON2_QWORDS_IN_BLOCK];
 } block;
 // Description of one Argon2 computation: the block memory it works on plus
 // the sizing parameters (passes, block counts, lanes, threads).
 typedef struct Argon2_instance_t {
    block *memory;          /* Memory pointer */
    uint32_t version;
    uint32_t passes;        /* Number of passes */
    uint32_t memory_blocks; /* Number of blocks in memory */
    uint32_t segment_length;
    uint32_t lane_length;
    uint32_t lanes;
    uint32_t threads;
    int print_internals; /* whether to print the memory blocks */
 } argon2_instance_t;
 /*
 * Argon2 position: where we construct the block right now. Used to distribute
 * work between threads. Identified by pass, lane, slice and index.
 */
 typedef struct Argon2_position_t {
    uint32_t pass;
    uint32_t lane;
    uint8_t slice;
    uint32_t index;
 } argon2_position_t;
 // Running state of one Blake2b hash computation.
 typedef struct __blake2b_state {
    uint64_t h[8];                    // chaining value, updated by blake2b_compress
    uint64_t t[2];                    // byte counter: t[0] low word, t[1] carry word
    uint64_t f[2];                    // finalization flags: f[0] last block, f[1] last node
    uint8_t buf[BLAKE2B_BLOCKBYTES];  // input bytes not yet compressed
    unsigned buflen;                  // number of valid bytes in buf
    unsigned outlen;                  // digest length in bytes chosen at init
    uint8_t last_node;                // when non-zero, finalization also sets f[1]
 } blake2b_state;
 // Blake2b parameter block. The number in each trailing comment is the
 // cumulative byte offset at which that field ends; the whole structure is
 // 64 bytes and is read as eight 64-bit words when a state is initialized.
 typedef struct __blake2b_param {
    uint8_t digest_length;                   /* 1 */
    uint8_t key_length;                      /* 2 */
    uint8_t fanout;                          /* 3 */
    uint8_t depth;                           /* 4 */
    uint32_t leaf_length;                    /* 8 */
    uint64_t node_offset;                    /* 16 */
    uint8_t node_depth;                      /* 17 */
    uint8_t inner_length;                    /* 18 */
    uint8_t reserved[14];                    /* 32 */
    uint8_t salt[BLAKE2B_SALTBYTES];         /* 48 */
    uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
 } blake2b_param;
 //=== Constant memory ===//
 // Blake2b initialization vector: eight 64-bit words used to seed the state
 // in blake2b_init and the upper half of the working vector in blake2b_compress.
 __constant__ uint64_t blake2b_IV[8] = {
    0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
    0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
    0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
    0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
 };
 // Message-word permutation table: row r gives the order in which the sixteen
 // message words are fed to the G function during round r of blake2b_compress.
 // Rows 10 and 11 repeat rows 0 and 1.
 __constant__ uint8_t blake2b_sigma[12][16] = {
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
    {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
    {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
    {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
    {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
    {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
    {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
    {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
    {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}
 };
 //=== Common helper functions ===//
 // Rotate the 64-bit word x right by n bit positions and return the result.
 // Both shift counts are masked to 0..63, so n == 0 (or any multiple of 64)
 // returns x unchanged instead of performing an undefined shift by 64 bits.
 // For n in 1..63 the result is identical to the unmasked expression.
 __device__ __forceinline__ uint64_t rotr64(uint64_t x, uint32_t n) {
    return (x >> (n & 63)) | (x << ((64 - n) & 63));
 }
 // fBlaMka, kept identical to the C reference implementation.
 // Returns x + y + 2 * lo32(x) * lo32(y), where lo32 is the low 32 bits of its
 // argument and all arithmetic wraps modulo 2^64.
 __device__ __forceinline__ uint64_t fBlaMka(uint64_t x, uint64_t y) {
    const uint64_t lo_x = x & 0xFFFFFFFFULL;
    const uint64_t lo_y = y & 0xFFFFFFFFULL;
    // Doubling the product is written as a left shift by one.
    return x + y + ((lo_x * lo_y) << 1);
 }
 // Blake2b G function, kept identical to the reference implementation.
 // Mixes the message words m1 and m2 into the four state words a, b, c, d,
 // which are updated in place.
 __device__ __forceinline__ void blake2b_G(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t m1, uint64_t m2) {
    // First half: inject m1, rotate by 32 and 24.
    a += b + m1;
    d = rotr64(d ^ a, 32);
    c += d;
    b = rotr64(b ^ c, 24);
    // Second half: inject m2, rotate by 16 and 63.
    a += b + m2;
    d = rotr64(d ^ a, 16);
    c += d;
    b = rotr64(b ^ c, 63);
 }
 // Store the 32-bit value w at dst in little-endian byte order.
 __device__ __forceinline__ void store32(void *dst, uint32_t w) {
 #if defined(NATIVE_LITTLE_ENDIAN)
    memcpy(dst, &w, sizeof w);
 #else
    // Portable path: write the least significant byte first.
    uint8_t *out = (uint8_t *)dst;
    for (int k = 0; k < 4; ++k) {
        out[k] = (uint8_t)(w >> (8 * k));
    }
 #endif
 }
 // Add inc to the two-word byte counter in S->t: t[0] is the low word, and a
 // wrap-around of t[0] carries one into t[1].
 __device__ __forceinline__ void blake2b_increment_counter(blake2b_state *S,
    uint64_t inc) {
    S->t[0] += inc;
    if (S->t[0] < inc) {
        S->t[1] += 1;
    }
 }
 // Set the last-node flag word S->f[1] to all ones.
 __device__ __forceinline__ void blake2b_set_lastnode(blake2b_state *S) {
    S->f[1] = ~(uint64_t)0;
 }
 // Mark the state as being on its final block: set S->f[0] to all ones, and
 // also set the last-node flag when S->last_node is non-zero.
 __device__ __forceinline__ void blake2b_set_lastblock(blake2b_state *S) {
    if (S->last_node != 0) {
        blake2b_set_lastnode(S);
    }
    S->f[0] = ~(uint64_t)0;
 }
 // Zero every byte of the Blake2b state *S.
 __device__ void blake2b_state_memset(blake2b_state* S) {
    uint8_t* bytes = (uint8_t*)S;
    for (size_t off = 0; off < sizeof(blake2b_state); ++off) {
        bytes[off] = 0;
    }
 }
 // XOR each 64-bit word of *src into the corresponding word of *dst.
 __device__ void xor_block(block* dst, const block* src) {
    uint64_t* out = dst->v;
    const uint64_t* in = src->v;
    for (unsigned w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        out[w] ^= in[w];
    }
 }
 // Hand-written byte copy used in place of memcpy inside kernels (per the
 // original author, CUDA's memcpy is apparently slow when called there).
 // Copies n bytes from src to dest, one byte at a time in ascending order.
 __device__ void c_memcpy(void *dest, const void *src, size_t n) {
    uint8_t *out = (uint8_t*)dest;
    const uint8_t *in = (const uint8_t*)src;
    while (n--) {
        *out++ = *in++;
    }
 }
 // Copy the whole word array of *src (ARGON2_QWORDS_IN_BLOCK 64-bit words)
 // into *dst.
 __device__ void copy_block(block* dst, const block* src) {
    c_memcpy(dst->v, src->v, sizeof(dst->v));
 }
 // fill_block, kept equivalent to the C reference implementation.
 // Computes R = ref_block XOR prev_block, runs the Blake2-style permutation
 // over R, and writes (R before permutation) XOR (R after permutation) to
 // *next_block. When with_xor is non-zero the previous contents of *next_block
 // are XORed into the result as well.
 // Both inputs are consumed into local copies before *next_block is written,
 // so next_block may alias ref_block (next_addresses relies on this).
 __device__ void fill_block(const block* prev_block, const block* ref_block, block* next_block, int with_xor) {
    block mixed = {};
    block feed = {};
    // mixed = ref_block ^ prev_block
    copy_block(&mixed, ref_block);
    xor_block(&mixed, prev_block);
    // feed holds the pre-permutation value used for the final XOR.
    copy_block(&feed, &mixed);
    if (with_xor) {
        xor_block(&feed, next_block);
    }
    // Quarter round: the Blake2b G shape with fBlaMka in place of addition
    // and no message words.
    auto quarter = [](uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d) {
        a = fBlaMka(a, b);
        d = rotr64(d ^ a, 32);
        c = fBlaMka(c, d);
        b = rotr64(b ^ c, 24);
        a = fBlaMka(a, b);
        d = rotr64(d ^ a, 16);
        c = fBlaMka(c, d);
        b = rotr64(b ^ c, 63);
    };
    // One message-less Blake2 round over sixteen words: four column quarter
    // rounds followed by four diagonal quarter rounds.
    auto permute16 = [&quarter](uint64_t& v0, uint64_t& v1, uint64_t& v2, uint64_t& v3,
                                uint64_t& v4, uint64_t& v5, uint64_t& v6, uint64_t& v7,
                                uint64_t& v8, uint64_t& v9, uint64_t& v10, uint64_t& v11,
                                uint64_t& v12, uint64_t& v13, uint64_t& v14, uint64_t& v15) {
        quarter(v0, v4, v8, v12);
        quarter(v1, v5, v9, v13);
        quarter(v2, v6, v10, v14);
        quarter(v3, v7, v11, v15);
        quarter(v0, v5, v10, v15);
        quarter(v1, v6, v11, v12);
        quarter(v2, v7, v8, v13);
        quarter(v3, v4, v9, v14);
    };
    // Apply Blake2 on columns: eight groups of sixteen consecutive words.
    for (unsigned g = 0; g < 8; ++g) {
        uint64_t* w = mixed.v + 16 * g;
        permute16(w[0], w[1], w[2], w[3], w[4], w[5], w[6], w[7],
                  w[8], w[9], w[10], w[11], w[12], w[13], w[14], w[15]);
    }
    // Apply Blake2 on rows: eight groups made of word pairs spaced sixteen
    // words apart.
    for (unsigned g = 0; g < 8; ++g) {
        uint64_t* w = mixed.v + 2 * g;
        permute16(w[0], w[1], w[16], w[17], w[32], w[33], w[48], w[49],
                  w[64], w[65], w[80], w[81], w[96], w[97], w[112], w[113]);
    }
    // next_block = feed ^ permuted
    copy_block(next_block, &feed);
    xor_block(next_block, &mixed);
 }
 // Assign val to dest[0] .. dest[count - 1].
 // count is an element count, not a byte count: each store goes through
 // dest's own element type.
 template<typename T, typename ptr_t>
 __device__ void c_memset(ptr_t dest, T val, int count) {
    int idx = 0;
    while (idx < count) {
        dest[idx] = val;
        ++idx;
    }
 }
 // Fill every byte of block *b with the byte value `in`, i.e. the effect of
 // memset(b->v, in, sizeof(b->v)).
 // The loop is bounded by the word count ARGON2_QWORDS_IN_BLOCK. sizeof(b->v)
 // is a byte count and must not be passed to c_memset as an element count
 // over 64-bit words: that would store 1024 words into a 128-word block.
 __device__ void init_block_value(block *b, uint8_t in) {
    // Replicate the byte into all eight byte lanes of a 64-bit word.
    const uint64_t word = 0x0101010101010101ULL * in;
    for (unsigned i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
        b->v[i] = word;
    }
 }
 // Generate the next address block: increment the counter word
 // input_block->v[6], then apply fill_block twice with zero_block as the
 // previous block (input_block -> address_block -> address_block).
 __device__  void next_addresses(block *address_block, block *input_block,
    const block *zero_block) {
    input_block->v[6] += 1;
    fill_block(zero_block, input_block, address_block, 0);
    fill_block(zero_block, address_block, address_block, 0);
 }
 // Blake2b G mixing step used by blake2b_compress: mixes message words x and
 // y into the state words a, b, c, d in place. The operation sequence is the
 // same as blake2b_G, so it is delegated to that helper.
 __device__ void G1(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t x, uint64_t y) {
    blake2b_G(a, b, c, d, x, y);
 }
 // Blake2b compression function F.
 // Mixes one BLAKE2B_BLOCKBYTES-byte message block into the chaining value
 // S->h, using the counter S->t and the finalization flags S->f as they are
 // at the time of the call.
 __device__ void blake2b_compress(blake2b_state* S, const uint8_t block[BLAKE2B_BLOCKBYTES]) {
    uint64_t msg[16];
    uint64_t work[16];
    // Decode the block as sixteen little-endian 64-bit message words.
    for (int w = 0; w < 16; ++w) {
        uint64_t word = 0;
        for (int k = 7; k >= 0; --k) {
            word = (word << 8) | (uint64_t)block[8 * w + k];
        }
        msg[w] = word;
    }
    // Working vector: chaining value in the low half, IV in the high half,
    // with counter and flags folded into the last four words.
    for (int k = 0; k < 8; ++k) {
        work[k] = S->h[k];
        work[8 + k] = blake2b_IV[k];
    }
    work[12] ^= S->t[0];
    work[13] ^= S->t[1];
    work[14] ^= S->f[0];
    work[15] ^= S->f[1];
    for (int round = 0; round < BLAKE2B_ROUNDS; ++round) {
        const uint8_t* perm = blake2b_sigma[round];
        // Column step: (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15)
        for (int j = 0; j < 4; ++j) {
            G1(work[j], work[4 + j], work[8 + j], work[12 + j],
               msg[perm[2 * j]], msg[perm[2 * j + 1]]);
        }
        // Diagonal step: (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14)
        for (int j = 0; j < 4; ++j) {
            G1(work[j], work[4 + ((j + 1) & 3)], work[8 + ((j + 2) & 3)], work[12 + ((j + 3) & 3)],
               msg[perm[8 + 2 * j]], msg[perm[9 + 2 * j]]);
        }
    }
    // Finalization: fold both halves of the working vector into h.
    for (int k = 0; k < 8; ++k) {
        S->h[k] ^= work[k] ^ work[8 + k];
    }
 }
 // Helper functions to load/store 64-bit values in little-endian order.
 // load64 reads eight bytes at src, byte by byte, and returns them as a
 // little-endian 64-bit value (src[0] is the least significant byte).
 __device__ __forceinline__ uint64_t load64(const void* src) {
    const uint8_t* bytes = (const uint8_t*)src;
    uint64_t value = 0;
    // Walk from the most significant byte down so each shift makes room.
    for (int k = 7; k >= 0; --k) {
        value = (value << 8) | (uint64_t)bytes[k];
    }
    return value;
 }
 // Write the 64-bit value w to the eight bytes at dst in little-endian order
 // (dst[0] receives the least significant byte).
 __device__ __forceinline__ void store64(void* dst, uint64_t w) {
    uint8_t* out = (uint8_t*)dst;
    for (int k = 0; k < 8; ++k) {
        out[k] = (uint8_t)(w >> (8 * k));
    }
 }
 // Fill *dst from input: each of the ARGON2_QWORDS_IN_BLOCK words is decoded
 // with load64 from eight consecutive input bytes.
 __device__ void load_block(block *dst, const void *input) {
    const uint8_t *bytes = (const uint8_t *)input;
    for (unsigned w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        dst->v[w] = load64(bytes + w * sizeof(uint64_t));
    }
 }
 // Serialize *src to output: each of the ARGON2_QWORDS_IN_BLOCK words is
 // encoded with store64 into eight consecutive output bytes.
 __device__ void store_block(void *output, const block *src) {
    uint8_t *bytes = (uint8_t *)output;
    for (unsigned w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        store64(bytes + w * sizeof(uint64_t), src->v[w]);
    }
 }
 // Blake2b init function (unkeyed), following the reference implementation.
 // Zeroes *S, builds a parameter block for a sequential hash with an
 // outlen-byte digest and no key, and sets S->h to IV XOR parameter block.
 //
 // S       state to initialize; must not be NULL.
 // outlen  digest length in bytes; must be in 1..BLAKE2B_OUTBYTES.
 //
 // Returns 0 on success and -1 on invalid arguments. When outlen is out of
 // range the state is cleared and flagged as finalized, so a later
 // blake2b_update on it fails instead of hashing with a truncated length.
 __device__ int blake2b_init(blake2b_state* S, size_t outlen) {
    if (S == NULL) {
        return -1;
    }
    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        // Without this check outlen would be truncated to 8 bits below, and
        // blake2b_final would copy S->outlen bytes from a 64-byte buffer.
        blake2b_state_memset(S);
        blake2b_set_lastblock(S);
        return -1;
    }
    blake2b_param P;
    // Clear state using our custom function
    blake2b_state_memset(S);
    // Set parameters according to Blake2b spec
    P.digest_length = (uint8_t)outlen;
    P.key_length = 0;
    P.fanout = 1;
    P.depth = 1;
    P.leaf_length = 0;
    P.node_offset = 0;
    P.node_depth = 0;
    P.inner_length = 0;
    c_memset(P.reserved, 0, sizeof(P.reserved));
    c_memset(P.salt, 0, sizeof(P.salt));
    c_memset(P.personal, 0, sizeof(P.personal));
    // h = IV XOR parameter block, read as eight little-endian 64-bit words
    const unsigned char *p = (const unsigned char *)(&P);
    for (int i = 0; i < 8; ++i) {
        S->h[i] = blake2b_IV[i] ^ load64(&p[i * sizeof(S->h[i])]);
    }
    S->outlen = P.digest_length;
    return 0; // Success
 }
 // Switch consulted by clear_internal_memory; initialized to 0.
 __device__ int FLAG_clear_internal_memory = 0;
 // Placeholder for wiping sensitive memory. It currently does nothing for any
 // input: the secure_wipe_memory call it would guard is disabled.
 __device__ void clear_internal_memory(void *v, size_t n) {
    if (!FLAG_clear_internal_memory || !v) {
        return;
    }
    // secure_wipe_memory(v, n) would go here; it is not called.
 }
 // Blake2b update function, following the reference implementation.
 // Absorbs inlen bytes from in into the state. Full blocks are compressed as
 // soon as more input is known to follow; up to BLAKE2B_BLOCKBYTES bytes stay
 // buffered in S->buf.
 // Returns 0 on success (including inlen == 0) and -1 when S or in is NULL or
 // the state has already been finalized.
 __device__ int blake2b_update(blake2b_state* S, const uint8_t* in, size_t inlen) {
    // An empty update succeeds before any pointer is inspected.
    if (inlen == 0) {
        return 0;
    }
    if (S == NULL || in == NULL) {
        return -1;
    }
    // A non-zero f[0] means the last-block flag is set: the state was reused.
    if (S->f[0] != 0) {
        return -1;
    }
    const uint8_t *cursor = in;
    if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
        // Top up the buffered block and compress it.
        const size_t buffered = S->buflen;
        const size_t room = BLAKE2B_BLOCKBYTES - buffered;
        c_memcpy(S->buf + buffered, cursor, room);
        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
        blake2b_compress(S, S->buf);
        S->buflen = 0;
        cursor += room;
        inlen -= room;
        // Compress further full blocks straight from the input, always
        // leaving at least one byte behind for the buffer.
        for (; inlen > BLAKE2B_BLOCKBYTES; inlen -= BLAKE2B_BLOCKBYTES, cursor += BLAKE2B_BLOCKBYTES) {
            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
            blake2b_compress(S, cursor);
        }
    }
    // Keep the remainder for the next update or for finalization.
    c_memcpy(S->buf + S->buflen, cursor, inlen);
    S->buflen += (unsigned int)inlen;
    return 0; // Success
 }
 // Finish the hash and write the digest to `out`.
 //
 //   S      - state previously set up by blake2b_init()/blake2b_init_key().
 //   out    - destination buffer.
 //   outlen - capacity of `out` in bytes; must be at least S->outlen.
 //
 // Exactly S->outlen bytes are written. Returns 0 on success and -1 when a
 // pointer is NULL, the destination is too small, or the state was already
 // finalised.
 __device__ int blake2b_final(blake2b_state* S, uint8_t* out, size_t outlen) {
    uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
    unsigned int i;
    /* Refuse to overrun a destination shorter than the configured digest. */
    if (S == NULL || out == NULL || outlen < S->outlen) {
        return -1;
    }
    /* Is this a reused state? */
    if (S->f[0] != 0) {
        return -1;
    }
    blake2b_increment_counter(S, S->buflen);
    blake2b_set_lastblock(S);
    c_memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
    blake2b_compress(S, S->buf);
    for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
        store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
    }
    /* Only the configured digest length leaves the temp buffer. */
    c_memcpy(out, buffer, S->outlen);
    return 0;
 }
 // Initialise a keyed BLAKE2b state.
 //
 //   S      - state to initialise; must not be NULL.
 //   outlen - digest length in bytes, 1..BLAKE2B_OUTBYTES.
 //   key    - key bytes; must not be NULL.
 //   keylen - key length in bytes, 1..BLAKE2B_KEYBYTES.
 //
 // The state is cleared first (as blake2b_init() does) so that the counters,
 // finalisation flags and buffer length read by blake2b_update() start from
 // zero rather than from whatever was in the caller's memory.
 // Returns 0 on success, -1 on invalid arguments or if absorbing the key block
 // fails.
 __device__ int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
    size_t keylen) {
    blake2b_param P;
    if (S == NULL) {
        return -1;
    }
    /* Start from an all-clear state, exactly like the unkeyed initialiser. */
    blake2b_state_memset(S);
    /* The key is copied into a BLAKE2B_BLOCKBYTES stack block below, and both
     * lengths are stored as single bytes, so validate before touching them. */
    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES ||
        key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES) {
        S->f[0] = (uint64_t)-1; /* make later updates on this state fail */
        return -1;
    }
    /* Setup Parameter Block for keyed BLAKE2 */
    P.digest_length = (uint8_t)outlen;
    P.key_length = (uint8_t)keylen;
    P.fanout = 1;
    P.depth = 1;
    P.leaf_length = 0;
    P.node_offset = 0;
    P.node_depth = 0;
    P.inner_length = 0;
    c_memset(P.reserved, 0, sizeof(P.reserved));
    c_memset(P.salt, 0, sizeof(P.salt));
    c_memset(P.personal, 0, sizeof(P.personal));
    /* h = IV XOR Parameter Block */
    const unsigned char *p = (const unsigned char *)(&P);
    for (int i = 0; i < 8; ++i) {
        S->h[i] = blake2b_IV[i] ^ load64(&p[i * sizeof(S->h[i])]);
    }
    S->outlen = P.digest_length;
    /* The key, zero-padded to a full block, is absorbed as the first block. */
    uint8_t block[BLAKE2B_BLOCKBYTES];
    c_memset(block, 0, BLAKE2B_BLOCKBYTES);
    c_memcpy(block, key, keylen);
    int ret = blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
    /* Burn the key from stack */
    clear_internal_memory(block, BLAKE2B_BLOCKBYTES);
    return (ret < 0) ? -1 : 0;
 }
 // One-shot BLAKE2b: hash `inlen` bytes of `in` (optionally keyed) into `out`.
 //
 // Returns the result of blake2b_final() on success and -1 when an argument is
 // invalid or any stage fails. The state is handed to clear_internal_memory()
 // on every path.
 __device__ int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
    const void *key, size_t keylen) {
    blake2b_state S;
    int ret = -1;

    /* Argument validation, one flag per parameter group. */
    const int bad_input = (NULL == in && inlen > 0);
    const int bad_output =
        (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES);
    const int bad_key =
        ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES);

    if (!bad_input && !bad_output && !bad_key) {
        /* A non-empty key selects the keyed initialiser. */
        const int init_rc = (keylen > 0)
                                ? blake2b_init_key(&S, outlen, key, keylen)
                                : blake2b_init(&S, outlen);
        if (init_rc >= 0 &&
            blake2b_update(&S, (const uint8_t*)in, inlen) >= 0) {
            ret = blake2b_final(&S, (uint8_t*)out, outlen);
        }
    }

    clear_internal_memory(&S, sizeof(S));
    return ret;
 }
 // index_alpha, kept in line with the C reference implementation (including
 // the function signature).
 //
 // Maps `pseudo_rand` to the index, within a lane, of the block to reference.
 // `same_lane` is non-zero when the reference lane is the lane being filled.
 __device__ uint32_t index_alpha(const argon2_instance_t *instance,
    const argon2_position_t *position, uint32_t pseudo_rand,
    int same_lane) {
    uint32_t reference_area_size;
    uint32_t start_position = 0;
    uint64_t relative_position;

    /* Size of the window of blocks that may be referenced. */
    if (0 != position->pass) {
        /* Second pass */
        if (same_lane) {
            reference_area_size = instance->lane_length -
                                  instance->segment_length + position->index -
                                  1;
        } else {
            reference_area_size = instance->lane_length -
                                  instance->segment_length +
                                  ((position->index == 0) ? (-1) : 0);
        }
    } else if (0 == position->slice) {
        /* First pass, first slice: all but the previous block */
        reference_area_size = position->index - 1;
    } else if (same_lane) {
        /* First pass, same lane => add current segment */
        reference_area_size =
            position->slice * instance->segment_length +
            position->index - 1;
    } else {
        /* First pass, other lane */
        reference_area_size =
            position->slice * instance->segment_length +
            ((position->index == 0) ? (-1) : 0);
    }

    /* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
     * relative position */
    relative_position = pseudo_rand;
    relative_position = relative_position * relative_position >> 32;
    relative_position = reference_area_size - 1 -
                        (reference_area_size * relative_position >> 32);

    /* 1.2.5 Computing starting position: non-zero only after the first pass
     * and only when the current slice is not the last one. */
    if (0 != position->pass &&
        position->slice != ARGON2_SYNC_POINTS - 1) {
        start_position = (position->slice + 1) * instance->segment_length;
    }

    /* 1.2.6. Computing absolute position */
    return (uint32_t)((start_position + relative_position) %
                      instance->lane_length);
 }
 // fill_segment, added to mirror the C reference implementation.
 //
 // Fills one segment (the part of lane `position.lane` that belongs to slice
 // `position.slice`) during pass `position.pass`. For each block in the
 // segment it reads a pseudo-random word from the previous block, picks a
 // reference block via index_alpha(), and calls fill_block() to produce the
 // current block. `position` is taken by value; its `index` member is
 // overwritten per block.
 __device__ void fill_segment(const argon2_instance_t *instance,
    argon2_position_t position) {
        block *ref_block = NULL, *curr_block = NULL;
    block address_block, input_block, zero_block;
    uint64_t pseudo_rand, ref_index, ref_lane;
    uint32_t prev_offset, curr_offset;
    uint32_t starting_index;
    uint32_t i;
    int data_independent_addressing;
    // Hard-coded off: every branch guarded by this flag below is never taken,
    // so address_block/input_block/zero_block are never initialised or read.
    data_independent_addressing = false;
    if (data_independent_addressing) {
        init_block_value(&zero_block, 0);
        init_block_value(&input_block, 0);
        input_block.v[0] = position.pass;
        input_block.v[1] = position.lane;
        input_block.v[2] = position.slice;
        input_block.v[3] = instance->memory_blocks;
        input_block.v[4] = instance->passes;
        input_block.v[5] = 0;
    }
    starting_index = 0;
    if ((0 == position.pass) && (0 == position.slice)) {
        starting_index = 2; /* we have already generated the first two blocks */
        /* Don't forget to generate the first block of addresses: */
        if (data_independent_addressing) {
            next_addresses(&address_block, &input_block, &zero_block);
        }
    }
    /* Offset of the current block */
    curr_offset = position.lane * instance->lane_length +
                  position.slice * instance->segment_length + starting_index;
    // When the segment starts at the beginning of a lane, the "previous" block
    // wraps around to the last block of that same lane.
    if (0 == curr_offset % instance->lane_length) {
        /* Last block in this lane */
        prev_offset = curr_offset + instance->lane_length - 1;
    } else {
        /* Previous block */
        prev_offset = curr_offset - 1;
    }
    // prev_offset is advanced together with curr_offset by the loop header.
    for (i = starting_index; i < instance->segment_length;
         ++i, ++curr_offset, ++prev_offset) {
        /*1.1 Rotating prev_offset if needed */
        // After the wrapped first iteration, snap prev_offset back so that it
        // trails curr_offset by one again.
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }
        /* 1.2 Computing the index of the reference block */
        /* 1.2.1 Taking pseudo-random value from the previous block */
        if (data_independent_addressing) {
            if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
                next_addresses(&address_block, &input_block, &zero_block);
            }
            pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
        } else {
            pseudo_rand = instance->memory[prev_offset].v[0];
        }
        /* 1.2.2 Computing the lane of the reference block */
        // High 32 bits select the lane, low 32 bits feed index_alpha() below.
        ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
        if ((position.pass == 0) && (position.slice == 0)) {
            /* Can not reference other lanes yet */
            ref_lane = position.lane;
        }
        /* 1.2.3 Computing the number of possible reference block within the
         * lane.
         */
        position.index = i;
        ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
                                ref_lane == position.lane);
        /* 2 Creating a new block */
        ref_block =
            instance->memory + instance->lane_length * ref_lane + ref_index;
        curr_block = instance->memory + curr_offset;
        // NOTE(review): the last fill_block() argument looks like a
        // "XOR into the existing block" switch (0 on the first pass and for
        // version 1.0, 1 on later passes) — confirm against fill_block().
        if (ARGON2_VERSION_10 == instance->version) {
            /* version 1.2.1 and earlier: overwrite, not XOR */
            fill_block(instance->memory + prev_offset, ref_block, curr_block, 0);
        } else {
            if(0 == position.pass) {
                fill_block(instance->memory + prev_offset, ref_block,
                           curr_block, 0);
            } else {
                fill_block(instance->memory + prev_offset, ref_block,
                           curr_block, 1);
            }
        }
    }
 }
 // fill_memory, kept in line with the C reference implementation.
 //
 // Builds an Argon2 instance descriptor around `memory` and fills it segment
 // by segment: for every pass, for every slice, for every lane, in that
 // nesting order, all on the calling thread.
 __device__ void fill_memory(block* memory, uint32_t passes, uint32_t lanes, uint32_t lane_length, uint32_t segment_length) {
    argon2_instance_t instance;
    instance.memory = memory;
    instance.version = ARGON2_VERSION_13;
    instance.passes = passes;
    instance.lanes = lanes;
    instance.threads = lanes;
    instance.lane_length = lane_length;
    instance.segment_length = segment_length;
    instance.memory_blocks = lanes * lane_length;
    instance.print_internals = 0;

    // `index` is left unset here; fill_segment() assigns it per block.
    argon2_position_t cursor;
    uint32_t pass_no = 0;
    while (pass_no < passes) {
        cursor.pass = pass_no;
        uint32_t slice_no = 0;
        while (slice_no < ARGON2_SYNC_POINTS) {
            cursor.slice = slice_no;
            uint32_t lane_no = 0;
            while (lane_no < lanes) {
                cursor.lane = lane_no;
                fill_segment(&instance, cursor);
                ++lane_no;
            }
            ++slice_no;
        }
        ++pass_no;
    }
 }
 // blake2b_long, kept in line with the C reference implementation.
 //
 // Variable-length hash built on BLAKE2b. The requested length, encoded as a
 // little-endian 32-bit value, is hashed ahead of the input. Outputs of up to
 // BLAKE2B_OUTBYTES come from a single hash; longer outputs are produced by
 // repeatedly re-hashing the previous digest and emitting half of each digest
 // until the remainder fits in one final hash.
 // Returns a negative value on failure, otherwise the last stage's result.
 __device__ int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
    uint8_t *dst = (uint8_t *)pout;
    blake2b_state blake_state;
    uint8_t outlen_le[sizeof(uint32_t)] = {0};
    int ret = -1;

    if (outlen > UINT32_MAX) {
        goto done;
    }
    /* Ensure little-endian byte order! */
    store32(outlen_le, (uint32_t)outlen);

    if (outlen <= BLAKE2B_OUTBYTES) {
        /* Short output: one hash of (outlen || in). */
        if ((ret = blake2b_init(&blake_state, outlen)) < 0) {
            goto done;
        }
        if ((ret = blake2b_update(&blake_state, outlen_le, sizeof(outlen_le))) < 0) {
            goto done;
        }
        if ((ret = blake2b_update(&blake_state, (const uint8_t*)in, inlen)) < 0) {
            goto done;
        }
        if ((ret = blake2b_final(&blake_state, dst, outlen)) < 0) {
            goto done;
        }
    } else {
        uint32_t remaining;
        uint8_t digest[BLAKE2B_OUTBYTES];
        uint8_t previous[BLAKE2B_OUTBYTES];

        /* First digest: hash of (outlen || in) at full width. */
        if ((ret = blake2b_init(&blake_state, BLAKE2B_OUTBYTES)) < 0) {
            goto done;
        }
        if ((ret = blake2b_update(&blake_state, outlen_le, sizeof(outlen_le))) < 0) {
            goto done;
        }
        if ((ret = blake2b_update(&blake_state, (const uint8_t*)in, inlen)) < 0) {
            goto done;
        }
        if ((ret = blake2b_final(&blake_state, digest, BLAKE2B_OUTBYTES)) < 0) {
            goto done;
        }
        c_memcpy(dst, digest, BLAKE2B_OUTBYTES / 2);
        dst += BLAKE2B_OUTBYTES / 2;
        remaining = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;

        /* Chain: each digest is the hash of the previous one; emit half. */
        while (remaining > BLAKE2B_OUTBYTES) {
            c_memcpy(previous, digest, BLAKE2B_OUTBYTES);
            if ((ret = blake2b(digest, BLAKE2B_OUTBYTES, previous, BLAKE2B_OUTBYTES, NULL, 0)) < 0) {
                goto done;
            }
            c_memcpy(dst, digest, BLAKE2B_OUTBYTES / 2);
            dst += BLAKE2B_OUTBYTES / 2;
            remaining -= BLAKE2B_OUTBYTES / 2;
        }

        /* Final digest is sized to exactly what is still owed. */
        c_memcpy(previous, digest, BLAKE2B_OUTBYTES);
        if ((ret = blake2b(digest, remaining, previous, BLAKE2B_OUTBYTES, NULL, 0)) < 0) {
            goto done;
        }
        c_memcpy(dst, digest, remaining);
    }
 done:
    clear_internal_memory(&blake_state, sizeof(blake_state));
    return ret;
 }
 // Absorb one 32-bit value, encoded little-endian, into the pre-hash state.
 static __device__ void argon2d_h0_absorb_le32(blake2b_state *state, uint32_t word) {
    uint8_t encoded[sizeof(uint32_t)];
    store32(encoded, word);
    blake2b_update(state, encoded, sizeof(encoded));
 }
 // device_argon2d_hash, kept in line with the C reference implementation.
 //
 // Computes a 32-byte Argon2d (version 0x13) tag on the calling thread.
 //
 //   output            - receives the 32-byte tag.
 //   input, input_len  - password / message bytes.
 //   t_cost            - number of passes.
 //   m_cost            - requested memory size in blocks.
 //   lanes             - number of lanes; must be non-zero.
 //   memory            - caller-provided working area of at least
 //                       max(m_cost, 8 * lanes) blocks (rounded down to a
 //                       multiple of 4 * lanes).
 //   salt, salt_len    - salt bytes.
 //
 // No secret key and no associated data are used (both lengths are hashed as
 // zero). If lanes is zero the function returns without writing `output`.
 __device__ void device_argon2d_hash(
    uint8_t* output,
    const uint8_t* input, size_t input_len,
    uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
    block* memory,
    const uint8_t* salt, size_t salt_len
 ) {
    const uint32_t tag_length = 32;
    // lanes is used as a divisor below; nothing sensible can be computed.
    if (lanes == 0) {
        return;
    }
    // 1. Adjust the memory size: at least two blocks per segment, rounded
    //    down to a whole number of segments.
    uint32_t memory_blocks = m_cost;
    if (memory_blocks < 2 * ARGON2_SYNC_POINTS * lanes) {
        memory_blocks = 2 * ARGON2_SYNC_POINTS * lanes;
    }
    const uint32_t segment_length = memory_blocks / (lanes * ARGON2_SYNC_POINTS);
    const uint32_t lane_length = segment_length * ARGON2_SYNC_POINTS;
    // 2. Compute the initial hash H0 over the parameters and inputs.
    //    The memory parameter hashed here is the caller's m_cost, not the
    //    rounded block count, which is what the reference implementation and
    //    RFC 9106 specify.
    uint8_t blockhash[ARGON2_PREHASH_DIGEST_LENGTH];
    blake2b_state BlakeHash;
    blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
    argon2d_h0_absorb_le32(&BlakeHash, lanes);
    argon2d_h0_absorb_le32(&BlakeHash, tag_length);
    argon2d_h0_absorb_le32(&BlakeHash, m_cost);
    argon2d_h0_absorb_le32(&BlakeHash, t_cost);
    argon2d_h0_absorb_le32(&BlakeHash, ARGON2_VERSION_13);
    argon2d_h0_absorb_le32(&BlakeHash, 0); // type: Argon2d
    argon2d_h0_absorb_le32(&BlakeHash, (uint32_t)input_len);
    blake2b_update(&BlakeHash, (const uint8_t *)input, input_len);
    argon2d_h0_absorb_le32(&BlakeHash, (uint32_t)salt_len);
    blake2b_update(&BlakeHash, (const uint8_t *)salt, salt_len);
    argon2d_h0_absorb_le32(&BlakeHash, 0); // secret length
    argon2d_h0_absorb_le32(&BlakeHash, 0); // associated-data length
    blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
    // 3. Initialize first blocks in each lane
    //    Seed = H0 || LE32(block index) || LE32(lane index).
    uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
    uint8_t initial_hash[ARGON2_PREHASH_SEED_LENGTH];
    c_memcpy(initial_hash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
    c_memset(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0, ARGON2_PREHASH_SEED_LENGTH - ARGON2_PREHASH_DIGEST_LENGTH);
    for (uint32_t l = 0; l < lanes; ++l) {
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
        blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&memory[l * lane_length], blockhash_bytes);
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
        blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&memory[l * lane_length + 1], blockhash_bytes);
    }
    // 4. Fill memory
    fill_memory(memory, t_cost, lanes, lane_length, segment_length);
    // 5. Final block mixing: XOR together the last block of every lane
    block final_block;
    copy_block(&final_block, &memory[0 * lane_length + (lane_length - 1)]);
    for (uint32_t l = 1; l < lanes; ++l) {
        uint32_t last_block_in_lane = l * lane_length + (lane_length - 1);
        xor_block(&final_block, &memory[last_block_in_lane]);
    }
    // 6. Final hash
    uint8_t final_block_bytes[ARGON2_BLOCK_SIZE];
    store_block(final_block_bytes, &final_block);
    blake2b_long(output, tag_length, final_block_bytes, ARGON2_BLOCK_SIZE);
 }
 //=== Example __global__ kernel (variant that takes an explicit salt) ===//
 // Assumes the host has already allocated the Argon2 block memory and passes
 // a pointer to it (memory_ptr).
 // Only thread 0 of block 0 computes the hash; every other thread exits
 // immediately.
 __global__ void argon2d_hash_device_kernel(
    uint8_t* output,
    const uint8_t* input, size_t input_len,
    uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
    block* memory_ptr,   // pointer to the memory region allocated by the host
    const uint8_t* salt, size_t salt_len
 ) {
    const bool is_lead_thread = (threadIdx.x == 0) && (blockIdx.x == 0);
    if (!is_lead_thread) {
        return;
    }
    device_argon2d_hash(output, input, input_len, t_cost, m_cost, lanes, memory_ptr, salt, salt_len);
 }
--- a/rin/miner/gpu/RinHash-cuda/blake3_device.cuh
+++ b/rin/miner/gpu/RinHash-cuda/blake3_device.cuh
@@ -0,0 +1,274 @@
 #include "blaze3_cpu.cuh"
 // Number of threads per thread block
 // NOTE(review): the child launches in compute() use a literal 16 rather than
 // this constant — confirm whether they are meant to track it.
 __constant__ const int NUM_THREADS = 16;
 // redefine functions, but for the GPU
 // all of them are the same but with g_ prefixed
 // Device-side copy of the eight IV words (same values as the host IV table
 // in blaze3_cpu.cuh). g_compress() loads the first four into state[8..11].
 __constant__ const u32 g_IV[8] = {
    0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 
    0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
 };
 // Message word permutation applied between rounds: after g_permute(),
 // new m[i] == old m[g_MSG_PERMUTATION[i]].
 __constant__ const int g_MSG_PERMUTATION[] = {
    2, 6, 3, 10, 7, 0, 4, 13, 
    1, 11, 12, 5, 9, 14, 15, 8
 };
 // Rotate a 32-bit word right by `shift` bits (device version).
 // Callers in this file only pass shifts of 16, 12, 8 and 7.
 __device__ u32 g_rotr(u32 value, int shift) {
    const u32 shifted_down = value >> shift;
    const u32 wrapped_up = value << (usize - shift);
    return shifted_down | wrapped_up;
 }
 // Quarter-round mixing function (device version): mixes the four state words
 // at indices a, b, c, d with the two message words mx and my.
 __device__ void g_g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
    // First half: inject mx, rotations by 16 and 12.
    state[a] += state[b] + mx;
    state[d] = g_rotr(state[d] ^ state[a], 16);
    state[c] += state[d];
    state[b] = g_rotr(state[b] ^ state[c], 12);
    // Second half: inject my, rotations by 8 and 7.
    state[a] += state[b] + my;
    state[d] = g_rotr(state[d] ^ state[a], 8);
    state[c] += state[d];
    state[b] = g_rotr(state[b] ^ state[c], 7);
 }
 // One full round (device version): four column mixes followed by four
 // diagonal mixes over the 4x4 state, consuming all 16 message words in order.
 __device__ void g_round(u32 state[16], u32 m[16]) {
    // Mix the columns: (k, 4+k, 8+k, 12+k) with m[2k], m[2k+1].
    for (u32 k = 0; k < 4; ++k) {
        g_g(state, k, 4 + k, 8 + k, 12 + k, m[2 * k], m[2 * k + 1]);
    }
    // Mix the diagonals: each row index is rotated one step further than the
    // row above it; message words continue from m[8].
    for (u32 k = 0; k < 4; ++k) {
        g_g(state,
            k,
            4 + ((k + 1) & 3),
            8 + ((k + 2) & 3),
            12 + ((k + 3) & 3),
            m[8 + 2 * k], m[9 + 2 * k]);
    }
 }
 // Reorder the 16 message words in place according to g_MSG_PERMUTATION
 // (device version): new m[i] takes the value of old m[g_MSG_PERMUTATION[i]].
 __device__ void g_permute(u32 m[16]) {
    // Snapshot first so reads are not affected by the in-place writes.
    u32 before[16];
    for (int k = 0; k < 16; ++k) {
        before[k] = m[k];
    }
    for (int k = 0; k < 16; ++k) {
        m[k] = before[g_MSG_PERMUTATION[k]];
    }
 }
 // Word-wise copy used inside kernels in place of memcpy.
 // `size` is a byte count; it is divided by 4 (u32 is assumed to be 4 bytes),
 // so any remainder bytes are not copied.
 __device__ void g_memcpy(u32 *lhs, const u32 *rhs, int size) {
    const int word_count = size / 4;
    int w = 0;
    while (w < word_count) {
        lhs[w] = rhs[w];
        ++w;
    }
 }
 // Element-wise fill: assigns `val` to the first `count` elements reachable
 // through `dest`. Note that `count` is an element count, not a byte count.
 template<typename T, typename ptr_t>
 __device__ void g_memset(ptr_t dest, T val, int count) {
    int k = 0;
    while (k < count) {
        dest[k] = val;
        ++k;
    }
 }
 // Compression function (device version).
 //
 //   chaining_value - 8 input words.
 //   block_words    - 16 message words (copied; the caller's array is not
 //                    modified).
 //   counter        - 64-bit counter, split into state[12] (low) and
 //                    state[13] (high).
 //   block_len      - stored in state[14].
 //   flags          - stored in state[15].
 //   state          - 16-word output; the first 8 words are what callers in
 //                    this file carry forward as the next chaining value.
 __device__ void g_compress(
    u32 *chaining_value,
    u32 *block_words,
    u64 counter,
    u32 block_len,
    u32 flags,
    u32 *state
 ) {
    // state[0..7] = chaining value, state[8..11] = first four IV words.
    for (int k = 0; k < 8; ++k) {
        state[k] = chaining_value[k];
    }
    for (int k = 0; k < 4; ++k) {
        state[8 + k] = g_IV[k];
    }
    state[12] = (u32)counter;
    state[13] = (u32)(counter >> 32);
    state[14] = block_len;
    state[15] = flags;

    // Work on a private copy of the message because it is permuted in place.
    u32 schedule[16];
    for (int k = 0; k < 16; ++k) {
        schedule[k] = block_words[k];
    }

    // Seven rounds, with the message permuted between consecutive rounds.
    for (int r = 0; r < 7; ++r) {
        if (r != 0) {
            g_permute(schedule);
        }
        g_round(state, schedule);
    }

    // Feed-forward: fold the upper half into the lower half, then the input
    // chaining value into the upper half.
    for (int k = 0; k < 8; ++k) {
        state[k] ^= state[k + 8];
        state[k + 8] ^= chaining_value[k];
    }
 }
 // Pack bytes into 32-bit words, least significant byte first (device
 // version).
 //
 //   bytes     - source bytes; bytes_len is expected to be a multiple of 4
 //               because each iteration reads bytes[i..i+3].
 //   words     - destination, at least bytes_len / 4 entries.
 //   bytes_len - number of source bytes to convert.
 //
 // Each byte is widened to u32 before shifting: shifting the promoted signed
 // int left by 24 would overflow for byte values >= 0x80.
 __device__ void g_words_from_little_endian_bytes(
    u8 *bytes, u32 *words, u32 bytes_len
 ) {
    for(u32 i=0; i<bytes_len; i+=4) {
        words[i/4] = ((u32)bytes[i+3] << 24)
                   | ((u32)bytes[i+2] << 16)
                   | ((u32)bytes[i+1] << 8)
                   |  (u32)bytes[i];
    }
 }
 // Compress this chunk on the device and leave the result in raw_hash.
 //
 // Parent nodes (flags has PARENT set) are a single compression of `data`
 // with counter 0. Leaf nodes are processed BLOCK_LEN bytes at a time, chaining
 // the first 8 words of raw_hash from one block to the next; the first block
 // gets CHUNK_START and the last gets CHUNK_END plus `out_flags`.
 //
 // The final, possibly partial, block is staged in a zero-padded scratch
 // buffer so that bytes of leaf_data beyond leaf_len never reach the hash
 // (the host-side compress_chunk() pads the same way).
 //
 // Side effect: an empty leaf has its first BLOCK_LEN bytes zeroed and its
 // leaf_len set to BLOCK_LEN.
 __device__ void Chunk::g_compress_chunk(u32 out_flags) {
    if(flags&PARENT) {
        g_compress(
            key,
            data,
            0,  // counter is always zero for parent nodes
            BLOCK_LEN,
            flags | out_flags,
            raw_hash
        );
        return;
    }
    u32 chaining_value[8];
    u32 block_len = BLOCK_LEN, flagger;
    g_memcpy(chaining_value, key, 32);
    bool empty_input = (leaf_len==0);
    if(empty_input) {
        for(u32 i=0; i<BLOCK_LEN; i++)
            leaf_data[i] = 0U;
        leaf_len = BLOCK_LEN;
    }
    // move all mem allocs outside loop
    u32 block_words[16];
    u8 block_cast[BLOCK_LEN];
    for(u32 i=0; i<leaf_len; i+=BLOCK_LEN) {
        flagger = flags;
        // for the last message block
        if(i+BLOCK_LEN > leaf_len)
            block_len = leaf_len%BLOCK_LEN;
        else
            block_len = BLOCK_LEN;
        // special case
        if(empty_input)
            block_len = 0;
        // clear up block_words
        g_memset(block_words, 0, 16);
        // round the byte count up to a whole number of 32-bit words
        u32 new_block_len(block_len);
        if(block_len%4)
            new_block_len += 4 - (block_len%4);
        // Stage the block: real data first, zeros for the padding bytes.
        for(u32 b=0; b<new_block_len; b++)
            block_cast[b] = (b < block_len) ? leaf_data[i+b] : (u8)0;
        g_words_from_little_endian_bytes(block_cast, block_words, new_block_len);
        if(i==0)
            flagger |= CHUNK_START;
        if(i+BLOCK_LEN >= leaf_len)
            flagger |= CHUNK_END | out_flags;
        // raw hash for root node
        g_compress(
            chaining_value,
            block_words,
            counter,
            block_len,
            flagger,
            raw_hash
        );
        g_memcpy(chaining_value, raw_hash, 32);
    }
 }
 // Recursive tree-hash kernel over data[l..r).
 //
 // With a single chunk (n == 1) the chunk is compressed directly. Otherwise
 // two child grids are launched for the left and right halves, after which
 // data[l] is turned into a parent node whose message is the first 8 words of
 // data[l].raw_hash followed by the first 8 words of data[l+n/2].raw_hash,
 // and is compressed again. The result for the range ends up in data[l].
 //
 // NOTE(review): every thread with tid < n takes the same branch, so for
 // n > 1 each of those threads launches both child grids and performs the
 // merge on data[l] — confirm whether this is meant to be restricted to one
 // thread.
 // NOTE(review): __syncthreads() is reached only by threads that did not take
 // the early return above, and it is a barrier between threads of this block;
 // whether the child grids have finished writing raw_hash by the time it is
 // read below needs to be confirmed against the CUDA dynamic parallelism
 // rules for the targeted toolkit.
 __global__ void compute(Chunk *data, int l, int r) {
    // n is always a power of 2
    int n = r-l;
    int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if(tid >= n)
        return;
    if(n==1) {
        data[l].g_compress_chunk();
        // printf("Compressing : %d\n", l);
    }
    else {
        // Launch child kernels without synchronization (host will handle sync)
        compute<<<n/2,16>>>(data, l, l+n/2);
        compute<<<n/2,16>>>(data, l+n/2, r);
        // Wait for all threads in this block to finish
        __syncthreads();
        data[l].flags |= PARENT;
        // data is u32[16]: words 0..7 take the left hash, words 8..15 the right
        memcpy(data[l].data, data[l].raw_hash, 32);
        memcpy(data[l].data+8, data[l+n/2].raw_hash, 32);
        data[l].g_compress_chunk();
        // printf("Compressing : %d to %d\n", l, r);
    }
 }
 // Host-side launcher for the GPU tree hash.
 //
 //   data       - N host chunks to hash.
 //   N          - number of chunks.
 //   result     - host chunk that receives element 0 of the device array
 //                after the kernel has run.
 //   memory_bar - device buffer with room for at least N chunks.
 //
 // Return codes of the CUDA calls are not inspected.
 void light_hash(Chunk *data, int N, Chunk *result, Chunk *memory_bar) {
    // Allow device-side synchronisation up to 16 levels of recursion.
    cudaDeviceSetLimit(cudaLimitDevRuntimeSyncDepth, 16);

    // Upload the chunks into the caller-provided device buffer.
    Chunk *const device_chunks = memory_bar;
    const int upload_bytes = N*sizeof(Chunk);
    cudaMemcpy(device_chunks, data, upload_bytes, cudaMemcpyHostToDevice);

    // Run the reduction over the whole range [0, N).
    compute<<<N,32>>>(device_chunks, 0, N);

    // Fetch the first chunk, which holds the reduced result.
    cudaMemcpy(result, device_chunks, sizeof(Chunk), cudaMemcpyDeviceToHost);
 }
 // Device-callable single-chunk hash.
 //
 //   input, input_len - message bytes. Only the first BLOCK_LEN * 16 (1024)
 //                      bytes are hashed; anything beyond that is ignored.
 //   output           - receives 32 bytes: the first 8 words of the chunk's
 //                      raw_hash, each written least significant byte first.
 //
 // The chunk is keyed with g_IV, uses counter 0 and no base flags, and is
 // compressed with the ROOT flag.
 __device__ void light_hash_device(const uint8_t* input, size_t input_len, uint8_t* output) {
    // Create a single chunk for processing the input
    Chunk chunk;
    // Initialize the chunk with the input data
    for (int i = 0; i < 8; i++) {
        chunk.key[i] = g_IV[i]; // Use device constant IV
    }
    // Copy the input data to leaf_data (with bounds checking)
    size_t copy_len = min(input_len, (size_t)BLOCK_LEN * 16); // Ensure we don't overflow
    for (size_t i = 0; i < copy_len; i++) {
        chunk.leaf_data[i] = input[i];
    }
    // The default Chunk constructor leaves leaf_data uninitialised. Zero the
    // bytes up to the next 4-byte boundary so that word-wise readers of the
    // final partial word never pick up stale stack contents. The bound cannot
    // exceed the array because its size (1024) is itself a multiple of 4.
    size_t padded_len = (copy_len + 3) & ~(size_t)3;
    for (size_t i = copy_len; i < padded_len; i++) {
        chunk.leaf_data[i] = 0;
    }
    chunk.leaf_len = copy_len;
    chunk.counter = 0;
    chunk.flags = 0; // Default flags
    // Process the chunk directly
    chunk.g_compress_chunk(ROOT); // Set ROOT flag for final output
    // Copy the raw hash to the output
    for (int i = 0; i < 8; i++) {
        // Convert 32-bit words to bytes in little-endian format
        const u32 word = chunk.raw_hash[i];
        output[i*4]   = (uint8_t)(word);
        output[i*4+1] = (uint8_t)(word >> 8);
        output[i*4+2] = (uint8_t)(word >> 16);
        output[i*4+3] = (uint8_t)(word >> 24);
    }
 }
 // Alias for compatibility with other device code.
 // Forwards unchanged to light_hash_device(): hashes `input_len` bytes of
 // `input` and writes the 32-byte result to `output`.
 __device__ void blake3_hash_device(const uint8_t* input, size_t input_len, uint8_t* output) {
    light_hash_device(input, input_len, output);
 }
--- a/rin/miner/gpu/RinHash-cuda/blaze3_cpu.cuh
+++ b/rin/miner/gpu/RinHash-cuda/blaze3_cpu.cuh
@@ -0,0 +1,419 @@
 #include <iostream>
 #include <algorithm>
 #include <cstring>
 #include <vector>
 using namespace std;
 // The original code used a Thrust pinned-memory host vector here.
 // That dependency was removed for portability; the includes are kept for
 // reference only.
 // #include <thrust/host_vector.h>
 // #include <thrust/system/cuda/experimental/pinned_allocator.h>
 // Short aliases for the fixed-width integer types used throughout.
 using u32 = uint32_t;
 using u64 = uint64_t;
 using u8  = uint8_t;
 // Sizes in bytes.
 const u32 OUT_LEN = 32;
 const u32 KEY_LEN = 32;
 const u32 BLOCK_LEN = 64;
 const u32 CHUNK_LEN = 1024;
 // Multiple chunks make a snicker bar :)
 // (number of chunks collected per level before they are reduced on the GPU)
 const u32 SNICKER = 1U << 10;
 // Factory height and snicker size have an inversely proportional relationship
 // FACTORY_HT * (log2 SNICKER) + 10 >= 64 
 // NOTE(review): with FACTORY_HT = 5 and SNICKER = 2^10 the expression above
 // evaluates to 60 — confirm the intended bound.
 const u32 FACTORY_HT = 5;
 // Flag bits; they are OR-ed together and passed to the compression function
 // as the last state word.
 const u32 CHUNK_START = 1 << 0;
 const u32 CHUNK_END = 1 << 1;
 const u32 PARENT = 1 << 2;
 const u32 ROOT = 1 << 3;
 const u32 KEYED_HASH = 1 << 4;
 const u32 DERIVE_KEY_CONTEXT = 1 << 5;
 const u32 DERIVE_KEY_MATERIAL = 1 << 6;
 // Bit width of u32, used by the rotate helpers.
 const int usize = sizeof(u32) * 8;
 // Initialisation vector. compress() loads the first four words into
 // state[8..11]; Hasher::_new() uses all eight as the default key.
 // NOTE(review): this is a non-const, non-inline definition in a header —
 // confirm the header is only ever included from a single translation unit.
 u32 IV[8] = {
    0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 
    0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
 };
 // Message word permutation applied between rounds: after permute(),
 // new m[i] == old m[MSG_PERMUTATION[i]].
 const int MSG_PERMUTATION[] = {
    2, 6, 3, 10, 7, 0, 4, 13, 
    1, 11, 12, 5, 9, 14, 15, 8
 };
 // Rotate a 32-bit word right by `shift` bits (host version).
 // Callers in this file only pass shifts of 16, 12, 8 and 7.
 u32 rotr(u32 value, int shift) {
    const u32 shifted_down = value >> shift;
    const u32 wrapped_up = value << (usize - shift);
    return shifted_down | wrapped_up;
 }
 // Quarter-round mixing function (host version): mixes the four state words
 // at indices a, b, c, d with the two message words mx and my.
 void g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
    // First half: inject mx, rotations by 16 and 12.
    state[a] += state[b] + mx;
    state[d] = rotr(state[d] ^ state[a], 16);
    state[c] += state[d];
    state[b] = rotr(state[b] ^ state[c], 12);
    // Second half: inject my, rotations by 8 and 7.
    state[a] += state[b] + my;
    state[d] = rotr(state[d] ^ state[a], 8);
    state[c] += state[d];
    state[b] = rotr(state[b] ^ state[c], 7);
 }
 // One full round (host version): four column mixes followed by four diagonal
 // mixes over the 4x4 state, consuming all 16 message words in order.
 void round(u32 state[16], u32 m[16]) {
    // Mix the columns: (k, 4+k, 8+k, 12+k) with m[2k], m[2k+1].
    for (u32 k = 0; k < 4; ++k) {
        g(state, k, 4 + k, 8 + k, 12 + k, m[2 * k], m[2 * k + 1]);
    }
    // Mix the diagonals: each row index is rotated one step further than the
    // row above it; message words continue from m[8].
    for (u32 k = 0; k < 4; ++k) {
        g(state,
          k,
          4 + ((k + 1) & 3),
          8 + ((k + 2) & 3),
          12 + ((k + 3) & 3),
          m[8 + 2 * k], m[9 + 2 * k]);
    }
 }
 // Reorder the 16 message words in place according to MSG_PERMUTATION
 // (host version): new m[i] takes the value of old m[MSG_PERMUTATION[i]].
 void permute(u32 m[16]) {
    // Snapshot first so reads are not affected by the in-place writes.
    u32 before[16];
    for (int k = 0; k < 16; ++k) {
        before[k] = m[k];
    }
    for (int k = 0; k < 16; ++k) {
        m[k] = before[MSG_PERMUTATION[k]];
    }
 }
 // Compression function (host version).
 //
 //   chaining_value - 8 input words.
 //   block_words    - 16 message words (copied; the caller's array is not
 //                    modified).
 //   counter        - 64-bit counter, split into state[12] (low) and
 //                    state[13] (high).
 //   block_len      - stored in state[14].
 //   flags          - stored in state[15].
 //   state          - 16-word output; the first 8 words are what callers in
 //                    this file carry forward as the next chaining value.
 void compress(
    u32 *chaining_value,
    u32 *block_words,
    u64 counter,
    u32 block_len,
    u32 flags,
    u32 *state
 ) {
    // state[0..7] = chaining value, state[8..11] = first four IV words.
    for (int k = 0; k < 8; ++k) {
        state[k] = chaining_value[k];
    }
    for (int k = 0; k < 4; ++k) {
        state[8 + k] = IV[k];
    }
    state[12] = (u32)counter;
    state[13] = (u32)(counter >> 32);
    state[14] = block_len;
    state[15] = flags;

    // Work on a private copy of the message because it is permuted in place.
    u32 schedule[16];
    for (int k = 0; k < 16; ++k) {
        schedule[k] = block_words[k];
    }

    // Seven rounds, with the message permuted between consecutive rounds.
    for (int r = 0; r < 7; ++r) {
        if (r != 0) {
            permute(schedule);
        }
        round(state, schedule);
    }

    // Feed-forward: fold the upper half into the lower half, then the input
    // chaining value into the upper half.
    for (int k = 0; k < 8; ++k) {
        state[k] ^= state[k + 8];
        state[k + 8] ^= chaining_value[k];
    }
 }
 // Pack bytes into 32-bit words, least significant byte first (host version).
 //
 //   bytes     - source bytes; bytes_len is expected to be a multiple of 4
 //               because each iteration reads bytes[i..i+3].
 //   words     - destination, at least bytes_len / 4 entries.
 //   bytes_len - number of source bytes to convert.
 //
 // Each byte is widened to u32 before shifting: shifting the promoted signed
 // int left by 24 would overflow for byte values >= 0x80.
 void words_from_little_endian_bytes(u8 *bytes, u32 *words, u32 bytes_len) {
    for(u32 i=0; i<bytes_len; i+=4) {
        words[i/4] = ((u32)bytes[i+3] << 24)
                   | ((u32)bytes[i+2] << 16)
                   | ((u32)bytes[i+1] << 8)
                   |  (u32)bytes[i];
    }
 }
 // One node of the hash tree: either a leaf holding up to 1024 input bytes or
 // a parent holding the two child hashes in `data`.
 // Instances are copied between host and device as raw bytes (see
 // light_hash), so the member layout is part of the contract.
 struct Chunk {
    // use only when it is a leaf node
    // leaf data may have less than 1024 bytes
    u8 leaf_data[1024];
    // number of valid bytes in leaf_data
    u32 leaf_len;
    // use in all other cases
    // data will always have 64 bytes
    u32 data[16];
    // flag bits (CHUNK_START, PARENT, ...) applied to every compression
    u32 flags;
    // full 16-word output of the last compression; the first 8 words are the
    // chaining value / hash
    u32 raw_hash[16];
    // 8-word key used as the initial chaining value
    u32 key[8];
    // only useful for leaf nodes
    u64 counter;
    // Constructor for leaf nodes
    // Copies `size` bytes from `input` into a zeroed leaf buffer.
    // NOTE(review): `size` is not checked against the 1024-byte buffer —
    // callers must guarantee 0 <= size <= 1024.
    __device__ __host__ Chunk(char *input, int size, u32 _flags, u32 *_key, u64 ctr){
        counter = ctr;
        flags = _flags;
        memcpy(key, _key, 8*sizeof(*key));
        memset(leaf_data, 0, 1024);
        memcpy(leaf_data, input, size);
        leaf_len = size;
    }
    // Constructor for an empty node: sets key, flags, counter and leaf_len;
    // leaf_data, data and raw_hash are left uninitialised.
    __device__ __host__ Chunk(u32 _flags, u32 *_key) {
        counter = 0;
        flags = _flags;
        memcpy(key, _key, 8*sizeof(*key));
        leaf_len = 0;
    }
    // Default constructor: leaves every member uninitialised.
    __device__ __host__ Chunk() {}
    // Chunk() : leaf_len(0) {}
    // process data in sizes of message blocks and store cv in hash
    // (host version; the argument is OR-ed into the flags of the last block)
    void compress_chunk(u32=0);
    // device counterpart of compress_chunk
    __device__ void g_compress_chunk(u32=0);
 };
 // Compress this chunk on the host and leave the result in raw_hash.
 //
 // Parent nodes (flags has PARENT set) are a single compression of `data`
 // with counter 0. Leaf nodes are processed BLOCK_LEN bytes at a time, chaining
 // the first 8 words of raw_hash from one block to the next; the first block
 // gets CHUNK_START and the last gets CHUNK_END plus `out_flags`.
 //
 // Side effect: an empty leaf has its first BLOCK_LEN bytes zeroed and its
 // leaf_len set to BLOCK_LEN.
 void Chunk::compress_chunk(u32 out_flags) {
    if (flags & PARENT) {
        // counter is always zero for parent nodes
        compress(key, data, 0, BLOCK_LEN, flags | out_flags, raw_hash);
        return;
    }

    u32 cv[8];
    memcpy(cv, key, sizeof(cv));

    // An empty leaf is hashed as one zero-filled block of declared length 0.
    const bool empty_input = (leaf_len == 0);
    if (empty_input) {
        memset(leaf_data, 0, BLOCK_LEN);
        leaf_len = BLOCK_LEN;
    }

    for (u32 offset = 0; offset < leaf_len; offset += BLOCK_LEN) {
        // Length reported to the compression function for this block.
        u32 block_len;
        if (empty_input) {
            block_len = 0;
        } else if (offset + BLOCK_LEN > leaf_len) {
            block_len = leaf_len % BLOCK_LEN;   // trailing partial block
        } else {
            block_len = BLOCK_LEN;
        }

        // Round up to whole 32-bit words for the byte-to-word conversion.
        const u32 tail = block_len % 4;
        const u32 padded_len = tail ? block_len + (4 - tail) : block_len;

        // Stage the block in a zero-padded scratch buffer.
        u8 staged[BLOCK_LEN];
        memset(staged, 0, padded_len * sizeof(*staged));
        memcpy(staged, leaf_data + offset, block_len * sizeof(*staged));

        u32 block_words[16];
        memset(block_words, 0, sizeof(block_words));
        words_from_little_endian_bytes(staged, block_words, padded_len);

        u32 block_flags = flags;
        if (offset == 0) {
            block_flags |= CHUNK_START;
        }
        if (offset + BLOCK_LEN >= leaf_len) {
            block_flags |= CHUNK_END | out_flags;
        }

        // raw hash for root node
        compress(cv, block_words, counter, block_len, block_flags, raw_hash);
        memcpy(cv, raw_hash, sizeof(cv));
    }
 }
 // Fallback alias: use std::vector instead of thrust pinned host vector.
 // The name is kept so code written against the Thrust type still compiles.
 using thrust_vector = std::vector<Chunk>;
 // The GPU hasher: reduces the given chunks on the device and stores the
 // resulting chunk through the third argument; the last argument is the
 // device scratch buffer.
 void light_hash(Chunk*, int, Chunk*, Chunk*);
 // Reduce data[first..last) to a single chunk.
 //
 // The range length is expected to be a power of two. A single chunk is
 // compressed in place on the host, which avoids a GPU round trip; any larger
 // range is handed to light_hash() using `memory_bar` as device scratch space.
 // (A host-only recursive merge of the two halves was the earlier approach and
 // is no longer used.)
 Chunk hash_many(Chunk *data, int first, int last, Chunk *memory_bar) {
    const int count = last - first;

    if (count != 1) {
        Chunk reduced;
        light_hash(data + first, count, &reduced, memory_bar);
        return reduced;
    }

    // Reduce GPU calling overhead: a lone chunk is hashed right here.
    Chunk &only = data[first];
    only.compress_chunk();
    return only;
 }
 // Forward declarations; the definitions are outside this part of the file.
 // merge: combines a left and a right node into one chunk.
 Chunk merge(Chunk &left, Chunk &right);
 // hash_root: writes the output for `node` into out_slice.
 void hash_root(Chunk &node, vector<u8> &out_slice);
 // Incremental hasher that collects chunks level by level and reduces full
 // levels on the GPU.
 //
 // The data members are aggregate-initialised positionally by new_internal(),
 // so their declaration order must not change.
 // NOTE(review): the destructor releases memory_bar but copying is not
 // disabled — confirm that an initialised Hasher is never copied.
 struct Hasher {
    // 8-word key handed to every chunk
    u32 key[8];
    // base flags handed to every chunk
    u32 flags;
    // counter assigned to the next chunk
    u64 ctr;
    // total input size in bytes, used to size the device buffer
    u64 file_size;
    // A memory bar for CUDA to use during its computation
    Chunk* memory_bar;
    // Factory is an array of FACTORY_HT possible SNICKER bars
    thrust_vector factory[FACTORY_HT];

    // construction helpers
    static Hasher new_internal(u32 key[8], u32 flags, u64 fsize);
    static Hasher _new(u64);

    // initializes cuda memory (if needed)
    void init();

    // frees cuda memory (if it is there); nothing to release otherwise
    ~Hasher() {
        if (memory_bar != nullptr) {
            cudaFree(memory_bar);
        }
    }

    void update(char *input, int size);
    void finalize(vector<u8> &out_slice);
    void propagate();
 };
 // Builds a Hasher from an 8-word key, a flag word and the expected input
 // size. The struct is initialized positionally (key, flags, ctr,
 // file_size); memory_bar and factory are not listed here and are only
 // given real values later (memory_bar by init()).
 Hasher Hasher::new_internal(u32 key[8], u32 flags, u64 fsize) {
    return Hasher{
        {
            // copy the key word by word into the key[8] member
            key[0], key[1], key[2], key[3],
            key[4], key[5], key[6], key[7]
        },
        flags,
        0,   // counter
        fsize
    };
 }
 // Convenience factory: a Hasher keyed with IV, with flags 0, that records
 // fsize as the input size.
 Hasher Hasher::_new(u64 fsize) { return new_internal(IV, 0, fsize); }
 // Allocates the CUDA scratch buffer (memory_bar) and pre-reserves the two
 // lowest factory levels. With file_size == 0 nothing is allocated and
 // memory_bar is set to nullptr. If cudaMalloc fails, memory_bar is also
 // left as nullptr so the destructor never frees an invalid pointer.
 void Hasher::init() {
    if(file_size<1) {
        memory_bar = nullptr;
        return;
    }
    // Number of chunks, rounded up. (The previous ceil() was applied after
    // the integer division and therefore never rounded anything.)
    u64 num_chunks = file_size / CHUNK_LEN + (file_size % CHUNK_LEN != 0);
    u32 bar_size = min(num_chunks, (u64)SNICKER);
    // Just for safety :)
    ++bar_size;
    if(cudaMalloc(&memory_bar, bar_size*sizeof(Chunk)) != cudaSuccess)
        memory_bar = nullptr;
    // Let the most commonly used places always have memory
    // +1 so that it does not resize when it hits CHUNK_LEN
    u32 RESERVE = SNICKER + 1;
    factory[0].reserve(RESERVE);
    factory[1].reserve(RESERVE);
 }
 // Carries full levels upward: while a level holds exactly SNICKER nodes,
 // they are reduced with hash_many() into one node, the level is emptied,
 // and the node is appended to the next level, which is then checked in turn.
 void Hasher::propagate() {
    for(int lvl = 0; factory[lvl].size() == SNICKER; ++lvl) {
        Chunk reduced = hash_many(factory[lvl].data(), 0, SNICKER, memory_bar);
        factory[lvl].clear();
        factory[lvl+1].push_back(reduced);
    }
 }
 // Appends one leaf chunk built from `size` bytes at `input` (tagged with
 // the current flags, key and counter), advances the counter, and triggers
 // propagate() once the lowest level has collected SNICKER chunks.
 void Hasher::update(char *input, int size) {
    thrust_vector &leaves = factory[0];
    leaves.push_back(Chunk(input, size, flags, key, ctr));
    ctr += 1;
    if(leaves.size() != SNICKER)
        return;
    propagate();
 }
 // Collapses every non-empty factory level, bottom-up, into one node and
 // finally writes the output derived from the root into out_slice.
 // For each level the node count is split by its set bits (from SNICKER
 // down to 1); each power-of-two run is reduced with hash_many(), and the
 // resulting subtrees are merged pairwise starting from the end of the
 // list. The level's single result is pushed to the next level, or becomes
 // the root when the level is the last one.
 void Hasher::finalize(vector<u8> &out_slice) {
    Chunk root(flags, key);
    for(int level=0; level<FACTORY_HT; ++level) {
        const u32 count = factory[level].size();
        if(count == 0)
            continue;
        vector<Chunk> pieces;
        int offset = 0;
        for(u32 width=SNICKER; width != 0; width >>= 1) {
            if((count & width) == 0)
                continue;
            pieces.push_back(hash_many(factory[level].data(), offset, offset+width, memory_bar));
            offset += width;
        }
        while(pieces.size() >= 2) {
            // The last element is the right child, the one before it the
            // left child: that is the order they appear within the array.
            Chunk right_child = pieces.back();
            pieces.pop_back();
            Chunk left_child = pieces.back();
            pieces.pop_back();
            pieces.push_back(merge(left_child, right_child));
        }
        if(level<FACTORY_HT-1)
            factory[level+1].push_back(pieces[0]);
        else
            root = pieces[0];
    }
    hash_root(root, out_slice);
 }
 // Builds the parent of two subtrees. Both children are compressed first;
 // the parent inherits the left child's flags (plus PARENT) and key, and
 // its data is the left raw hash followed by the right raw hash
 // (32 bytes each). The parent itself is returned uncompressed.
 Chunk merge(Chunk &left, Chunk &right) {
    const size_t hash_bytes = 32;
    left.compress_chunk();
    right.compress_chunk();
    Chunk parent(left.flags, left.key);
    parent.flags = parent.flags | PARENT;
    memcpy(&parent.data[0], left.raw_hash, hash_bytes);
    memcpy(&parent.data[8], right.raw_hash, hash_bytes);
    return parent;
 }
 // Fills out_slice with output bytes derived from the root node.
 // The last message block must not be hashed like the others: the node is
 // compressed with the ROOT flag, once per output block, with node.counter
 // set to the running output block index. Each compression yields 16 words
 // that are serialized little-endian; the final block may be cut short.
 //   node      - root node; its counter and raw_hash are overwritten
 //   out_slice - pre-sized buffer; its size() selects the output length
 void hash_root(Chunk &node, vector<u8> &out_slice) {
    const u64 block_bytes = 2*OUT_LEN;
    const u64 total = out_slice.size();
    u64 output_block_counter = 0;
    u32 words[16] = {};
    // Unsigned comparison: the former int(size - i) > 0 test truncated to
    // int and stopped producing output for sizes of 2 GiB and above.
    for(u64 i=0; i<total; i+=block_bytes) {
        node.counter = output_block_counter;
        node.compress_chunk(ROOT);
        // words is u32[16]
        memcpy(words, node.raw_hash, 16*sizeof(*words));
        const u64 take = min(block_bytes, total-i);
        // byte b of the block is byte (b%4) of word (b/4), little-endian
        for(u64 b=0; b<take; b++)
            out_slice[i+b] = (words[b/4]>>(8*(b%4))) & 0x000000FF;
        ++output_block_counter;
    }
 }
--- a/rin/miner/gpu/RinHash-cuda/build-cuda-linux.sh
+++ b/rin/miner/gpu/RinHash-cuda/build-cuda-linux.sh
@@ -0,0 +1,99 @@
 #!/bin/bash
 # RinHash CUDA Build Script for Linux/WSL
 # Builds the CUDA implementation of RinHash into ./bin:
 #   bin/rinhash-cuda-miner  (main miner)
 #   bin/test_miner          (test program)
 # Every nvcc invocation is checked on its own, so a failed miner build can
 # no longer be hidden by a successful test-program build.
 echo "======================================"
 echo "  RinHash CUDA Miner Build Script"
 echo "======================================"
 # Check if NVCC is available
 if ! command -v nvcc &> /dev/null; then
    echo "ERROR: NVCC not found in PATH"
    echo "Please install CUDA Toolkit"
    echo "On Ubuntu/Debian: sudo apt install nvidia-cuda-toolkit"
    echo "Or download from: https://developer.nvidia.com/cuda-downloads"
    exit 1
 fi
 echo "NVCC found:"
 nvcc --version
 echo ""
 # Check if gcc/g++ is available
 if ! command -v gcc &> /dev/null; then
    echo "ERROR: GCC not found in PATH"
    echo "Please install build-essential: sudo apt install build-essential"
    exit 1
 fi
 echo "GCC found:"
 gcc --version | head -1
 echo ""
 echo "Building RinHash CUDA miner..."
 echo ""
 # Create output directory
 mkdir -p bin
 # Compiler flags shared by both builds (one -gencode entry per target GPU
 # generation).
 NVCC_FLAGS=(
    -O3 -std=c++11
    -arch=sm_50
    -gencode arch=compute_50,code=sm_50
    -gencode arch=compute_52,code=sm_52
    -gencode arch=compute_60,code=sm_60
    -gencode arch=compute_61,code=sm_61
    -gencode arch=compute_70,code=sm_70
    -gencode arch=compute_75,code=sm_75
    -gencode arch=compute_80,code=sm_80
    -gencode arch=compute_86,code=sm_86
    -I.
 )
 # Libraries linked into both binaries. cudadevrt is linked, but note that
 # relocatable device code (-rdc=true) is not requested here.
 NVCC_LIBS=(-lcuda -lcudart -lcudadevrt)
 # Prints the failure banner with common causes and aborts the script.
 build_failed() {
    echo ""
    echo "======================================"
    echo "   BUILD FAILED!"
    echo "======================================"
    echo ""
    echo "Common issues:"
    echo "1. Missing CUDA runtime libraries"
    echo "2. Incompatible CUDA version"
    echo "3. Missing development tools"
    echo ""
    exit 1
 }
 # Compile the main miner with NVCC
 if ! nvcc "${NVCC_FLAGS[@]}" \
    rinhash.cu sha3-256.cu \
    -o bin/rinhash-cuda-miner \
    "${NVCC_LIBS[@]}"; then
    build_failed
 fi
 # Also build test program
 echo "Building test program..."
 if ! nvcc "${NVCC_FLAGS[@]}" \
    test_miner.cu rinhash.cu sha3-256.cu \
    -o bin/test_miner \
    "${NVCC_LIBS[@]}"; then
    build_failed
 fi
 echo ""
 echo "======================================"
 echo "   BUILD SUCCESSFUL!"
 echo "======================================"
 echo ""
 echo "Executables created:"
 echo "  - bin/rinhash-cuda-miner (main miner)"
 echo "  - bin/test_miner (test program)"
 echo ""
 echo "To test the miner:"
 echo "  ./bin/test_miner"
 echo ""
 echo "Build completed successfully!"
--- a/rin/miner/gpu/RinHash-cuda/build-cuda.bat
+++ b/rin/miner/gpu/RinHash-cuda/build-cuda.bat
@@ -0,0 +1,97 @@
@echo off
 REM RinHash CUDA Build Script
 REM This script attempts to build the CUDA implementation of RinHash
 REM Flow: verify that nvcc is on PATH, load a Visual Studio x64 build
 REM environment (2022 Community first, then 2019 Build Tools), compile
 REM with nvcc, and leave through :error (exit code 1) on any failure.
 echo ======================================
 echo   RinHash CUDA Miner Build Script
 echo ======================================
 REM Check if NVCC is available
 where nvcc >nul 2>nul
 if errorlevel 1 (
    echo ERROR: NVCC not found in PATH
    echo Please install CUDA Toolkit
    goto :error
 )
 echo NVCC found: 
 nvcc --version
 echo.
 REM Try to find Visual Studio
 REM Only these two fixed install locations are probed.
 set "VS2019_PATH=C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
 set "VS2022_PATH=C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat"
 if exist "%VS2022_PATH%" (
    echo Using Visual Studio 2022...
    call "%VS2022_PATH%"
    goto :compile
 )
 if exist "%VS2019_PATH%" (
    echo Using Visual Studio 2019 Build Tools...
    call "%VS2019_PATH%"
    goto :compile
 )
 REM Neither vcvars64.bat was found: print installation hints and fail.
 echo ERROR: No Visual Studio installation found
 echo.
 echo SOLUTION 1: Install Visual Studio Community 2022 (free)
 echo   - Download from: https://visualstudio.microsoft.com/downloads/
 echo   - Make sure to include "Desktop development with C++" workload
 echo   - Include Windows 10/11 SDK
 echo.
 echo SOLUTION 2: Install Visual Studio Build Tools 2022
 echo   - Download from: https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2022
 echo   - Include C++ build tools and Windows SDK
 echo.
 goto :error
 :compile
 echo.
 echo Building RinHash CUDA miner...
 echo.
 REM Compile with NVCC (enable device linking for dynamic parallelism)
 REM Only sm_50 code is generated here.
 nvcc -O3 -rdc=true -arch=sm_50 ^
     -gencode arch=compute_50,code=sm_50 ^
     -I. rinhash.cu sha3-256.cu ^
     -o rinhash-cuda-miner.exe ^
     -lcuda -lcudart -lcudadevrt
 if errorlevel 1 (
    echo.
    echo BUILD FAILED!
    echo.
    echo Common issues:
    echo 1. Missing Windows SDK - install via Visual Studio Installer
    echo 2. Incompatible Visual Studio version
    echo 3. Missing CUDA runtime libraries
    echo.
    goto :error
 )
 echo.
 echo ======================================
 echo   BUILD SUCCESSFUL!
 echo ======================================
 echo.
 echo Executable created: rinhash-cuda-miner.exe
 echo.
 echo To test the miner:
 echo   rinhash-cuda-miner.exe --help
 echo.
 goto :end
 REM Common failure exit: prints the banner, waits for a key press and
 REM returns exit code 1. A compile failure reaches this label after it has
 REM already printed its own BUILD FAILED message.
 :error
 echo.
 echo ======================================
 echo   BUILD FAILED!
 echo ======================================
 echo.
 pause
 exit /b 1
 REM Success exit: waits for a key press before the window closes.
 :end
 echo Build completed successfully!
 pause
--- a/rin/miner/gpu/RinHash-cuda/rinhash.cu
+++ b/rin/miner/gpu/RinHash-cuda/rinhash.cu
@@ -0,0 +1,232 @@
 #include <cuda_runtime.h>
 #include <device_launch_parameters.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 #include <vector>
 #include <stdexcept>
 // Include shared device functions
 #include "rinhash_device.cuh"
 #include "argon2d_device.cuh"
 #include "sha3-256.cu"
 #include "blake3_device.cuh"
 // RinHash kernel: chains three device hash functions over one input.
 //   input, input_len - message to hash (device memory)
 //   output           - receives the final 32-byte digest (device memory)
 //   argon2_memory    - work area handed to device_argon2d_hash(); the host
 //                      wrappers in this file allocate 64 blocks for it
 // All work is done by thread 0; other threads only reach the barrier.
 extern "C" __global__ void rinhash_cuda_kernel(
    const uint8_t* input,
    size_t input_len,
    uint8_t* output,
    block* argon2_memory
 ) {
    __shared__ uint8_t blake3_out[32];
    __shared__ uint8_t argon2_out[32];
    const bool is_worker = (threadIdx.x == 0);
    if (is_worker) {
        // Stage 1: light_hash_device (presumably BLAKE3, judging by the
        // buffer name) -> 32 bytes
        light_hash_device(input, input_len, blake3_out);
        // Stage 2: Argon2d over the stage-1 digest with a fixed salt.
        // The literals 2, 64, 1 are presumably t_cost, m_cost and lanes -
        // TODO confirm against device_argon2d_hash.
        uint8_t salt[11] = { 'R','i','n','C','o','i','n','S','a','l','t' };
        device_argon2d_hash(argon2_out, blake3_out, 32, 2, 64, 1, argon2_memory, salt, 11);
        // Stage 3: SHA3-256 over the Argon2d output, copied out bytewise
        uint8_t digest[32];
        sha3_256_device(argon2_out, 32, digest);
        const uint8_t* src = digest;
        uint8_t* dst = output;
        for (int left = 32; left > 0; --left) {
            *dst++ = *src++;
        }
    }
    __syncthreads();
 }
 // RinHash CUDA implementation: hashes one message on the GPU.
 //   input, input_len - host buffer holding the message
 //   output           - host buffer receiving the 32-byte digest
 // Device buffers are allocated per call and always released before
 // returning. On any CUDA error a message is written to stderr and `output`
 // is left untouched (the function has no other way to report failure).
 extern "C" void rinhash_cuda(const uint8_t* input, size_t input_len, uint8_t* output) {
    const uint32_t m_cost = 64; // Argon2 blocks (64 KiB)
    uint8_t *d_input = nullptr;
    uint8_t *d_output = nullptr;
    block  *d_memory = nullptr;
    cudaError_t err = cudaSuccess;
    // Single pass; `break` jumps to the shared cleanup below.
    do {
        err = cudaMalloc(&d_input, input_len);
        if (err != cudaSuccess) {
            fprintf(stderr, "CUDA error: Failed to allocate input memory: %s\n", cudaGetErrorString(err));
            break;
        }
        err = cudaMalloc(&d_output, 32);
        if (err != cudaSuccess) {
            fprintf(stderr, "CUDA error: Failed to allocate output memory: %s\n", cudaGetErrorString(err));
            break;
        }
        err = cudaMalloc(&d_memory, m_cost * sizeof(block));
        if (err != cudaSuccess) {
            fprintf(stderr, "CUDA error: Failed to allocate argon2 memory: %s\n", cudaGetErrorString(err));
            break;
        }
        err = cudaMemcpy(d_input, input, input_len, cudaMemcpyHostToDevice);
        if (err != cudaSuccess) {
            fprintf(stderr, "CUDA error: Failed to copy input to device: %s\n", cudaGetErrorString(err));
            break;
        }
        rinhash_cuda_kernel<<<1, 1>>>(d_input, input_len, d_output, d_memory);
        // Launch failures are reported through cudaGetLastError(), not
        // necessarily through the synchronize call that follows.
        err = cudaGetLastError();
        if (err != cudaSuccess) {
            fprintf(stderr, "CUDA error: Failed to launch kernel: %s\n", cudaGetErrorString(err));
            break;
        }
        err = cudaDeviceSynchronize();
        if (err != cudaSuccess) {
            fprintf(stderr, "CUDA error during kernel execution: %s\n", cudaGetErrorString(err));
            break;
        }
        err = cudaMemcpy(output, d_output, 32, cudaMemcpyDeviceToHost);
        if (err != cudaSuccess) {
            fprintf(stderr, "CUDA error: Failed to copy output from device: %s\n", cudaGetErrorString(err));
        }
    } while (false);
    // cudaFree accepts null pointers, so buffers that were never allocated
    // need no special casing.
    cudaFree(d_memory);
    cudaFree(d_input);
    cudaFree(d_output);
 }
 // Serializes the block header fields into one contiguous byte buffer.
 // Fields are copied verbatim (host byte order) in this order and size:
 //   version (4), prev_block (32), merkle_root (32), timestamp (4),
 //   bits (4), nonce (4)  -> 80 bytes in total.
 //   output     - destination, must have room for 80 bytes
 //   output_len - receives the number of bytes written
 extern "C" void blockheader_to_bytes(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    const uint32_t* nonce,
    uint8_t* output,
    size_t* output_len
 ) {
    const struct {
        const void* src;
        size_t len;
    } fields[] = {
        { version,     4  },
        { prev_block,  32 },
        { merkle_root, 32 },
        { timestamp,   4  },
        { bits,        4  },
        { nonce,       4  },
    };
    size_t written = 0;
    for (size_t f = 0; f < sizeof(fields) / sizeof(fields[0]); f++) {
        memcpy(output + written, fields[f].src, fields[f].len);
        written += fields[f].len;
    }
    *output_len = written;
 }
 // Batch processing version for mining (sequential per header for now).
 //   block_headers    - num_blocks headers stored back to back (host)
 //   block_header_len - size of one header in bytes
 //   outputs          - receives num_blocks digests of 32 bytes each (host)
 //   num_blocks       - number of headers to hash
 // The device buffers are allocated once and reused for every header.
 // On a CUDA error a message is written to stderr and processing stops;
 // digests of headers that were not processed are left untouched.
 extern "C" void rinhash_cuda_batch(
    const uint8_t* block_headers,
    size_t block_header_len,
    uint8_t* outputs,
    uint32_t num_blocks
 ) {
    const uint32_t m_cost = 64;
    const size_t hash_len = 32;
    uint8_t *d_input = NULL;
    uint8_t *d_output = NULL;
    block  *d_memory = NULL;
    cudaError_t err;
    err = cudaMalloc((void**)&d_input, block_header_len);
    if (err != cudaSuccess) { fprintf(stderr, "CUDA error: alloc header: %s\n", cudaGetErrorString(err)); return; }
    err = cudaMalloc((void**)&d_output, 32);
    if (err != cudaSuccess) { fprintf(stderr, "CUDA error: alloc output: %s\n", cudaGetErrorString(err)); cudaFree(d_input); return; }
    err = cudaMalloc((void**)&d_memory, m_cost * sizeof(block));
    if (err != cudaSuccess) { fprintf(stderr, "CUDA error: alloc argon2 mem: %s\n", cudaGetErrorString(err)); cudaFree(d_input); cudaFree(d_output); return; }
    for (uint32_t i = 0; i < num_blocks; i++) {
        // Offsets are computed in size_t: `i * 32` in 32-bit arithmetic
        // wraps around once i reaches 2^27.
        const uint8_t* input = block_headers + (size_t)i * block_header_len;
        uint8_t* output = outputs + (size_t)i * hash_len;
        err = cudaMemcpy(d_input, input, block_header_len, cudaMemcpyHostToDevice);
        if (err != cudaSuccess) { fprintf(stderr, "CUDA error: copy header %u: %s\n", i, cudaGetErrorString(err)); break; }
        rinhash_cuda_kernel<<<1, 1>>>(d_input, block_header_len, d_output, d_memory);
        // Launch failures surface through cudaGetLastError().
        err = cudaGetLastError();
        if (err != cudaSuccess) { fprintf(stderr, "CUDA error: launch kernel %u: %s\n", i, cudaGetErrorString(err)); break; }
        err = cudaDeviceSynchronize();
        if (err != cudaSuccess) { fprintf(stderr, "CUDA error in kernel %u: %s\n", i, cudaGetErrorString(err)); break; }
        err = cudaMemcpy(output, d_output, 32, cudaMemcpyDeviceToHost);
        if (err != cudaSuccess) { fprintf(stderr, "CUDA error: copy out %u: %s\n", i, cudaGetErrorString(err)); break; }
    }
    cudaFree(d_memory);
    cudaFree(d_output);
    cudaFree(d_input);
 }
 // Public single-hash entry point: serializes the header fields with
 // blockheader_to_bytes() and hashes the result with rinhash_cuda().
 //   version .. nonce - header fields (prev_block and merkle_root are read
 //                      as 32 bytes each, the others as 4 bytes)
 //   output           - receives the 32-byte digest
 extern "C" void RinHash(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    const uint32_t* nonce,
    uint8_t* output
 ) {
    // Standard block header size
    uint8_t header_bytes[80];
    size_t header_size;
    blockheader_to_bytes(version, prev_block, merkle_root, timestamp,
                         bits, nonce, header_bytes, &header_size);
    rinhash_cuda(header_bytes, header_size, output);
 }
 // Mining function that tries different nonces.
 // Hashes the header for every nonce in [start_nonce, start_nonce +
 // num_nonces) and reports the numerically smallest digest (compared byte
 // by byte from index 0) together with the nonce that produced it.
 //   version .. bits - fixed header fields
 //   start_nonce     - first nonce to try (nonces wrap modulo 2^32)
 //   num_nonces      - number of consecutive nonces to try
 //   found_nonce     - receives the nonce of the best digest
 //   target_hash     - currently not consulted by this function
 //   best_hash       - receives the best 32-byte digest
 // With num_nonces == 0 nothing is hashed: best_hash is filled with 0xff
 // and found_nonce is set to start_nonce.
 extern "C" void RinHash_mine(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    uint32_t start_nonce,
    uint32_t num_nonces,
    uint32_t* found_nonce,
    uint8_t* target_hash,
    uint8_t* best_hash
 ) {
    (void)target_hash;
    const size_t block_header_len = 80;
    const size_t hash_len = 32;
    // Sizes are computed in size_t; `32 * num_nonces` in 32-bit arithmetic
    // wraps around for large batches.
    const size_t count = num_nonces;
    if (count == 0) {
        // Previously this path copied 32 bytes out of an empty vector.
        memset(best_hash, 0xff, hash_len);
        *found_nonce = start_nonce;
        return;
    }
    std::vector<uint8_t> block_headers(block_header_len * count);
    // Pre-fill with 0xff (the worst possible digest) so that slots the
    // batch call could not compute never win the comparison below.
    std::vector<uint8_t> hashes(hash_len * count, (uint8_t)0xff);
    for (size_t i = 0; i < count; i++) {
        uint32_t current_nonce = start_nonce + (uint32_t)i;
        uint8_t* header = block_headers.data() + i * block_header_len;
        size_t header_len;
        blockheader_to_bytes(
            version,
            prev_block,
            merkle_root,
            timestamp,
            bits,
            &current_nonce,
            header,
            &header_len
        );
    }
    rinhash_cuda_batch(block_headers.data(), block_header_len, hashes.data(), num_nonces);
    memcpy(best_hash, hashes.data(), hash_len);
    *found_nonce = start_nonce;
    for (size_t i = 1; i < count; i++) {
        const uint8_t* current_hash = hashes.data() + i * hash_len;
        // memcmp compares as unsigned bytes from index 0, which is the same
        // ordering as the former hand-written loop.
        if (memcmp(current_hash, best_hash, hash_len) < 0) {
            memcpy(best_hash, current_hash, hash_len);
            *found_nonce = start_nonce + (uint32_t)i;
        }
    }
 }
--- a/rin/miner/gpu/RinHash-cuda/rinhash_device.cuh
+++ b/rin/miner/gpu/RinHash-cuda/rinhash_device.cuh
@@ -0,0 +1,8 @@
 #ifndef RINHASH_DEVICE_CUH
 #define RINHASH_DEVICE_CUH
 #include <cuda_runtime.h>
 #include <device_launch_parameters.h>
 #include <stdint.h>
 #endif // RINHASH_DEVICE_CUH
--- a/rin/miner/gpu/RinHash-cuda/sha3-256.cu
+++ b/rin/miner/gpu/RinHash-cuda/sha3-256.cu
@@ -0,0 +1,140 @@
 #include <stdint.h>
 #include <stddef.h>
 #define KECCAKF_ROUNDS 24
 // Left rotation of a 64-bit value by n bits.
 // The shift amounts are reduced modulo 64 so that n == 0 (or a multiple
 // of 64) no longer performs a shift by the full word width.
 // Same operation as ROTL64() below.
 __device__ inline uint64_t rotate(uint64_t x, int n) {
    const unsigned s = (unsigned)n & 63u;
    return (x << s) | (x >> ((64u - s) & 63u));
 }
 // 64-bit left rotation used by keccakf(), the Keccak-f[1600] permutation
 // (24 rounds applied to the 25-word state st[25]).
 // The shift amounts are reduced modulo 64 so that n == 0 no longer
 // performs a shift by the full word width.
 __device__ inline uint64_t ROTL64(uint64_t x, int n) {
    const unsigned s = (unsigned)n & 63u;
    return (x << s) | (x >> ((64u - s) & 63u));
 }
 // Keccak-f[1600] permutation: transforms the 25-word state st in place by
 // running 24 rounds of Theta, Rho+Pi, Chi and Iota.
 __device__ void keccakf(uint64_t st[25]) {
    // Rotation amounts applied in the Rho step, in the order the lanes are
    // visited by the Pi walk below.
    const int R[24] = {
         1,  3,  6, 10, 15, 21,
        28, 36, 45, 55,  2, 14,
        27, 41, 56,  8, 25, 43,
        62, 18, 39, 61, 20, 44
    };
    // Lane indices visited by the Pi step; the walk starts from lane 1.
    const int P[24] = {
        10,  7, 11, 17, 18, 3,
         5, 16, 8, 21, 24, 4,
        15, 23, 19, 13, 12, 2,
        20, 14, 22,  9, 6,  1
    };
    // Round constants XORed into lane 0 by the Iota step.
    const uint64_t RC[24] = {
        0x0000000000000001ULL, 0x0000000000008082ULL,
        0x800000000000808aULL, 0x8000000080008000ULL,
        0x000000000000808bULL, 0x0000000080000001ULL,
        0x8000000080008081ULL, 0x8000000000008009ULL,
        0x000000000000008aULL, 0x0000000000000088ULL,
        0x0000000080008009ULL, 0x000000008000000aULL,
        0x000000008000808bULL, 0x800000000000008bULL,
        0x8000000000008089ULL, 0x8000000000008003ULL,
        0x8000000000008002ULL, 0x8000000000000080ULL,
        0x000000000000800aULL, 0x800000008000000aULL,
        0x8000000080008081ULL, 0x8000000000008080ULL,
        0x0000000080000001ULL, 0x8000000080008008ULL
    };
    int i, j, round;
    uint64_t t, bc[5];
    for (round = 0; round < 24; round++) {
        // Theta: bc[i] is the parity of column i; every lane of a column is
        // XORed with the parity of one neighbour column and the rotated
        // parity of the other.
        for (i = 0; i < 5; i++)
            bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
        for (i = 0; i < 5; i++) {
            t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
            for (j = 0; j < 25; j += 5)
                st[j + i] ^= t;
        }
        // Rho and Pi: follow the lane chain given by P; each lane receives
        // the previous lane of the chain rotated by R[i]. bc[0] serves as a
        // temporary for the value being displaced.
        t = st[1];
        for (i = 0; i < 24; i++) {
            j = P[i];
            bc[0] = st[j];
            st[j] = ROTL64(t, R[i]);
            t = bc[0];
        }
        // Chi: row-wise non-linear step, computed from a copy of the row
        // so that all five lanes use the pre-update values.
        for (j = 0; j < 25; j += 5) {
            for (i = 0; i < 5; i++)
                bc[i] = st[j + i];
            for (i = 0; i < 5; i++)
                st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
        }
        // Iota: mix the round constant into lane 0
        st[0] ^= RC[round];
    }
 }
 // Reads a 64-bit value stored little-endian in the 8 bytes at src
 // (src[0] is the least significant byte).
 __device__ inline uint64_t load64_le(const uint8_t *src) {
    uint64_t value = 0;
    // Walk from the most significant byte down, shifting earlier bytes up.
    #pragma unroll
    for (int k = 7; k >= 0; k--) {
        value = (value << 8) | (uint64_t)src[k];
    }
    return value;
 }
 // Writes a 64-bit value little-endian into the 8 bytes at dst
 // (dst[0] receives the least significant byte).
 __device__ inline void store64_le(uint8_t *dst, uint64_t x) {
    // x is a by-value copy, so it can be consumed byte by byte.
    #pragma unroll
    for (int k = 0; k < 8; k++) {
        dst[k] = (uint8_t)x;
        x >>= 8;
    }
 }
 /*
  __device__ function sha3_256_device
    - Absorbs the inlen bytes at input, applies the SHA3-256 padding and
      the Keccak-f[1600] permutation.
    - Writes the first 32 bytes (4 words) of the final state to hash_out
      in little-endian form.
    - The rate (size of the absorbing part) of SHA3-256 is 136 bytes.
    - Any input length is supported. For the 32-byte inputs used by the
      RinHash kernel the result is identical to the previous fixed-length
      version, which always absorbed exactly 32 trailing bytes.
 */
 __device__ void sha3_256_device(const uint8_t *input, size_t inlen, uint8_t *hash_out) {
    const size_t rate = 136; // SHA3-256 rate in bytes
    uint64_t st[25] = {0};   // internal state (25 words = 1600 bits)
    // Full blocks: XOR rate bytes into the state, then permute.
    while (inlen >= rate) {
        for (size_t i = 0; i < rate / 8; i++) {
            st[i] ^= load64_le(input + i * 8);
        }
        keccakf(st);
        input += rate;
        inlen -= rate;
    }
    // Tail: the remaining inlen (< rate) bytes, absorbed byte by byte.
    // Byte i belongs to bits 8*(i%8).. of word i/8 (little-endian lanes);
    // expressing it with shifts avoids depending on host byte order.
    for (size_t i = 0; i < inlen; i++) {
        st[i / 8] ^= (uint64_t)input[i] << (8 * (i % 8));
    }
    // Padding: 0x06 right after the message, 0x80 on the last rate byte.
    // Both may land on the same byte when inlen == rate - 1.
    st[inlen / 8] ^= 0x06ULL << (8 * (inlen % 8));
    st[(rate - 1) / 8] ^= 0x80ULL << (8 * ((rate - 1) % 8));
    keccakf(st);  // final Keccak-f
    // Squeeze: 32 output bytes
    for (int i = 0; i < 4; i++) {
        store64_le(hash_out + i * 8, st[i]);
    }
 }
--- a/rin/miner/gpu/RinHash-cuda/test_miner.cu
+++ b/rin/miner/gpu/RinHash-cuda/test_miner.cu
@@ -0,0 +1,85 @@
 #include <cuda_runtime.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdint.h>
 // External functions from our CUDA implementation
 // RinHash: hashes a single block header; `output` receives 32 bytes
 // (main() below prints 32 bytes of it).
 extern "C" void RinHash(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    const uint32_t* nonce,
    uint8_t* output
 );
 // RinHash_mine: tries num_nonces nonces starting at start_nonce and
 // reports a nonce through found_nonce and its hash through best_hash.
 extern "C" void RinHash_mine(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    uint32_t start_nonce,
    uint32_t num_nonces,
    uint32_t* found_nonce,
    uint8_t* target_hash,
    uint8_t* best_hash
 );
 // Prints "<label>: " followed by the len bytes at data as lowercase
 // two-digit hex, terminated by a newline.
 void print_hex(const char* label, const uint8_t* data, size_t len) {
    printf("%s: ", label);
    const uint8_t* const end = data + len;
    for (const uint8_t* p = data; p != end; ++p) {
        printf("%02x", *p);
    }
    printf("\n");
 }
 // Smoke test for the CUDA miner: selects device 0, hashes one sample block
 // header, then runs RinHash_mine over 1000 nonces and prints the results.
 // Returns 1 when the device cannot be selected, 0 otherwise.
 int main(int argc, char* argv[]) {
    printf("RinHash CUDA Miner Test\n");
    printf("=======================\n\n");
    // Initialize CUDA
    if (cudaSetDevice(0) != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU?\n");
        return 1;
    }
    // Test data - sample block header. prev_block and merkle_root each
    // alternate between two fixed words.
    uint32_t version = 0x20000000;
    uint32_t prev_block[8];
    uint32_t merkle_root[8];
    for (int w = 0; w < 8; w++) {
        const bool even_slot = (w % 2) == 0;
        prev_block[w] = even_slot ? 0x12345678 : 0x9abcdef0;
        merkle_root[w] = even_slot ? 0xabcdef12 : 0x34567890;
    }
    uint32_t timestamp = 0x5f123456;
    uint32_t bits = 0x1d00ffff;
    uint32_t nonce = 0x12345678;
    // Single hash
    uint8_t output[32];
    printf("Testing single hash...\n");
    RinHash(&version, prev_block, merkle_root, &timestamp, &bits, &nonce, output);
    print_hex("Hash result", output, 32);
    // Mining run
    printf("\nTesting mining (trying 1000 nonces)...\n");
    const uint32_t first_nonce = 0;
    const uint32_t nonce_count = 1000;
    uint32_t found_nonce;
    uint8_t best_hash[32];
    // Set a target (easier than difficulty)
    uint8_t target_hash[32];
    memset(target_hash, 0xff, 32);
    RinHash_mine(&version, prev_block, merkle_root, &timestamp, &bits,
                 first_nonce, nonce_count, &found_nonce, target_hash, best_hash);
    printf("Found nonce: 0x%08x\n", found_nonce);
    print_hex("Best hash", best_hash, 32);
    printf("\nTest completed successfully!\n");
    return 0;
 }
--- a/rin/miner/gpu/RinHash-hip/CMakeLists.txt
+++ b/rin/miner/gpu/RinHash-hip/CMakeLists.txt
@@ -0,0 +1,21 @@
 # Build definition for the HIP variant of the RinHash miner.
 cmake_minimum_required(VERSION 3.21)
 # Both C++ and HIP are enabled as project languages.
 project(RinHashHIP LANGUAGES CXX HIP)
 # C++17 for host and HIP sources alike
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_HIP_STANDARD 17)
 # Enable HIP
 find_package(HIP REQUIRED)
 # Sources of the miner executable
 set(SOURCES
  rinhash.hip.cu
  sha3-256.hip.cu
 )
 add_executable(rinhash-hip-miner ${SOURCES})
 # Headers are looked up next to this file
 target_include_directories(rinhash-hip-miner PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
 # Select the AMD platform path of the HIP headers
 target_compile_definitions(rinhash-hip-miner PRIVATE __HIP_PLATFORM_AMD__)
 target_link_libraries(rinhash-hip-miner PRIVATE HIP::device)
--- a/rin/miner/gpu/RinHash-hip/argon2d_device.cuh
+++ b/rin/miner/gpu/RinHash-hip/argon2d_device.cuh
@@ -0,0 +1,929 @@
 #include <cuda_runtime.h>
 #include <device_launch_parameters.h>
 #include <stdint.h>
 #include <string.h>
 #include <stdio.h>
 //=== Argon2 constants ===//
 #define ARGON2_BLOCK_SIZE 1024
 #define ARGON2_QWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 8)
 #define ARGON2_OWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 16)
 #define ARGON2_HWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 32)
 #define ARGON2_SYNC_POINTS 4
 #define ARGON2_PREHASH_DIGEST_LENGTH 64
 #define ARGON2_PREHASH_SEED_LENGTH 72
 #define ARGON2_VERSION_10 0x10
 #define ARGON2_VERSION_13 0x13
 #define ARGON2_ADDRESSES_IN_BLOCK 128
 //=== Blake2b constants ===//
 #define BLAKE2B_BLOCKBYTES 128
 #define BLAKE2B_OUTBYTES 64
 #define BLAKE2B_KEYBYTES 64
 #define BLAKE2B_SALTBYTES 16
 #define BLAKE2B_PERSONALBYTES 16
 #define BLAKE2B_ROUNDS 12
 //=== Struct definitions ===//
 // One Argon2 memory block: ARGON2_BLOCK_SIZE (1024) bytes viewed as
 // ARGON2_QWORDS_IN_BLOCK (128) 64-bit words, declared 64-byte aligned.
 typedef struct __align__(64) block_ {
    uint64_t v[ARGON2_QWORDS_IN_BLOCK];
 } block;
 // Parameters and working memory of one Argon2 computation.
 typedef struct Argon2_instance_t {
    block *memory;          /* Memory pointer */
    uint32_t version;
    uint32_t passes;        /* Number of passes */
    uint32_t memory_blocks; /* Number of blocks in memory */
    uint32_t segment_length;
    uint32_t lane_length;
    uint32_t lanes;
    uint32_t threads;
    int print_internals; /* whether to print the memory blocks */
 } argon2_instance_t;
 /*
 * Argon2 position: where we construct the block right now. Used to distribute
 * work between threads.
 * The position is given by the pass, the lane, the slice and the index.
 */
 typedef struct Argon2_position_t {
    uint32_t pass;
    uint32_t lane;
    uint8_t slice;
    uint32_t index;
 } argon2_position_t;
 // Running state of a BLAKE2b computation.
 //   h         - eight 64-bit state words
 //   t         - 128-bit counter (t[0] low word, t[1] receives the carry;
 //               see blake2b_increment_counter)
 //   f         - flag words: f[0] is set by blake2b_set_lastblock, f[1] by
 //               blake2b_set_lastnode
 //   buf       - one block (BLAKE2B_BLOCKBYTES) of buffered input
 //   buflen    - presumably the number of bytes currently held in buf
 //   outlen    - presumably the requested digest length
 //   last_node - when non-zero, blake2b_set_lastblock also sets f[1]
 typedef struct __blake2b_state {
    uint64_t h[8];
    uint64_t t[2];
    uint64_t f[2];
    uint8_t buf[BLAKE2B_BLOCKBYTES];
    unsigned buflen;
    unsigned outlen;
    uint8_t last_node;
 } blake2b_state;
 // BLAKE2b parameter block. The number in each trailing comment is the
 // cumulative size in bytes up to and including that field (64 in total).
 // NOTE(review): no packing pragma is applied here; the stated offsets
 // coincide with natural alignment for this field order - confirm if the
 // layout is ever changed.
 typedef struct __blake2b_param {
    uint8_t digest_length;                   /* 1 */
    uint8_t key_length;                      /* 2 */
    uint8_t fanout;                          /* 3 */
    uint8_t depth;                           /* 4 */
    uint32_t leaf_length;                    /* 8 */
    uint64_t node_offset;                    /* 16 */
    uint8_t node_depth;                      /* 17 */
    uint8_t inner_length;                    /* 18 */
    uint8_t reserved[14];                    /* 32 */
    uint8_t salt[BLAKE2B_SALTBYTES];         /* 48 */
    uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
 } blake2b_param;
 //=== Constant memory ===//
 __constant__ uint64_t blake2b_IV[8] = {
    0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
    0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
    0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
    0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
 };
 __constant__ uint8_t blake2b_sigma[12][16] = {
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
    {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
    {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
    {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
    {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
    {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
    {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
    {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
    {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}
 };
 //=== Common helper functions ===//
 // Right rotation of a 64-bit value by n bits.
 // The shift amounts are reduced modulo 64 so that n == 0 (or a multiple
 // of 64) no longer performs a shift by the full word width; results for
 // n in 1..63 are unchanged.
 __device__ __forceinline__ uint64_t rotr64(uint64_t x, uint32_t n) {
    const uint32_t s = n & 63u;
    return (x >> s) | (x << ((64u - s) & 63u));
 }
 // fBlaMka, kept identical to the C reference implementation:
 // x + y + 2 * lo32(x) * lo32(y), evaluated modulo 2^64.
 __device__ __forceinline__ uint64_t fBlaMka(uint64_t x, uint64_t y) {
    const uint64_t lo_x = (uint64_t)(uint32_t)x;
    const uint64_t lo_y = (uint64_t)(uint32_t)y;
    const uint64_t product = lo_x * lo_y;
    return x + y + (product << 1);
 }
 // BLAKE2b G mixing function, kept identical to the reference
 // implementation. Mixes the four state words a, b, c, d in place with the
 // two message words m1 and m2, using rotations by 32, 24, 16 and 63.
 __device__ __forceinline__ void blake2b_G(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t m1, uint64_t m2) {
    // first half: message word m1
    a += b + m1;
    d = rotr64(a ^ d, 32);
    c += d;
    b = rotr64(c ^ b, 24);
    // second half: message word m2
    a += b + m2;
    d = rotr64(a ^ d, 16);
    c += d;
    b = rotr64(c ^ b, 63);
 }
 // Stores a 32-bit value at dst in little-endian byte order.
 // With NATIVE_LITTLE_ENDIAN defined the value is copied as-is; otherwise
 // the four bytes are written one by one, least significant first.
 __device__ __forceinline__ void store32(void *dst, uint32_t w) {
 #if defined(NATIVE_LITTLE_ENDIAN)
    memcpy(dst, &w, sizeof w);
 #else
    uint8_t *out = (uint8_t *)dst;
    for (int k = 0; k < 4; ++k) {
        out[k] = (uint8_t)(w >> (8 * k));
    }
 #endif
 }
 // Adds inc to the 128-bit counter held in S->t: t[0] is the low word and
 // t[1] is incremented when the low word wraps around.
 __device__ __forceinline__ void blake2b_increment_counter(blake2b_state *S,
    uint64_t inc) {
    const uint64_t low = S->t[0] + inc;
    const uint64_t carry = (low < inc) ? 1 : 0;
    S->t[0] = low;
    S->t[1] += carry;
 }
 // Sets the second flag word f[1] to all ones.
 __device__ __forceinline__ void blake2b_set_lastnode(blake2b_state *S) {
    const uint64_t all_ones = ~(uint64_t)0;
    S->f[1] = all_ones;
 }
 // Sets the first flag word f[0] to all ones; when S->last_node is
 // non-zero, f[1] is set first through blake2b_set_lastnode().
 __device__ __forceinline__ void blake2b_set_lastblock(blake2b_state *S) {
    const bool is_last_node = (S->last_node != 0);
    if (is_last_node) {
        blake2b_set_lastnode(S);
    }
    S->f[0] = ~(uint64_t)0;
 }
 // Zeroes every byte of a blake2b_state (a memset replacement for device
 // code).
 __device__ void blake2b_state_memset(blake2b_state* S) {
    uint8_t *cursor = (uint8_t*)S;
    uint8_t *const stop = cursor + sizeof(blake2b_state);
    while (cursor != stop) {
        *cursor++ = 0;
    }
 }
 // XORs src into dst word by word: dst->v[i] ^= src->v[i] for all
 // ARGON2_QWORDS_IN_BLOCK words.
 __device__ void xor_block(block* dst, const block* src) {
    uint64_t *out = dst->v;
    const uint64_t *in = src->v;
    for (int remaining = ARGON2_QWORDS_IN_BLOCK; remaining > 0; --remaining) {
        *out++ ^= *in++;
    }
 }
 // Custom byte-wise memcpy: CUDA's memcpy is reportedly slow when called
 // within a kernel. Copies n bytes from src to dest in ascending order.
 __device__ void c_memcpy(void *dest, const void *src, size_t n) {
    const uint8_t *from = (const uint8_t*)src;
    uint8_t *to = (uint8_t*)dest;
    while (n != 0) {
        *to++ = *from++;
        --n;
    }
 }
 // Copies the whole word array of src into dst.
 __device__ void copy_block(block* dst, const block* src) {
    const size_t block_bytes = ARGON2_QWORDS_IN_BLOCK * sizeof(uint64_t);
    c_memcpy(dst->v, src->v, block_bytes);
 }
 // fill_block, kept identical to the C reference implementation.
 // Computes next_block from prev_block and ref_block:
 //   R = ref_block ^ prev_block
 //   Z = permutation applied to R (8 column rounds, then 8 row rounds)
 //   next_block = R ^ Z                    when with_xor == 0
 //   next_block = R ^ old next_block ^ Z   otherwise
 __device__ void fill_block(const block* prev_block, const block* ref_block, block* next_block, int with_xor) {
    block mixed = {};     // starts as R, permuted in place into Z
    block feedback = {};  // R, optionally XORed with the old next_block
    copy_block(&mixed, ref_block);
    xor_block(&mixed, prev_block);
    copy_block(&feedback, &mixed);
    if (with_xor != 0) {
        xor_block(&feedback, next_block);
    }
    // G function without macro: BLAKE2b's G with fBlaMka in place of the
    // plain additions and without message words.
    auto mix4 = [](uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d) {
        a = fBlaMka(a, b);
        d = rotr64(d ^ a, 32);
        c = fBlaMka(c, d);
        b = rotr64(b ^ c, 24);
        a = fBlaMka(a, b);
        d = rotr64(d ^ a, 16);
        c = fBlaMka(c, d);
        b = rotr64(b ^ c, 63);
    };
    // BLAKE2_ROUND_NOMSG without macro: four column mixes followed by four
    // diagonal mixes over a 4x4 arrangement of 16 words.
    auto round16 = [&mix4](uint64_t& w0, uint64_t& w1, uint64_t& w2, uint64_t& w3,
                           uint64_t& w4, uint64_t& w5, uint64_t& w6, uint64_t& w7,
                           uint64_t& w8, uint64_t& w9, uint64_t& w10, uint64_t& w11,
                           uint64_t& w12, uint64_t& w13, uint64_t& w14, uint64_t& w15) {
        mix4(w0, w4, w8, w12);
        mix4(w1, w5, w9, w13);
        mix4(w2, w6, w10, w14);
        mix4(w3, w7, w11, w15);
        mix4(w0, w5, w10, w15);
        mix4(w1, w6, w11, w12);
        mix4(w2, w7, w8, w13);
        mix4(w3, w4, w9, w14);
    };
    // Apply Blake2 on columns: 8 groups of 16 consecutive words
    for (unsigned col = 0; col < 8; ++col) {
        uint64_t* w = &mixed.v[16 * col];
        round16(w[0], w[1], w[2], w[3], w[4], w[5], w[6], w[7],
                w[8], w[9], w[10], w[11], w[12], w[13], w[14], w[15]);
    }
    // Apply Blake2 on rows: 8 groups made of word pairs spaced 16 words
    // apart
    for (unsigned row = 0; row < 8; ++row) {
        uint64_t* w = &mixed.v[2 * row];
        round16(w[0], w[1], w[16], w[17], w[32], w[33], w[48], w[49],
                w[64], w[65], w[80], w[81], w[96], w[97], w[112], w[113]);
    }
    // Feed-forward: next_block = feedback ^ Z
    copy_block(next_block, &feedback);
    xor_block(next_block, &mixed);
 }
 // Element-wise fill: assigns val to dest[0] .. dest[count - 1].
 // Note that count is a number of ELEMENTS of whatever dest points to,
 // not a number of bytes.
 template<typename T, typename ptr_t>
 __device__ void c_memset(ptr_t dest, T val, int count) {
    int idx = 0;
    while (idx < count) {
        dest[idx] = val;
        ++idx;
    }
 }
 // Set every byte of block *b to the value `in`.
 __device__ void init_block_value(block *b, uint8_t in) {
    // c_memset indexes its destination element by element, so the byte count
    // sizeof(b->v) has to be paired with a byte pointer. Passing b->v itself
    // would step through 64-bit words, writing far beyond the end of v and
    // storing `in` once per word instead of once per byte.
    c_memset((uint8_t *)b->v, in, (int)sizeof(b->v));
 }
 // Generate the next block of addresses: advance the counter stored in word 6
 // of *input_block, then run fill_block twice (input -> address, then
 // address -> address), each time with *zero_block as the previous block.
 __device__  void next_addresses(block *address_block, block *input_block,
    const block *zero_block) {
    input_block->v[6] += 1;
    fill_block(zero_block, input_block, address_block, 0);
    fill_block(zero_block, address_block, address_block, 0);
 }
 // Mixing step used by blake2b_compress: folds the message words x and y into
 // the four state words a, b, c, d (updated in place) using additions, XORs
 // and right-rotations by 32, 24, 16 and 63 bits.
 __device__ void G1(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t x, uint64_t y) {
    // First half: absorb x.
    a += b + x;
    d = rotr64(a ^ d, 32);
    c += d;
    b = rotr64(c ^ b, 24);
    // Second half: absorb y.
    a += b + y;
    d = rotr64(a ^ d, 16);
    c += d;
    b = rotr64(c ^ b, 63);
 }
 // Blake2b compression function F.
 // Mixes one BLAKE2B_BLOCKBYTES-byte message block into the chaining value
 // S->h, using the counter S->t and the finalisation flags S->f.
 __device__ void blake2b_compress(blake2b_state* S, const uint8_t block[BLAKE2B_BLOCKBYTES]) {
    uint64_t msg[16];
    uint64_t work[16];
    // Decode the block as sixteen little-endian 64-bit words.
    for (int w = 0; w < 16; ++w) {
        uint64_t word = 0;
        for (int b = 7; b >= 0; --b) {
            word = (word << 8) | (uint64_t)block[w * 8 + b];
        }
        msg[w] = word;
    }
    // Working vector: chaining value in the low half, IV in the high half.
    for (int k = 0; k < 8; ++k) {
        work[k] = S->h[k];
    }
    for (int k = 0; k < 8; ++k) {
        work[8 + k] = blake2b_IV[k];
    }
    // Fold the byte counter and the last-block flags into the top four words.
    work[12] ^= S->t[0];
    work[13] ^= S->t[1];
    work[14] ^= S->f[0];
    work[15] ^= S->f[1];
    for (int round = 0; round < BLAKE2B_ROUNDS; ++round) {
        // Message word schedule for this round.
        const uint8_t* perm = blake2b_sigma[round];
        // Columns
        G1(work[0], work[4], work[8], work[12], msg[perm[0]], msg[perm[1]]);
        G1(work[1], work[5], work[9], work[13], msg[perm[2]], msg[perm[3]]);
        G1(work[2], work[6], work[10], work[14], msg[perm[4]], msg[perm[5]]);
        G1(work[3], work[7], work[11], work[15], msg[perm[6]], msg[perm[7]]);
        // Diagonals
        G1(work[0], work[5], work[10], work[15], msg[perm[8]], msg[perm[9]]);
        G1(work[1], work[6], work[11], work[12], msg[perm[10]], msg[perm[11]]);
        G1(work[2], work[7], work[8], work[13], msg[perm[12]], msg[perm[13]]);
        G1(work[3], work[4], work[9], work[14], msg[perm[14]], msg[perm[15]]);
    }
    // Feed-forward: h ^= low half ^ high half.
    for (int k = 0; k < 8; ++k) {
        S->h[k] = S->h[k] ^ work[k] ^ work[k + 8];
    }
 }
 // Helpers that read/write 64-bit values byte by byte in little-endian order.
 // load64: assemble a uint64_t from the 8 bytes at src (src[0] is the least
 // significant byte).
 __device__ __forceinline__ uint64_t load64(const void* src) {
    const uint8_t* bytes = (const uint8_t*)src;
    uint64_t value = 0;
    for (int k = 7; k >= 0; --k) {
        value = (value << 8) | (uint64_t)bytes[k];
    }
    return value;
 }
 // Write w to the 8 bytes at dst in little-endian order (dst[0] receives the
 // least significant byte).
 __device__ __forceinline__ void store64(void* dst, uint64_t w) {
    uint8_t* bytes = (uint8_t*)dst;
    for (int k = 0; k < 8; ++k) {
        bytes[k] = (uint8_t)(w >> (8 * k));
    }
 }
 // Decode a block from `input`: each of the ARGON2_QWORDS_IN_BLOCK words of
 // dst->v is read as a little-endian 64-bit value from consecutive bytes.
 __device__ void load_block(block *dst, const void *input) {
    const uint8_t *cursor = (const uint8_t *)input;
    for (unsigned w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        dst->v[w] = load64(cursor);
        cursor += sizeof(dst->v[w]);
    }
 }
 // Encode a block into `output`: each word of src->v is written as 8
 // little-endian bytes, words laid out back to back.
 __device__ void store_block(void *output, const block *src) {
    uint8_t *cursor = (uint8_t *)output;
    for (unsigned w = 0; w < ARGON2_QWORDS_IN_BLOCK; ++w) {
        store64(cursor, src->v[w]);
        cursor += sizeof(src->v[w]);
    }
 }
 // Blake2b init function to match reference implementation exactly.
 //
 // Prepares *S for an unkeyed hash that will produce `outlen` bytes: the state
 // is zeroed, h is set to the IV XORed with the parameter block, and
 // S->outlen records the digest length.
 //
 // Returns 0 on success, or -1 when S is NULL or outlen is 0 or larger than
 // BLAKE2B_OUTBYTES. Without that range check an oversized outlen would be
 // truncated into the one-byte digest_length field and could later make
 // blake2b_final copy more bytes than its output buffer holds.
 __device__ int blake2b_init(blake2b_state* S, size_t outlen) {
    blake2b_param P;
    if (S == NULL) {
        return -1;
    }
    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        return -1;
    }
    // Clear state using our custom function
    blake2b_state_memset(S);
    // Set parameters according to Blake2b spec
    P.digest_length = (uint8_t)outlen;
    P.key_length = 0;
    P.fanout = 1;
    P.depth = 1;
    P.leaf_length = 0;
    P.node_offset = 0;
    P.node_depth = 0;
    P.inner_length = 0;
    c_memset(P.reserved, 0, sizeof(P.reserved));
    c_memset(P.salt, 0, sizeof(P.salt));
    c_memset(P.personal, 0, sizeof(P.personal));
    // Initialize state vector with IV
    for (int i = 0; i < 8; i++) {
        S->h[i] = blake2b_IV[i];
    }
    // The parameter block is consumed as eight little-endian 64-bit words.
    const unsigned char *p = (const unsigned char *)(&P);
    /* IV XOR Parameter Block */
    for (int i = 0; i < 8; ++i) {
        S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
    }
    S->outlen = P.digest_length;
    return 0; // Success
 }
 // Device-side switch that gates clear_internal_memory(); initialised to 0.
 __device__ int FLAG_clear_internal_memory = 0;
 // Placeholder for wiping sensitive scratch memory of n bytes at v.
 // The actual wipe call is commented out, so this function currently does
 // nothing regardless of the flag.
 __device__ void clear_internal_memory(void *v, size_t n) {
  if (FLAG_clear_internal_memory && v) {
 //    secure_wipe_memory(v, n);
  }
 }
 // Blake2b update function to match reference implementation.
 // Absorbs inlen bytes from `in`. Full blocks are compressed as soon as more
 // input is known to follow; the trailing (possibly full) block stays
 // buffered in S->buf for blake2b_final.
 // Returns 0 on success (including inlen == 0), -1 on a NULL argument or when
 // the state has already been finalised.
 __device__ int blake2b_update(blake2b_state* S, const uint8_t* in, size_t inlen) {
    if (inlen == 0) {
        return 0;
    }
    /* Sanity check */
    if (S == NULL || in == NULL) {
        return -1;
    }
    /* Is this a reused state? */
    if (S->f[0] != 0) {
        return -1;
    }
    const uint8_t *cursor = (const uint8_t *)in;
    size_t remaining = inlen;
    if (S->buflen + remaining > BLAKE2B_BLOCKBYTES) {
        /* Top up the buffered block and compress it */
        const size_t buffered = S->buflen;
        const size_t room = BLAKE2B_BLOCKBYTES - buffered;
        c_memcpy(&S->buf[buffered], cursor, room);
        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
        blake2b_compress(S, S->buf);
        S->buflen = 0;
        cursor += room;
        remaining -= room;
        /* Compress further full blocks straight from the input */
        for (; remaining > BLAKE2B_BLOCKBYTES;
             remaining -= BLAKE2B_BLOCKBYTES, cursor += BLAKE2B_BLOCKBYTES) {
            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
            blake2b_compress(S, cursor);
        }
    }
    /* Stash whatever is left for the next update/final call */
    c_memcpy(&S->buf[S->buflen], cursor, remaining);
    S->buflen += (unsigned int)remaining;
    return 0; // Success
 }
 // Blake2b final function to match reference implementation.
 //
 // Pads and compresses the buffered tail, then writes S->outlen digest bytes
 // to `out`.
 //
 // Returns 0 on success, or -1 when:
 //   - S or out is NULL,
 //   - outlen is smaller than the digest length recorded in S->outlen (the
 //     copy below always writes S->outlen bytes, so a shorter buffer would
 //     be overrun),
 //   - the state has already been finalised (S->f[0] != 0).
 __device__ int blake2b_final(blake2b_state* S, uint8_t* out, size_t outlen) {
    if (!S || !out) {
        return -1;
    }
    if (outlen < S->outlen) {
        return -1;
    }
    /* Is this a reused state? */
    if (S->f[0] != 0) {
        return -1;
    }
    uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
    unsigned int i;
    blake2b_increment_counter(S, S->buflen);
    blake2b_set_lastblock(S);
    c_memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
    blake2b_compress(S, S->buf);
    for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
        store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
    }
    /* Truncate to the requested digest length */
    c_memcpy(out, buffer, S->outlen);
    return 0;
 }
 // Initialise *S for a keyed BLAKE2b hash producing `outlen` bytes.
 //
 // The state is zeroed, h is set to IV XOR parameter block, and the key is
 // absorbed as one zero-padded BLAKE2B_BLOCKBYTES-byte block.
 //
 // Returns 0 on success, or -1 when S is NULL, outlen is 0 or larger than
 // BLAKE2B_OUTBYTES, key is NULL, or keylen is 0 or larger than
 // BLAKE2B_KEYBYTES.
 __device__ int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
    size_t keylen) {
    blake2b_param P;
    if (S == NULL) {
        return -1;
    }
    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        return -1;
    }
    // The key is copied into a fixed-size stack block below, so its length
    // must be bounded before that copy.
    if (key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES) {
        return -1;
    }
    // Start from an all-zero state: blake2b_update reads S->f, S->buflen and
    // the counter, none of which are assigned anywhere else in this function.
    blake2b_state_memset(S);
    /* Setup Parameter Block for keyed BLAKE2 */
    P.digest_length = (uint8_t)outlen;
    P.key_length = (uint8_t)keylen;
    P.fanout = 1;
    P.depth = 1;
    P.leaf_length = 0;
    P.node_offset = 0;
    P.node_depth = 0;
    P.inner_length = 0;
    c_memset(P.reserved, 0, sizeof(P.reserved));
    c_memset(P.salt, 0, sizeof(P.salt));
    c_memset(P.personal, 0, sizeof(P.personal));
    // Initialize state vector with IV
    for (int i = 0; i < 8; i++) {
        S->h[i] = blake2b_IV[i];
    }
    // The parameter block is consumed as eight little-endian 64-bit words.
    const unsigned char *p = (const unsigned char *)(&P);
    /* IV XOR Parameter Block */
    for (int i = 0; i < 8; ++i) {
        S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
    }
    S->outlen = P.digest_length;
    // Absorb the key as a full, zero-padded block.
    uint8_t block[BLAKE2B_BLOCKBYTES];
    c_memset(block, 0, BLAKE2B_BLOCKBYTES);
    c_memcpy(block, key, keylen);
    blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
    /* Burn the key from stack */
    clear_internal_memory(block, BLAKE2B_BLOCKBYTES);
    return 0;
 }
 // Blake2b all-in-one function.
 // Hashes `inlen` bytes at `in` (optionally keyed with `keylen` bytes at `key`)
 // into `outlen` bytes at `out`. Returns the result of blake2b_final on
 // success and -1 when an argument is rejected or any stage fails.
 __device__ int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
    const void *key, size_t keylen) {
    blake2b_state S;
    int ret = -1;
    /* Verify parameters */
    const bool bad_input = (NULL == in) && (inlen > 0);
    const bool bad_output = (NULL == out) || (outlen == 0) || (outlen > BLAKE2B_OUTBYTES);
    const bool bad_key = ((NULL == key) && (keylen > 0)) || (keylen > BLAKE2B_KEYBYTES);
    if (!bad_input && !bad_output && !bad_key) {
        /* Keyed or plain initialisation, depending on keylen */
        const int init_rc = (keylen > 0) ? blake2b_init_key(&S, outlen, key, keylen)
                                         : blake2b_init(&S, outlen);
        if (init_rc >= 0 && blake2b_update(&S, (const uint8_t*)in, inlen) >= 0) {
            ret = blake2b_final(&S, (uint8_t*)out, outlen);
        }
    }
    /* Runs on every path, success or failure */
    clear_internal_memory(&S, sizeof(S));
    return ret;
 }
 // Make index_alpha match the C reference implementation exactly (including
 // the function signature).
 //
 // Maps a 32-bit pseudo-random value to the index, within a lane, of the block
 // to be used as reference for the block currently being computed.
 //   instance    - supplies segment_length and lane_length
 //   position    - pass / slice / index of the block being computed
 //   pseudo_rand - 32-bit value that selects the reference block
 //   same_lane   - non-zero when the reference lane is the current lane
 // Returns the reference block's index in the range [0, lane_length).
 __device__ uint32_t index_alpha(const argon2_instance_t *instance,
    const argon2_position_t *position, uint32_t pseudo_rand,
    int same_lane) {
        uint32_t reference_area_size;
        uint64_t relative_position;
        uint32_t start_position, absolute_position;
        // Step 1: count how many blocks are eligible as a reference.
        if (0 == position->pass) {
            /* First pass */
            if (0 == position->slice) {
                /* First slice */
                reference_area_size =
                    position->index - 1; /* all but the previous */
            } else {
                if (same_lane) {
                    /* The same lane => add current segment */
                    reference_area_size =
                        position->slice * instance->segment_length +
                        position->index - 1;
                } else {
                    // Other lane: only finished slices; one block fewer when
                    // this is the first block of the segment.
                    reference_area_size =
                        position->slice * instance->segment_length +
                        ((position->index == 0) ? (-1) : 0);
                }
            }
        } else {
            /* Second pass */
            if (same_lane) {
                reference_area_size = instance->lane_length -
                                      instance->segment_length + position->index -
                                      1;
            } else {
                reference_area_size = instance->lane_length -
                                      instance->segment_length +
                                      ((position->index == 0) ? (-1) : 0);
            }
        }
        /* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
         * relative position */
        // Squaring pseudo_rand and keeping the high 32 bits skews the choice
        // towards the end of the reference area; the arithmetic is 64-bit.
        relative_position = pseudo_rand;
        relative_position = relative_position * relative_position >> 32;
        relative_position = reference_area_size - 1 -
                            (reference_area_size * relative_position >> 32);
        /* 1.2.5 Computing starting position */
        // On later passes the area starts right after the current slice
        // (wrapping to 0 when the current slice is the last one).
        start_position = 0;
        if (0 != position->pass) {
            start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
                                 ? 0
                                 : (position->slice + 1) * instance->segment_length;
        }
        /* 1.2.6. Computing absolute position */
        absolute_position = (start_position + relative_position) %
                            instance->lane_length; /* absolute position */
        return absolute_position;
 }
 // Add fill_segment (identical to the C reference implementation).
 //
 // Fills one segment of instance->memory: the segment_length blocks of lane
 // position.lane that belong to slice position.slice during pass
 // position.pass. `position` is taken by value; its index field is
 // overwritten for every block before index_alpha is called.
 __device__ void fill_segment(const argon2_instance_t *instance,
    argon2_position_t position) {
        block *ref_block = NULL, *curr_block = NULL;
    block address_block, input_block, zero_block;
    uint64_t pseudo_rand, ref_index, ref_lane;
    uint32_t prev_offset, curr_offset;
    uint32_t starting_index;
    uint32_t i;
    int data_independent_addressing;
    // Hard-coded to false: reference indices are always derived from memory
    // contents, so every `if (data_independent_addressing)` branch below is
    // never taken.
    data_independent_addressing = false;
    if (data_independent_addressing) {
        init_block_value(&zero_block, 0);
        init_block_value(&input_block, 0);
        input_block.v[0] = position.pass;
        input_block.v[1] = position.lane;
        input_block.v[2] = position.slice;
        input_block.v[3] = instance->memory_blocks;
        input_block.v[4] = instance->passes;
        input_block.v[5] = 0;
    }
    starting_index = 0;
    if ((0 == position.pass) && (0 == position.slice)) {
        starting_index = 2; /* we have already generated the first two blocks */
        /* Don't forget to generate the first block of addresses: */
        if (data_independent_addressing) {
            next_addresses(&address_block, &input_block, &zero_block);
        }
    }
    /* Offset of the current block */
    curr_offset = position.lane * instance->lane_length +
                  position.slice * instance->segment_length + starting_index;
    if (0 == curr_offset % instance->lane_length) {
        /* Last block in this lane */
        prev_offset = curr_offset + instance->lane_length - 1;
    } else {
        /* Previous block */
        prev_offset = curr_offset - 1;
    }
    for (i = starting_index; i < instance->segment_length;
         ++i, ++curr_offset, ++prev_offset) {
        /*1.1 Rotating prev_offset if needed */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }
        /* 1.2 Computing the index of the reference block */
        /* 1.2.1 Taking pseudo-random value from the previous block */
        if (data_independent_addressing) {
            if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
                next_addresses(&address_block, &input_block, &zero_block);
            }
            pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
        } else {
            pseudo_rand = instance->memory[prev_offset].v[0];
        }
        /* 1.2.2 Computing the lane of the reference block */
        // High 32 bits pick the lane; the low 32 bits go to index_alpha below.
        ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
        if ((position.pass == 0) && (position.slice == 0)) {
            /* Can not reference other lanes yet */
            ref_lane = position.lane;
        }
        /* 1.2.3 Computing the number of possible reference block within the
         * lane.
         */
        position.index = i;
        ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
                                ref_lane == position.lane);
        /* 2 Creating a new block */
        ref_block =
            instance->memory + instance->lane_length * ref_lane + ref_index;
        curr_block = instance->memory + curr_offset;
        if (ARGON2_VERSION_10 == instance->version) {
            /* version 1.2.1 and earlier: overwrite, not XOR */
            fill_block(instance->memory + prev_offset, ref_block, curr_block, 0);
        } else {
            // First pass overwrites the block; later passes XOR the new value
            // into the existing contents (with_xor = 1).
            if(0 == position.pass) {
                fill_block(instance->memory + prev_offset, ref_block,
                           curr_block, 0);
            } else {
                fill_block(instance->memory + prev_offset, ref_block,
                           curr_block, 1);
            }
        }
    }
 }
 // Make fill_memory match the C reference implementation exactly.
 // Runs every pass over the whole memory: for each pass and each of the
 // ARGON2_SYNC_POINTS slices, the segments of all lanes are filled one after
 // another by fill_segment.
 __device__ void fill_memory(block* memory, uint32_t passes, uint32_t lanes, uint32_t lane_length, uint32_t segment_length) {
    // Describe the memory layout for fill_segment / index_alpha.
    argon2_instance_t ctx;
    ctx.version = ARGON2_VERSION_13;
    ctx.passes = passes;
    ctx.memory = memory;
    ctx.memory_blocks = lanes * lane_length;
    ctx.segment_length = segment_length;
    ctx.lane_length = lane_length;
    ctx.lanes = lanes;
    ctx.threads = lanes;
    ctx.print_internals = 0;
    // pos.index is filled in by fill_segment for each block.
    argon2_position_t pos;
    uint32_t pass_no = 0;
    while (pass_no < passes) {
        pos.pass = pass_no;
        uint32_t slice_no = 0;
        while (slice_no < ARGON2_SYNC_POINTS) {
            pos.slice = slice_no;
            uint32_t lane_no = 0;
            while (lane_no < lanes) {
                pos.lane = lane_no;
                fill_segment(&ctx, pos);
                ++lane_no;
            }
            ++slice_no;
        }
        ++pass_no;
    }
 }
 // Make blake2b_long match the C reference implementation exactly.
 //
 // Variable-length hash built on BLAKE2b: writes outlen bytes to pout, derived
 // from the 4-byte little-endian encoding of outlen followed by the inlen
 // bytes at `in`.
 //   - outlen <= BLAKE2B_OUTBYTES: a single BLAKE2b with that digest length.
 //   - otherwise: a chain of BLAKE2B_OUTBYTES-byte hashes, each hashing the
 //     previous digest; the first half of each digest is emitted, and the
 //     final hash supplies the remaining bytes.
 // Returns the status of the last BLAKE2b call on success, or a negative
 // value on failure.
 __device__ int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
    uint8_t *out = (uint8_t *)pout;
    blake2b_state blake_state;
    uint8_t outlen_bytes[sizeof(uint32_t)] = {0};
    int ret = -1;
    // outlen is hashed as a 32-bit value, so it has to fit in one.
    if (outlen > UINT32_MAX) {
        goto fail;
    }
    /* Ensure little-endian byte order! */
    store32(outlen_bytes, (uint32_t)outlen);
 // TRY(expr): store expr in ret and jump to `fail` when it is negative.
 #define TRY(statement)                                                         \
    do {                                                                       \
        ret = statement;                                                       \
        if (ret < 0) {                                                         \
            goto fail;                                                         \
        }                                                                      \
    } while ((void)0, 0)
    if (outlen <= BLAKE2B_OUTBYTES) {
        // Short output: one hash with the requested digest length.
        TRY(blake2b_init(&blake_state, outlen));
        TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
        TRY(blake2b_update(&blake_state, (const uint8_t*)in, inlen));
        TRY(blake2b_final(&blake_state, out, outlen));
    } else {
        uint32_t toproduce;
        uint8_t out_buffer[BLAKE2B_OUTBYTES];
        uint8_t in_buffer[BLAKE2B_OUTBYTES];
        // First link of the chain hashes the length prefix and the input.
        TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
        TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
        TRY(blake2b_update(&blake_state, (const uint8_t*)in, inlen));
        TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
        c_memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
        out += BLAKE2B_OUTBYTES / 2;
        toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
        // Each further link hashes the previous full digest and emits only
        // its first half.
        while (toproduce > BLAKE2B_OUTBYTES) {
            c_memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
            TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer, BLAKE2B_OUTBYTES, NULL, 0));
            c_memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
            out += BLAKE2B_OUTBYTES / 2;
            toproduce -= BLAKE2B_OUTBYTES / 2;
        }
        // Last link: digest length equals the number of bytes still needed.
        c_memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
        TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
                    0));
        c_memcpy(out, out_buffer, toproduce);
    }
 fail:
    // Reached on success as well as on failure.
    clear_internal_memory(&blake_state, sizeof(blake_state));
    return ret;
 #undef TRY
 }
 // Make device_argon2d_hash match the C reference implementation exactly.
 //
 // Computes a 32-byte Argon2d tag entirely on the device, single-threaded.
 //   output            - receives 32 bytes
 //   input/input_len   - message (password) bytes
 //   t_cost            - number of passes over memory
 //   m_cost            - requested memory size in blocks
 //   lanes             - number of lanes; nothing is computed when it is 0
 //   memory            - caller-provided working memory; must hold at least
 //                       the rounded block count derived from m_cost below
 //   salt/salt_len     - salt bytes
 // No secret and no associated data are used (both lengths are hashed as 0).
 __device__ void device_argon2d_hash(
    uint8_t* output,
    const uint8_t* input, size_t input_len,
    uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
    block* memory,
    const uint8_t* salt, size_t salt_len
 ) {
    // Tag length in bytes: hashed into the initial digest and also the number
    // of bytes written to `output`.
    const uint32_t out_len = 32;
    // `lanes` is a divisor below; refuse to divide by zero.
    if (lanes == 0) {
        return;
    }
    // 1. Adjust memory size: at least 2 blocks per segment, and a whole
    //    number of segments per lane.
    uint32_t memory_blocks = m_cost;
    if (memory_blocks < 2 * ARGON2_SYNC_POINTS * lanes) {
        memory_blocks = 2 * ARGON2_SYNC_POINTS * lanes;
    }
    uint32_t segment_length = memory_blocks / (lanes * ARGON2_SYNC_POINTS);
    uint32_t lane_length = segment_length * ARGON2_SYNC_POINTS;
    // 2. Compute the initial hash H0 over all parameters and inputs.
    uint8_t blockhash[ARGON2_PREHASH_DIGEST_LENGTH];
    blake2b_state BlakeHash;
    blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
    // Absorb one 32-bit value in little-endian byte order.
    auto absorb_u32 = [&BlakeHash](uint32_t word) {
        uint8_t le[sizeof(uint32_t)];
        store32(le, word);
        blake2b_update(&BlakeHash, le, sizeof(le));
    };
    absorb_u32(lanes);
    absorb_u32(out_len);
    // H0 covers the memory cost as requested by the caller, not the rounded
    // block count used for the layout; the two differ whenever m_cost is not
    // a multiple of lanes * ARGON2_SYNC_POINTS.
    absorb_u32(m_cost);
    absorb_u32(t_cost);
    absorb_u32(ARGON2_VERSION_13);
    absorb_u32(0);                        // type: 0 selects Argon2d
    absorb_u32((uint32_t)input_len);
    blake2b_update(&BlakeHash, (const uint8_t *)input, input_len);
    absorb_u32((uint32_t)salt_len);
    blake2b_update(&BlakeHash, (const uint8_t *)salt, salt_len);
    absorb_u32(0);                        // secret length
    absorb_u32(0);                        // associated-data length
    blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
    // 3. Initialize first blocks in each lane
    //    Seed = H0 || LE32(block index 0 or 1) || LE32(lane).
    uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
    uint8_t initial_hash[ARGON2_PREHASH_SEED_LENGTH];
    c_memcpy(initial_hash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
    c_memset(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0, ARGON2_PREHASH_SEED_LENGTH - ARGON2_PREHASH_DIGEST_LENGTH);
    for (uint32_t l = 0; l < lanes; ++l) {
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
        blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&memory[l * lane_length], blockhash_bytes);
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
        blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&memory[l * lane_length + 1], blockhash_bytes);
    }
    // 4. Fill memory
    fill_memory(memory, t_cost, lanes, lane_length, segment_length);
    // 5. Final block mixing: XOR together the last block of every lane.
    block final_block;
    copy_block(&final_block, &memory[0 * lane_length + (lane_length - 1)]);
    for (uint32_t l = 1; l < lanes; ++l) {
        uint32_t last_block_in_lane = l * lane_length + (lane_length - 1);
        xor_block(&final_block, &memory[last_block_in_lane]);
    }
    // 6. Final hash
    uint8_t final_block_bytes[ARGON2_BLOCK_SIZE];
    store_block(final_block_bytes, &final_block);
    blake2b_long(output, out_len, final_block_bytes, ARGON2_BLOCK_SIZE);
 }
 //=== Example __global__ kernel (variant with an explicit salt) ===//
 // Assumes the host has already allocated the block working memory and passes
 // a pointer to it as memory_ptr.
 // Only thread 0 of block 0 computes the hash; every other thread returns
 // immediately.
 __global__ void argon2d_hash_device_kernel(
    uint8_t* output,
    const uint8_t* input, size_t input_len,
    uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
    block* memory_ptr,   // pointer to the memory region allocated by the host
    const uint8_t* salt, size_t salt_len
 ) {
    const bool is_first_thread = (threadIdx.x == 0) && (blockIdx.x == 0);
    if (!is_first_thread) {
        return;
    }
    device_argon2d_hash(output, input, input_len, t_cost, m_cost, lanes, memory_ptr, salt, salt_len);
 }
--- a/rin/miner/gpu/RinHash-hip/blake3_device.cuh
+++ b/rin/miner/gpu/RinHash-hip/blake3_device.cuh
@@ -0,0 +1,272 @@
 #include "blaze3_cpu.cuh"
 // Number of threads per thread block
 // NOTE(review): the kernel launches visible in this file pass literal block
 // sizes rather than this constant - confirm whether it is still used.
 __constant__ const int NUM_THREADS = 16;
 // redefine functions, but for the GPU
 // all of them are the same but with g_ prefixed
 // Device copy of the eight 32-bit initialisation words (same values as the
 // host-side IV table).
 __constant__ const u32 g_IV[8] = {
    0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 
    0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
 };
 // Device copy of the message-word permutation applied between rounds:
 // new m[i] = old m[g_MSG_PERMUTATION[i]].
 __constant__ const int g_MSG_PERMUTATION[] = {
    2, 6, 3, 10, 7, 0, 4, 13, 
    1, 11, 12, 5, 9, 14, 15, 8
 };
 // Rotate a 32-bit value right by `shift` bits (device version of rotr).
 __device__ u32 g_rotr(u32 value, int shift) {
    const u32 shifted_down = value >> shift;
    const u32 wrapped_up = value << (usize - shift);
    return shifted_down | wrapped_up;
 }
 // Quarter-round mixing function (device version of g): updates the four state
 // words selected by indices a, b, c, d, folding in message words mx and my.
 // Right-rotations are by 16, 12, 8 and 7 bits.
 __device__ void g_g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
    // First half: absorb mx.
    state[a] += state[b] + mx;
    state[d] = g_rotr(state[a] ^ state[d], 16);
    state[c] += state[d];
    state[b] = g_rotr(state[c] ^ state[b], 12);
    // Second half: absorb my.
    state[a] += state[b] + my;
    state[d] = g_rotr(state[a] ^ state[d], 8);
    state[c] += state[d];
    state[b] = g_rotr(state[c] ^ state[b], 7);
 }
 // One full round over the 4x4 state (device version of round): four column
 // mixes using m[0..7], then four diagonal mixes using m[8..15].
 __device__ void g_round(u32 state[16], u32 m[16]) {
    // Mix the columns: (i, i+4, i+8, i+12).
    for (u32 col = 0; col < 4; ++col) {
        g_g(state, col, col + 4, col + 8, col + 12, m[2 * col], m[2 * col + 1]);
    }
    // Mix the diagonals: each row is taken one column further to the right,
    // wrapping around: (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14).
    for (u32 dg = 0; dg < 4; ++dg) {
        g_g(state,
            dg,
            4 + (dg + 1) % 4,
            8 + (dg + 2) % 4,
            12 + (dg + 3) % 4,
            m[8 + 2 * dg], m[9 + 2 * dg]);
    }
 }
 // Reorder the 16 message words in place according to g_MSG_PERMUTATION:
 // afterwards m[i] holds the word that was at m[g_MSG_PERMUTATION[i]].
 __device__ void g_permute(u32 m[16]) {
    u32 snapshot[16];
    for(int k=0; k<16; k++) {
        snapshot[k] = m[k];
    }
    for(int k=0; k<16; k++) {
        m[k] = snapshot[g_MSG_PERMUTATION[k]];
    }
 }
 // Word-wise memcpy replacement for use inside kernels (the built-in one was
 // reported to be slow there). `size` is given in BYTES and is rounded down
 // to whole 32-bit words.
 __device__ void g_memcpy(u32 *lhs, const u32 *rhs, int size) {
    // assuming u32 is 4 bytes
    for(int words_left = size / 4; words_left > 0; --words_left) {
        *lhs++ = *rhs++;
    }
 }
 // Element-wise memset replacement: assigns val to dest[0] .. dest[count - 1].
 // `count` is a number of elements, not bytes.
 template<typename T, typename ptr_t>
 __device__ void g_memset(ptr_t dest, T val, int count) {
    int idx = 0;
    while(idx < count) {
        dest[idx] = val;
        ++idx;
    }
 }
 // Compression function (device version).
 // Builds the 16-word state from the 8-word chaining value, the first four IV
 // words, the 64-bit counter, the block length and the flags; runs seven
 // rounds with the message permuted between rounds; then folds the halves
 // together. The result is left in `state` (16 words).
 __device__ void g_compress(
    u32 *chaining_value,
    u32 *block_words,
    u64 counter,
    u32 block_len,
    u32 flags,
    u32 *state
 ) {
    // state[0..7] = chaining value, state[8..11] = IV[0..3]
    for(int k=0; k<8; k++) {
        state[k] = chaining_value[k];
    }
    for(int k=0; k<4; k++) {
        state[8 + k] = g_IV[k];
    }
    state[12] = (u32)counter;
    state[13] = (u32)(counter >> 32);
    state[14] = block_len;
    state[15] = flags;
    // Work on a private copy: the message words are permuted in place.
    u32 msg[16];
    for(int k=0; k<16; k++) {
        msg[k] = block_words[k];
    }
    // Seven rounds, permuting the message before every round but the first.
    const int total_rounds = 7;
    for(int r=0; r<total_rounds; r++) {
        if(r != 0) {
            g_permute(msg);
        }
        g_round(state, msg);
    }
    // Output transform.
    for(int k=0; k<8; k++){
        state[k] ^= state[k + 8];
        state[k + 8] ^= chaining_value[k];
    }
 }
 // Decode little-endian bytes into 32-bit words: words[k] is assembled from
 // bytes[4k] (least significant) .. bytes[4k + 3] (most significant).
 // Bytes are consumed four at a time, so when bytes_len is not a multiple of
 // 4 the last group reads up to the next multiple of 4; the caller must make
 // those bytes readable.
 __device__ void g_words_from_little_endian_bytes(
    u8 *bytes, u32 *words, u32 bytes_len
 ) {
    for(u32 off=0; off<bytes_len; off+=4) {
        // Widen each byte to u32 BEFORE shifting: a bare u8 is promoted to a
        // signed int, and shifting a byte >= 0x80 left by 24 would move a 1
        // into the sign bit.
        words[off/4] = (u32)bytes[off]
                     | ((u32)bytes[off+1] << 8)
                     | ((u32)bytes[off+2] << 16)
                     | ((u32)bytes[off+3] << 24);
    }
 }
 // Compress this chunk on the device and leave the result in raw_hash.
 //   - Parent node (PARENT set in flags): a single compression of `data`
 //     with `key` as chaining value and a zero counter.
 //   - Leaf node: leaf_data[0 .. leaf_len) is processed in BLOCK_LEN-byte
 //     blocks, chaining each result into the next. The first block gets
 //     CHUNK_START; the last gets CHUNK_END plus out_flags. An empty leaf is
 //     hashed as one block of length 0.
 __device__ void Chunk::g_compress_chunk(u32 out_flags) {
    if(flags&PARENT) {
        g_compress(
            key,
            data,
            0,  // counter is always zero for parent nodes
            BLOCK_LEN,
            flags | out_flags,
            raw_hash
        );
        return;
    }
    u32 chaining_value[8];
    u32 block_len = BLOCK_LEN, flagger;
    g_memcpy(chaining_value, key, 32);
    bool empty_input = (leaf_len==0);
    if(empty_input) {
        for(u32 i=0; i<BLOCK_LEN; i++)
            leaf_data[i] = 0U;
        leaf_len = BLOCK_LEN;
    }
    // move all mem allocs outside loop
    u32 block_words[16];
    u8 block_cast[BLOCK_LEN];
    for(u32 i=0; i<leaf_len; i+=BLOCK_LEN) {
        flagger = flags;
        // for the last message block
        if(i+BLOCK_LEN > leaf_len)
            block_len = leaf_len%BLOCK_LEN;
        else
            block_len = BLOCK_LEN;
        // special case
        if(empty_input)
            block_len = 0;
        // clear up block_words
        g_memset(block_words, 0, 16);
        // Stage the block in a zero-filled buffer and copy only the block_len
        // valid bytes. Word decoding rounds the length up to a multiple of 4;
        // reading from the staging buffer guarantees the padding bytes are
        // zero instead of whatever follows the data in leaf_data.
        g_memset(block_cast, 0, BLOCK_LEN);
        for(u32 b=0; b<block_len; b++)
            block_cast[b] = leaf_data[i+b];
        u32 new_block_len(block_len);
        if(block_len%4)
            new_block_len += 4 - (block_len%4);
        g_words_from_little_endian_bytes(block_cast, block_words, new_block_len);
        if(i==0)
            flagger |= CHUNK_START;
        if(i+BLOCK_LEN >= leaf_len)
            flagger |= CHUNK_END | out_flags;
        // raw hash for root node
        g_compress(
            chaining_value,
            block_words,
            counter,
            block_len,
            flagger,
            raw_hash
        );
        // The first 8 words of the result chain into the next block.
        g_memcpy(chaining_value, raw_hash, 32);
    }
 }
 // Recursive tree-hash kernel over the chunks data[l .. r).
 //   - One chunk (r - l == 1): compress it as a leaf.
 //   - Otherwise: launch child grids for the left and right halves, wait for
 //     each, then turn data[l] into a parent node whose message is the first
 //     8 words of both children's raw_hash and compress it.
 // The result for the range ends up in data[l].raw_hash.
 // NOTE(review): every thread with tid < n takes the same branch, so for
 // n > 1 several threads launch the same child grids and write to data[l];
 // this looks racy - confirm the intended launch configuration.
 __global__ void compute(Chunk *data, int l, int r) {
    // n is always a power of 2
    int n = r-l;
    int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if(tid >= n)
        return;
    if(n==1) {
        data[l].g_compress_chunk();
        // printf("Compressing : %d\n", l);
    }
    else {
        // Left half, then right half; each child grid is awaited before
        // continuing.
        compute<<<n/2,16>>>(data, l, l+n/2);
        cudaDeviceSynchronize();
        compute<<<n/2,16>>>(data, l+n/2, r);
        cudaDeviceSynchronize();
        data[l].flags |= PARENT;
        // data[l].data is indexed in 32-bit words by g_compress, so "+8"
        // skips the 32 bytes just written by the first copy.
        memcpy(data[l].data, data[l].raw_hash, 32);
        memcpy(data[l].data+8, data[l+n/2].raw_hash, 32);
        data[l].g_compress_chunk();
        // printf("Compressing : %d to %d\n", l, r);
    }
 }
 // Host-side entry point for light_hash (unchanged).
 // Uploads N chunks from `data` into the device buffer `memory_bar`, runs the
 // recursive `compute` kernel over them, and copies the first chunk (which
 // holds the result) back into `result`.
 void light_hash(Chunk *data, int N, Chunk *result, Chunk *memory_bar) {
    // Device settings
    // Allows DeviceSync to be called upto 16 levels of recursion
    cudaDeviceSetLimit(cudaLimitDevRuntimeSyncDepth, 16);
    // Device-side storage is supplied by the caller.
    Chunk *device_chunks = memory_bar;
    const int upload_bytes = N*sizeof(Chunk);
    cudaMemcpy(device_chunks, data, upload_bytes, cudaMemcpyHostToDevice);
    // Actual computation of hash
    compute<<<N,32>>>(device_chunks, 0, N);
    // Only the first chunk is read back.
    cudaMemcpy(result, device_chunks, sizeof(Chunk), cudaMemcpyDeviceToHost);
 }
 // Device-callable version of light_hash.
 // Hashes up to BLOCK_LEN * 16 bytes of `input` as a single root chunk, keyed
 // with g_IV, and writes the first 8 result words to `output` as 32
 // little-endian bytes. Input beyond BLOCK_LEN * 16 bytes is ignored.
 __device__ void light_hash_device(const uint8_t* input, size_t input_len, uint8_t* output) {
    // Create a single chunk for processing the input
    Chunk chunk;
    // Initialize the chunk with the input data
    for (int i = 0; i < 8; i++) {
        chunk.key[i] = g_IV[i]; // Use device constant IV
    }
    // Copy the input data to leaf_data (with bounds checking)
    const size_t capacity = (size_t)BLOCK_LEN * 16;
    size_t copy_len = min(input_len, capacity); // Ensure we don't overflow
    for (size_t i = 0; i < copy_len; i++) {
        chunk.leaf_data[i] = input[i];
    }
    // The compression routine consumes the data in 4-byte words and rounds a
    // partial final word up, so zero the bytes up to the next 4-byte boundary;
    // otherwise unset stack bytes could be mixed into the hash. The boundary
    // never exceeds `capacity`, which is itself a multiple of 4.
    const size_t padded_len = (copy_len + 3) & ~(size_t)3;
    for (size_t i = copy_len; i < padded_len; i++) {
        chunk.leaf_data[i] = 0;
    }
    chunk.leaf_len = copy_len;
    chunk.counter = 0;
    chunk.flags = 0; // Default flags
    // Process the chunk directly
    chunk.g_compress_chunk(ROOT); // Set ROOT flag for final output
    // Copy the raw hash to the output
    for (int i = 0; i < 8; i++) {
        // Convert 32-bit words to bytes in little-endian format
        output[i*4]   = (uint8_t)(chunk.raw_hash[i]);
        output[i*4+1] = (uint8_t)(chunk.raw_hash[i] >> 8);
        output[i*4+2] = (uint8_t)(chunk.raw_hash[i] >> 16);
        output[i*4+3] = (uint8_t)(chunk.raw_hash[i] >> 24);
    }
 }
 // Alias for compatibility with other device code.
 // Forwards unchanged to light_hash_device: hashes input_len bytes at `input`
 // and writes the 32-byte result to `output`.
 __device__ void blake3_hash_device(const uint8_t* input, size_t input_len, uint8_t* output) {
    light_hash_device(input, input_len, output);
 }
--- a/rin/miner/gpu/RinHash-hip/blaze3_cpu.cuh
+++ b/rin/miner/gpu/RinHash-hip/blaze3_cpu.cuh
@@ -0,0 +1,420 @@
 #include <iostream>
 #include <algorithm>
 #include <cstring>
 #include <vector>
 using namespace std;
 // Let's use a pinned memory vector!
 #include <thrust/host_vector.h>
 #include <thrust/system/cuda/experimental/pinned_allocator.h>
 // Short fixed-width integer aliases used throughout this header.
 using u32 = uint32_t;
 using u64 = uint64_t;
 using u8  = uint8_t;
 // Sizes in bytes.
 const u32 OUT_LEN = 32;
 const u32 KEY_LEN = 32;
 const u32 BLOCK_LEN = 64;
 const u32 CHUNK_LEN = 1024;
 // Multiple chunks make a snicker bar :)
 // SNICKER is the number of chunks collected before a level is hashed.
 const u32 SNICKER = 1U << 10;
 // Factory height and snicker size have an inversely proportional relationship:
 // FACTORY_HT * (log2 SNICKER) + 10 >= 64 
 // NOTE(review): with FACTORY_HT = 5 and SNICKER = 1 << 10 the left-hand side
 // is 60, so the stated constraint does not hold for these values - confirm.
 const u32 FACTORY_HT = 5;
 // Flag bits OR-ed into the flags word handed to compress().
 const u32 CHUNK_START = 1 << 0;
 const u32 CHUNK_END = 1 << 1;
 const u32 PARENT = 1 << 2;
 const u32 ROOT = 1 << 3;
 const u32 KEYED_HASH = 1 << 4;
 const u32 DERIVE_KEY_CONTEXT = 1 << 5;
 const u32 DERIVE_KEY_MATERIAL = 1 << 6;
 // Width of a u32 in bits; rotr() uses it to compute the complementary shift.
 const int usize = sizeof(u32) * 8;
 // Initialization vector: Hasher::_new() passes it as the default key and
 // compress() copies its first four words into state[8..11].
 u32 IV[8] = {
    0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 
    0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
 };
 // Message word order applied by permute() between rounds:
 // new m[i] = old m[MSG_PERMUTATION[i]].
 const int MSG_PERMUTATION[] = {
    2, 6, 3, 10, 7, 0, 4, 13, 
    1, 11, 12, 5, 9, 14, 15, 8
 };
 // Rotate a 32-bit word right by `shift` bits.
 // The shift count is reduced modulo 32 first, so a count of 0 (or any
 // multiple of 32) returns `value` unchanged instead of evaluating
 // `value << 32`, which is undefined for a 32-bit operand.
 u32 rotr(u32 value, int shift) {
    const u32 mask = (u32)usize - 1;
    const u32 s = (u32)shift & mask;
    return (value >> s) | (value << (((u32)usize - s) & mask));
 }
 // Mixing step applied to four words of the 16-word state.
 //   state      - working state, updated in place
 //   a, b, c, d - indices of the four state words to mix
 //   mx, my     - two message words folded into the mix
 // Rotation amounts are 16, 12, 8 and 7 bits, in that order.
 void g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
    u32 &va = state[a];
    u32 &vb = state[b];
    u32 &vc = state[c];
    u32 &vd = state[d];

    // First half: fold in mx.
    va += vb + mx;
    vd = rotr(vd ^ va, 16);
    vc += vd;
    vb = rotr(vb ^ vc, 12);

    // Second half: fold in my.
    va += vb + my;
    vd = rotr(vd ^ va, 8);
    vc += vd;
    vb = rotr(vb ^ vc, 7);
 }
 // One full round: eight g() calls, the first four mixing the columns of
 // the 4x4 state and the last four mixing its diagonals. Call q consumes
 // message words m[2q] and m[2q+1].
 void round(u32 state[16], u32 m[16]) {
    static const u32 LANES[8][4] = {
        // columns
        {0, 4,  8, 12},
        {1, 5,  9, 13},
        {2, 6, 10, 14},
        {3, 7, 11, 15},
        // diagonals
        {0, 5, 10, 15},
        {1, 6, 11, 12},
        {2, 7,  8, 13},
        {3, 4,  9, 14},
    };
    for(int q=0; q<8; ++q) {
        const u32 *lane = LANES[q];
        g(state, lane[0], lane[1], lane[2], lane[3], m[2*q], m[2*q + 1]);
    }
 }
 // Reorder the 16 message words in place according to MSG_PERMUTATION:
 // after the call, m[i] holds the word previously at m[MSG_PERMUTATION[i]].
 void permute(u32 m[16]) {
    u32 shuffled[16];
    for(int dst=0; dst<16; ++dst) {
        const int src = MSG_PERMUTATION[dst];
        shuffled[dst] = m[src];
    }
    memcpy(m, shuffled, sizeof(shuffled));
 }
 // Compression function.
 //   chaining_value - 8 input words, copied into state[0..7]
 //   block_words    - 16 message words (not modified; a local copy is permuted)
 //   counter        - 64-bit counter, split into state[12] (low) and state[13] (high)
 //   block_len      - stored in state[14]
 //   flags          - stored in state[15]
 //   state          - 16-word output: after seven rounds, state[0..7] is XORed
 //                    with state[8..15] and state[8..15] with chaining_value
 void compress(
    u32 *chaining_value,
    u32 *block_words,
    u64 counter,
    u32 block_len,
    u32 flags,
    u32 *state
 ) {
    // Assemble the initial 16-word state.
    memcpy(&state[0], chaining_value, 8*sizeof(u32));
    memcpy(&state[8], IV, 4*sizeof(u32));
    state[12] = (u32)counter;
    state[13] = (u32)(counter >> 32);
    state[14] = block_len;
    state[15] = flags;

    // Work on a private copy so the caller's message words stay intact.
    u32 schedule[16];
    memcpy(schedule, block_words, sizeof(schedule));

    // Seven rounds; the message is permuted between rounds, i.e. before
    // every round except the first.
    const int ROUNDS = 7;
    for(int r=0; r<ROUNDS; ++r) {
        if(r != 0)
            permute(schedule);
        round(state, schedule);
    }

    // Feed-forward.
    for(int w=0; w<8; ++w) {
        state[w] ^= state[w + 8];
        state[w + 8] ^= chaining_value[w];
    }
 }
 // Pack `bytes_len` bytes into 32-bit words, least-significant byte first.
 //   bytes     - input bytes
 //   words     - output; receives ceil(bytes_len / 4) words
 //   bytes_len - number of valid input bytes
 // Each byte is widened to u32 before shifting, so a byte >= 0x80 is never
 // shifted into the sign bit of a promoted int. When bytes_len is not a
 // multiple of 4, the last word is built from the bytes that exist and
 // zero-padded instead of reading past the end of `bytes`.
 void words_from_little_endian_bytes(u8 *bytes, u32 *words, u32 bytes_len) {
    for(u32 i=0; i<bytes_len; i+=4) {
        const u32 remaining = bytes_len - i;
        const u32 avail = remaining < 4 ? remaining : 4;
        u32 word = 0;
        for(u32 k=0; k<avail; k++)
            word |= (u32)bytes[i+k] << (8*k);
        words[i/4] = word;
    }
 }
 // One node of the hash tree. A leaf carries up to 1024 bytes of input in
 // leaf_data; any other node carries 64 bytes (16 words) in data.
 struct Chunk {
    // use only when it is a leaf node
    // leaf data may have less than 1024 bytes
    u8 leaf_data[1024];
    u32 leaf_len;
    // use in all other cases
    // data will always have 64 bytes
    u32 data[16];
    u32 flags;
    // full 16-word output of the last compress() call on this node
    u32 raw_hash[16];
    u32 key[8];
    // only useful for leaf nodes
    u64 counter;
    // Constructor for leaf nodes.
    //   input  - source bytes
    //   size   - number of bytes to take; values outside [0, 1024] are
    //            clamped so the copy can never overrun leaf_data
    //   _flags - initial flags word
    //   _key   - 8 key words, copied
    //   ctr    - chunk counter
    __device__ __host__ Chunk(char *input, int size, u32 _flags, u32 *_key, u64 ctr){
        counter = ctr;
        flags = _flags;
        memcpy(key, _key, 8*sizeof(*key));
        memset(leaf_data, 0, sizeof(leaf_data));
        u32 n = 0;
        if(size > 0) {
            n = (u32)size;
            if(n > (u32)sizeof(leaf_data))
                n = (u32)sizeof(leaf_data);
        }
        if(n)
            memcpy(leaf_data, input, n);
        leaf_len = n;
    }
    // Constructor for a node without leaf input: sets flags and key, zero
    // counter and zero leaf_len. leaf_data, data and raw_hash are left
    // uninitialized.
    __device__ __host__ Chunk(u32 _flags, u32 *_key) {
        counter = 0;
        flags = _flags;
        memcpy(key, _key, 8*sizeof(*key));
        leaf_len = 0;
    }
    // Leaves every member uninitialized; the caller must fill the chunk.
    __device__ __host__ Chunk() {}
    // process data in sizes of message blocks and store cv in hash
    void compress_chunk(u32=0);
    __device__ void g_compress_chunk(u32=0);
 };
 // Compress this node on the host and leave the 16-word result in raw_hash.
 //   out_flags - extra flag bits OR-ed into the final compression only
 //               (callers in this file pass ROOT for the root node).
 // Parent nodes (flags & PARENT) are compressed in one call over `data`
 // with counter 0. Leaf nodes are processed in BLOCK_LEN-byte blocks,
 // chaining the first 8 words of each result into the next block.
 // Apart from raw_hash the chunk is not modified, so the call can be
 // repeated on the same chunk (as hash_root() does) with the same result.
 // An empty leaf (leaf_len == 0) is compressed as one zero-length block.
 void Chunk::compress_chunk(u32 out_flags) {
    if(flags&PARENT) {
        compress(
            key,
            data,
            0,  // counter is always zero for parent nodes
            BLOCK_LEN,
            flags | out_flags,
            raw_hash
        );
        return;
    }
    u32 chaining_value[8];
    memcpy(chaining_value, key, 8*sizeof(*chaining_value));
    const bool empty_input = (leaf_len==0);
    // Loop bound only: an empty leaf still needs exactly one iteration.
    // leaf_len itself is deliberately left untouched.
    const u32 total_len = empty_input ? BLOCK_LEN : leaf_len;
    for(u32 i=0; i<total_len; i+=BLOCK_LEN) {
        // Number of real input bytes in this block.
        u32 block_len;
        if(empty_input)
            block_len = 0;
        else if(i+BLOCK_LEN > total_len)
            block_len = total_len%BLOCK_LEN;   // short final block
        else
            block_len = BLOCK_LEN;
        // Zero-pad the block to BLOCK_LEN bytes, then convert all of it.
        u8 block_cast[BLOCK_LEN];
        memset(block_cast, 0, sizeof(block_cast));
        memcpy(block_cast, leaf_data+i, block_len*sizeof(*block_cast));
        u32 block_words[16];
        words_from_little_endian_bytes(block_cast, block_words, BLOCK_LEN);
        u32 flagger = flags;
        if(i==0)
            flagger |= CHUNK_START;
        // out_flags apply to the last block only
        if(i+BLOCK_LEN >= total_len)
            flagger |= CHUNK_END | out_flags;
        compress(
            chaining_value,
            block_words,
            counter,
            block_len,
            flagger,
            raw_hash
        );
        memcpy(chaining_value, raw_hash, 8*sizeof(*chaining_value));
    }
 }
 // Host-side vector of Chunk that allocates through thrust's experimental
 // pinned_allocator; Hasher::factory uses it for every level.
 using thrust_vector = thrust::host_vector<
    Chunk,
    thrust::system::cuda::experimental::pinned_allocator<Chunk>
 >;
 // The GPU hasher (defined elsewhere). hash_many() calls it as
 // light_hash(chunks, count, &result, memory_bar).
 void light_hash(Chunk*, int, Chunk*, Chunk*);
 // Hash the chunks data[first, last) into a single subtree node.
 //   data       - chunk array
 //   first,last - half-open index range; callers pass a power-of-two length
 //   memory_bar - device scratch buffer forwarded to light_hash()
 // A range of exactly one chunk is compressed on the CPU to avoid the GPU
 // call overhead; note that this updates data[first] in place before it is
 // returned by value. Every other range is handed to light_hash().
 // (The CPU-only alternative would recurse on both halves and combine them
 // into a PARENT node.)
 Chunk hash_many(Chunk *data, int first, int last, Chunk *memory_bar) {
    const int count = last-first;
    if(count != 1) {
        Chunk subtree;
        light_hash(data+first, count, &subtree, memory_bar);
        return subtree;
    }
    Chunk &only = data[first];
    only.compress_chunk();
    return only;
 }
 Chunk merge(Chunk &left, Chunk &right);
 void hash_root(Chunk &node, vector<u8> &out_slice);
 // Incremental hasher. Chunks are appended with update(), collected in
 // `factory` levels of up to SNICKER entries each, and reduced to a single
 // root by finalize().
 // Member order matters: new_internal() fills key, flags, ctr and file_size
 // positionally with a braced initializer.
 struct Hasher {
    u32 key[8];
    u32 flags;
    // number of chunks pushed so far; becomes each new chunk's counter
    u64 ctr;
    // total input size in bytes; init() sizes the device buffer from it
    u64 file_size;
    // A memory bar for CUDA to use during its computation.
    // Stays null until init() allocates it.
    Chunk* memory_bar;
    // Factory is an array of FACTORY_HT possible SNICKER bars
    thrust_vector factory[FACTORY_HT];
    // methods
    static Hasher new_internal(u32 key[8], u32 flags, u64 fsize);
    static Hasher _new(u64);
    // initializes cuda memory (if needed)
    void init();
    // Frees the CUDA buffer if one was allocated. The former
    // `else free(memory_bar)` branch could only ever run with a null
    // pointer, so it was a no-op and has been dropped.
    ~Hasher() {
        if(memory_bar)
            cudaFree(memory_bar);
    }
    void update(char *input, int size);
    void finalize(vector<u8> &out_slice);
    void propagate();
 };
 // Build a Hasher from an explicit key, flags word and total input size.
 // The braced initializer fills, in declaration order: key[0..7], flags,
 // ctr (0) and file_size. memory_bar and factory are not listed, so they
 // take their default/empty values; call init() before hashing.
 Hasher Hasher::new_internal(u32 key[8], u32 flags, u64 fsize) {
    return Hasher{
        {
            key[0], key[1], key[2], key[3],
            key[4], key[5], key[6], key[7]
        },
        flags,
        0,   // counter
        fsize
    };
 }
 // Default constructor helper: IV as the key, no flags, `fsize` input bytes.
 Hasher Hasher::_new(u64 fsize) { return new_internal(IV, 0, fsize); }
 // Allocate the device scratch buffer and pre-size the two lowest factory
 // levels. Does nothing except clear memory_bar when file_size is 0.
 // On allocation failure memory_bar is left null (so the destructor will
 // not free a garbage pointer) and a message is written to stderr.
 void Hasher::init() {
    memory_bar = nullptr;
    if(file_size<1)
        return;
    // Integer ceiling of file_size / CHUNK_LEN. The previous
    // ceil(file_size / CHUNK_LEN) applied ceil() after the integer division
    // had already truncated, so it never rounded up.
    u64 num_chunks = file_size / CHUNK_LEN + (file_size % CHUNK_LEN != 0 ? 1 : 0);
    u32 bar_size = (u32)min(num_chunks, (u64)SNICKER);
    // Just for safety :)
    ++bar_size;
    cudaError_t err = cudaMalloc(&memory_bar, bar_size*sizeof(Chunk));
    if(err != cudaSuccess) {
        memory_bar = nullptr;
        cerr << "Hasher::init: cudaMalloc failed: " << cudaGetErrorString(err) << "\n";
    }
    // Let the most commonly used places always have memory
    // +1 so that it does not resize when it hits SNICKER entries
    u32 RESERVE = SNICKER + 1;
    factory[0].reserve(RESERVE);
    factory[1].reserve(RESERVE);
 }
 void Hasher::propagate() {
    int level=0;
    // nodes move to upper levels if lower one is one SNICKER long
    while(factory[level].size() == SNICKER) {
        Chunk subtree = hash_many(factory[level].data(), 0, SNICKER, memory_bar);
        factory[level].clear();
        ++level;
        factory[level].push_back(subtree);
    }
 } 
 void Hasher::update(char *input, int size) {
    factory[0].push_back(Chunk(input, size, flags, key, ctr));
    ++ctr;
    if(factory[0].size() == SNICKER)
        propagate();
 }
 // Reduce everything pushed so far to a single root and write the digest.
 //   out_slice - output buffer; its current size() decides how many bytes
 //               hash_root() produces.
 // Levels are processed bottom-up. Each non-empty level is reduced to one
 // node, which is appended to the level above (or becomes the root on the
 // top level). If every level is empty, the root stays the empty chunk
 // built from flags and key.
 void Hasher::finalize(vector<u8> &out_slice) {
    Chunk root(flags, key);
    for(int i=0; i<FACTORY_HT; i++) {
        vector<Chunk> subtrees;
        u32 n = factory[i].size(), divider=SNICKER;
        if(!n)
            continue;
        int start = 0;
        // Split the n nodes into power-of-two runs, largest first (one run
        // per set bit of n), and hash each run into a subtree.
        while(divider) {
            if(n&divider) {
                Chunk subtree = hash_many(factory[i].data(), start, start+divider, memory_bar);
                subtrees.push_back(subtree);
                start += divider;
            }
            divider >>= 1;
        }
        // Fold the subtrees from the right: the two last entries are merged
        // and the parent is pushed back, until one node is left.
        while(subtrees.size()>1) {
            Chunk tmp1 = subtrees.back();
            subtrees.pop_back();
            Chunk tmp2 = subtrees.back();
            subtrees.pop_back();
            // tmp2 is the left child
            // tmp1 is the right child
            // that's the order they appear within the array
            Chunk tmp = merge(tmp2, tmp1);
            subtrees.push_back(tmp);
        }
        // Hand the level's result upward; only the top level sets the root.
        if(i<FACTORY_HT-1)
            factory[i+1].push_back(subtrees[0]);
        else
            root = subtrees[0];
    }
    hash_root(root, out_slice);
 }
 // Combine two sibling nodes into their (not yet compressed) parent.
 // Both children are compressed first, which updates their raw_hash in
 // place. The parent inherits the left child's flags plus PARENT and the
 // left child's key; its 64-byte data is the left child's first 8 hash
 // words followed by the right child's first 8 hash words.
 Chunk merge(Chunk &left, Chunk &right) {
    left.compress_chunk();
    right.compress_chunk();
    Chunk parent(left.flags | PARENT, left.key);
    const size_t half_bytes = 8*sizeof(u32);   // 32 bytes per child
    memcpy(&parent.data[0], left.raw_hash, half_bytes);
    memcpy(&parent.data[8], right.raw_hash, half_bytes);
    return parent;
 }
 // Fill out_slice (at its current size) with output bytes from the root.
 // Output is produced in strides of 2*OUT_LEN (64) bytes: for each stride
 // the node's counter is set to the stride index, the node is compressed
 // with the ROOT flag, and the 16 result words are written out
 // least-significant byte first. The final stride may be shorter.
 // Note that node.counter and node.raw_hash are modified.
 void hash_root(Chunk &node, vector<u8> &out_slice) {
    const u64 stride = 2*OUT_LEN;
    u64 block_no = 0;
    for(u64 offset=0; int(out_slice.size()-offset)>0; offset+=stride, ++block_no) {
        // the last message block must be hashed with the root flag
        node.counter = block_no;
        node.compress_chunk(ROOT);
        const u64 remaining = (u64)out_slice.size()-offset;
        const u64 take = min(stride, remaining);
        for(u64 b=0; b<take; ++b) {
            const u32 word = node.raw_hash[b/4];
            out_slice[offset+b] = (word>>(8*(b%4))) & 0x000000FF;
        }
    }
 }
--- a/rin/miner/gpu/RinHash-hip/build-hip.bat
+++ b/rin/miner/gpu/RinHash-hip/build-hip.bat
@@ -0,0 +1,18 @@
@echo off
 rem Configure and build the project with CMake + Ninja for the AMD HIP platform.
 rem Exits with code 1 if hipcc is missing or if either cmake step fails.
 setlocal
 rem Fail early when the HIP compiler is not on PATH.
 where hipcc >nul 2>nul
 if errorlevel 1 (
  echo ERROR: hipcc not found. Please install ROCm/HIP toolchain.
  exit /b 1
 )
 rem Out-of-source build in .\build
 if not exist build mkdir build
 cd build
 rem Configure step
 cmake -G "Ninja" -DHIP_PLATFORM=amd -DCMAKE_BUILD_TYPE=Release ..
 if errorlevel 1 exit /b 1
 rem Build step
 cmake --build . -j
 if errorlevel 1 exit /b 1
 cd ..
 echo Build done. Executable should be at build\rinhash-hip-miner.exe
--- a/rin/miner/gpu/RinHash-hip/hip_runtime_shim.h
+++ b/rin/miner/gpu/RinHash-hip/hip_runtime_shim.h
@@ -0,0 +1,29 @@
 #pragma once
 // Minimal CUDA-to-HIP name shim.
 // When building for the AMD HIP platform, the CUDA runtime API names used
 // by this project are mapped onto their HIP equivalents. On any other
 // platform this header defines nothing.
 //
 // Only runtime API names are remapped. The kernel-language pieces
 // (__global__, __device__, __host__, __shared__, __syncthreads, threadIdx,
 // blockIdx, blockDim, gridDim) and hipLaunchKernelGGL are provided by
 // <hip/hip_runtime.h> itself and must not be redefined here: the previous
 // self-referential defines replaced HIP's own macros, and mapping
 // threadIdx to hipThreadIdx_x broke ordinary `threadIdx.x` accesses in
 // kernels.
 #ifdef __HIP_PLATFORM_AMD__
  #include <hip/hip_runtime.h>
  #include <hip/hip_runtime_api.h>
  #define cudaError_t            hipError_t
  #define cudaSuccess            hipSuccess
  #define cudaMalloc             hipMalloc
  #define cudaFree               hipFree
  #define cudaMemcpy             hipMemcpy
  #define cudaMemcpyHostToDevice hipMemcpyHostToDevice
  #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
  #define cudaDeviceSynchronize  hipDeviceSynchronize
  #define cudaGetErrorString     hipGetErrorString
  #define cudaGetLastError       hipGetLastError
  #define cudaMemGetInfo         hipMemGetInfo
  #define cudaDeviceReset        hipDeviceReset
 #endif
--- a/rin/miner/gpu/RinHash-hip/rinhash.hip.cu
+++ b/rin/miner/gpu/RinHash-hip/rinhash.hip.cu
@@ -0,0 +1,283 @@
 #include "hip_runtime_shim.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 #include <vector>
 #include <stdexcept>
 // Include shared device functions
 #include "rinhash_device.cuh"
 #include "argon2d_device.cuh"
 #include "sha3-256.hip.cu"
 #include "blake3_device.cuh"
 // RinHash kernel for a single input.
 // Thread 0 of the block runs the whole chain and writes the 32-byte
 // result; all other threads only take part in the final barrier.
 //   input         - device pointer to the bytes to hash
 //   input_len     - number of input bytes
 //   output        - device pointer receiving 32 bytes
 //   argon2_memory - device scratch memory handed to device_argon2d_hash
 extern "C" __global__ void rinhash_cuda_kernel(
    const uint8_t* input,
    size_t input_len,
    uint8_t* output,
    block* argon2_memory
 ) {
    __shared__ uint8_t s_blake3_digest[32];
    __shared__ uint8_t s_argon2_digest[32];
    const bool is_leader = (threadIdx.x == 0);
    if (is_leader) {
        // Stage 1: BLAKE3 over the raw input.
        light_hash_device(input, input_len, s_blake3_digest);

        // Stage 2: Argon2d over the BLAKE3 digest (t_cost=2, m_cost=64,
        // lanes=1) with the fixed 11-byte salt "RinCoinSalt".
        uint8_t salt[11] = { 'R','i','n','C','o','i','n','S','a','l','t' };
        device_argon2d_hash(s_argon2_digest, s_blake3_digest, 32, 2, 64, 1, argon2_memory, salt, 11);

        // Stage 3: SHA3-256 over the Argon2d digest.
        uint8_t final_digest[32];
        sha3_256_device(s_argon2_digest, 32, final_digest);

        // Publish the result.
        for (int b = 0; b < 32; ++b)
            output[b] = final_digest[b];
    }
    __syncthreads();
 }
 // RinHash HIP implementation for a single header.
 //   input     - host pointer to the header bytes
 //   input_len - number of header bytes
 //   output    - host buffer receiving the 32-byte hash
 // Allocates the device buffers, runs one kernel launch and copies the
 // result back. On any failure a message is printed to stderr and `output`
 // is left unmodified. All device buffers are released on every path.
 extern "C" void rinhash_cuda(const uint8_t* input, size_t input_len, uint8_t* output) {
    // Argon2 parameters
    const uint32_t m_cost = 64; // blocks (64 KiB)
    uint8_t *d_input = nullptr;
    uint8_t *d_output = nullptr;
    block *d_memory = nullptr;
    cudaError_t err;
    // Allocate device buffers
    err = cudaMalloc(&d_input, input_len);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate input memory: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }
    err = cudaMalloc(&d_output, 32);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate output memory: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }
    // Allocate Argon2 memory once per hash
    err = cudaMalloc(&d_memory, m_cost * sizeof(block));
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate argon2 memory: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }
    // Copy input header
    err = cudaMemcpy(d_input, input, input_len, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to copy input to device: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }
    // Launch the kernel (single thread is fine for single hash)
    rinhash_cuda_kernel<<<1, 1>>>(d_input, input_len, d_output, d_memory);
    // A bad launch is reported by cudaGetLastError, not by the launch itself.
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to launch kernel: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }
    // Wait
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error during kernel execution: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }
    // Copy result
    err = cudaMemcpy(output, d_output, 32, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to copy output from device: %s\n", cudaGetErrorString(err));
    }
 cleanup:
    // Single release point; buffers that were never allocated are still null.
    if (d_memory) cudaFree(d_memory);
    if (d_input) cudaFree(d_input);
    if (d_output) cudaFree(d_output);
 }
 // Serialize the six block-header fields into one contiguous byte buffer.
 // The fields are copied verbatim (no byte-order conversion) in this order:
 // version (4), prev_block (32), merkle_root (32), timestamp (4), bits (4),
 // nonce (4), for 80 bytes in total.
 //   output     - destination, at least 80 bytes
 //   output_len - receives the number of bytes written
 extern "C" void blockheader_to_bytes(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    const uint32_t* nonce,
    uint8_t* output,
    size_t* output_len
 ) {
    const struct {
        const void* src;
        size_t len;
    } fields[] = {
        { version,     4  },
        { prev_block,  32 },
        { merkle_root, 32 },
        { timestamp,   4  },
        { bits,        4  },
        { nonce,       4  },
    };
    size_t written = 0;
    for (size_t f = 0; f < sizeof(fields) / sizeof(fields[0]); ++f) {
        memcpy(output + written, fields[f].src, fields[f].len);
        written += fields[f].len;
    }
    *output_len = written;
 }
 // Batch processing version for mining (sequential per header for correctness).
 //   block_headers    - host buffer with num_blocks headers laid out back to back
 //   block_header_len - size of one header in bytes
 //   outputs          - host buffer receiving num_blocks * 32 hash bytes
 //   num_blocks       - number of headers; 0 returns immediately
 // The device buffers are allocated once and reused for every header.
 // Processing stops at the first failure (a message is printed to stderr);
 // outputs of headers not yet processed are left unmodified.
 extern "C" void rinhash_cuda_batch(
    const uint8_t* block_headers,
    size_t block_header_len,
    uint8_t* outputs,
    uint32_t num_blocks
 ) {
    // Argon2 parameters
    const uint32_t m_cost = 64;
    const size_t hash_len = 32;
    // Allocate reusable device buffers
    uint8_t *d_input = nullptr;
    uint8_t *d_output = nullptr;
    block *d_memory = nullptr;
    cudaError_t err;
    if (num_blocks == 0) {
        return;
    }
    err = cudaMalloc(&d_input, block_header_len);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate header buffer: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }
    err = cudaMalloc(&d_output, 32);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate output buffer: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }
    err = cudaMalloc(&d_memory, m_cost * sizeof(block));
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate argon2 memory: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }
    for (uint32_t i = 0; i < num_blocks; i++) {
        // Offsets are computed in size_t so they cannot wrap at 32 bits.
        const uint8_t* header = block_headers + (size_t)i * block_header_len;
        uint8_t* out = outputs + (size_t)i * hash_len;
        err = cudaMemcpy(d_input, header, block_header_len, cudaMemcpyHostToDevice);
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error: copy header %u failed: %s\n", i, cudaGetErrorString(err));
            break;
        }
        rinhash_cuda_kernel<<<1, 1>>>(d_input, block_header_len, d_output, d_memory);
        // A bad launch is reported by cudaGetLastError, not by the launch itself.
        err = cudaGetLastError();
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error launching kernel %u: %s\n", i, cudaGetErrorString(err));
            break;
        }
        err = cudaDeviceSynchronize();
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error in kernel %u: %s\n", i, cudaGetErrorString(err));
            break;
        }
        err = cudaMemcpy(out, d_output, 32, cudaMemcpyDeviceToHost);
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error: copy out %u failed: %s\n", i, cudaGetErrorString(err));
            break;
        }
    }
 cleanup:
    // Single release point; buffers that were never allocated are still null.
    if (d_memory) cudaFree(d_memory);
    if (d_output) cudaFree(d_output);
    if (d_input) cudaFree(d_input);
 }
 // Public single-hash entry point: serializes the six header fields into
 // an 80-byte buffer and hashes it with rinhash_cuda().
 //   output - receives the 32-byte hash
 extern "C" void RinHash(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    const uint32_t* nonce,
    uint8_t* output
 ) {
    uint8_t header[80];
    size_t header_len = 0;
    blockheader_to_bytes(version, prev_block, merkle_root, timestamp,
                         bits, nonce, header, &header_len);
    rinhash_cuda(header, header_len, output);
 }
 // Mining function that tries different nonces (host-side best selection).
 // Hashes the header for each nonce in [start_nonce, start_nonce + num_nonces)
 // and reports the nonce whose hash is smallest when the 32 bytes are
 // compared from byte 0 upward; ties keep the earliest nonce.
 //   found_nonce - receives the winning nonce
 //   target_hash - accepted for interface compatibility; not read
 //   best_hash   - receives the winning 32-byte hash
 // When num_nonces is 0 nothing is hashed and neither output is written
 // (previously this case read 32 bytes from an empty vector).
 // NOTE(review): rinhash_cuda_batch() reports failures only on stderr, so
 // hashes it never produced stay zero-filled here and would compare as the
 // best result - confirm how callers should detect that.
 extern "C" void RinHash_mine(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    uint32_t start_nonce,
    uint32_t num_nonces,
    uint32_t* found_nonce,
    uint8_t* target_hash,
    uint8_t* best_hash
 ) {
    (void)target_hash;
    if (num_nonces == 0) {
        return;
    }
    const size_t block_header_len = 80;
    const size_t hash_len = 32;
    // Sizes are computed in size_t so they cannot wrap at 32 bits.
    std::vector<uint8_t> block_headers(block_header_len * num_nonces);
    std::vector<uint8_t> hashes(hash_len * num_nonces);
    for (uint32_t i = 0; i < num_nonces; i++) {
        uint32_t current_nonce = start_nonce + i;
        uint8_t* header = block_headers.data() + (size_t)i * block_header_len;
        size_t header_len = 0;
        blockheader_to_bytes(
            version,
            prev_block,
            merkle_root,
            timestamp,
            bits,
            &current_nonce,
            header,
            &header_len
        );
    }
    rinhash_cuda_batch(block_headers.data(), block_header_len, hashes.data(), num_nonces);
    // Pick the lexicographically smallest hash; memcmp compares bytes as
    // unsigned values, and the strict '<' keeps the earliest nonce on ties.
    uint32_t best_index = 0;
    for (uint32_t i = 1; i < num_nonces; i++) {
        const uint8_t* candidate = hashes.data() + (size_t)i * hash_len;
        const uint8_t* best = hashes.data() + (size_t)best_index * hash_len;
        if (memcmp(candidate, best, hash_len) < 0) {
            best_index = i;
        }
    }
    memcpy(best_hash, hashes.data() + (size_t)best_index * hash_len, hash_len);
    *found_nonce = start_nonce + best_index;
 }
--- a/rin/miner/gpu/RinHash-hip/rinhash_device.cuh
+++ b/rin/miner/gpu/RinHash-hip/rinhash_device.cuh
@@ -0,0 +1,8 @@
 #ifndef RINHASH_DEVICE_CUH
 #define RINHASH_DEVICE_CUH
 // Common runtime headers for the RinHash device code.
 // The CUDA headers are only pulled in when not building for the AMD HIP
 // platform, where the HIP runtime header is used instead.
 #ifdef __HIP_PLATFORM_AMD__
 #include <hip/hip_runtime.h>
 #else
 #include <cuda_runtime.h>
 #include <device_launch_parameters.h>
 #endif
 #include <stdint.h>
 #endif // RINHASH_DEVICE_CUH
--- a/rin/miner/gpu/RinHash-hip/sha3-256.hip.cu
+++ b/rin/miner/gpu/RinHash-hip/sha3-256.hip.cu
@@ -0,0 +1,140 @@
 #include <stdint.h>
 #include <stddef.h>
 #define KECCAKF_ROUNDS 24
 // Rotate a 64-bit value left by n bits.
 // The count is reduced modulo 64, so n == 0 returns x unchanged instead of
 // evaluating x >> 64, which is undefined for a 64-bit operand.
 __device__ inline uint64_t rotate(uint64_t x, int n) {
    const unsigned s = (unsigned)n & 63u;
    return (x << s) | (x >> ((64u - s) & 63u));
 }
 // Rotate a 64-bit value left by n bits; used by keccakf().
 // The count is reduced modulo 64, so n == 0 returns x unchanged instead of
 // evaluating x >> 64, which is undefined for a 64-bit operand.
 __device__ inline uint64_t ROTL64(uint64_t x, int n) {
    const unsigned s = (unsigned)n & 63u;
    return (x << s) | (x >> ((64u - s) & 63u));
 }
 // Keccak-f[1600] permutation: runs 24 rounds over the 25-word state `st`,
 // updating it in place. Each round applies the Theta, Rho+Pi, Chi and Iota
 // steps in that order.
 __device__ void keccakf(uint64_t st[25]) {
    // Rotation amount for each of the 24 Rho/Pi moves.
    const int R[24] = {
         1,  3,  6, 10, 15, 21,
        28, 36, 45, 55,  2, 14,
        27, 41, 56,  8, 25, 43,
        62, 18, 39, 61, 20, 44
    };
    // Destination word index for each of the 24 Rho/Pi moves.
    const int P[24] = {
        10,  7, 11, 17, 18, 3,
         5, 16, 8, 21, 24, 4,
        15, 23, 19, 13, 12, 2,
        20, 14, 22,  9, 6,  1
    };
    // Round constants XORed into st[0] by the Iota step.
    const uint64_t RC[24] = {
        0x0000000000000001ULL, 0x0000000000008082ULL,
        0x800000000000808aULL, 0x8000000080008000ULL,
        0x000000000000808bULL, 0x0000000080000001ULL,
        0x8000000080008081ULL, 0x8000000000008009ULL,
        0x000000000000008aULL, 0x0000000000000088ULL,
        0x0000000080008009ULL, 0x000000008000000aULL,
        0x000000008000808bULL, 0x800000000000008bULL,
        0x8000000000008089ULL, 0x8000000000008003ULL,
        0x8000000000008002ULL, 0x8000000000000080ULL,
        0x000000000000800aULL, 0x800000008000000aULL,
        0x8000000080008081ULL, 0x8000000000008080ULL,
        0x0000000080000001ULL, 0x8000000080008008ULL
    };
    int i, j, round;
    uint64_t t, bc[5];
    for (round = 0; round < 24; round++) {
        // Theta: bc[i] is the XOR of column i; every word of column i is
        // then XORed with bc[i-1] ^ rotl(bc[i+1], 1) (indices mod 5).
        for (i = 0; i < 5; i++)
            bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
        for (i = 0; i < 5; i++) {
            t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
            for (j = 0; j < 25; j += 5)
                st[j + i] ^= t;
        }
        // Rho and Pi: starting from st[1], each word is rotated by R[i] and
        // stored at index P[i]; the word it displaces is carried to the
        // next move (bc[0] is used as the temporary).
        t = st[1];
        for (i = 0; i < 24; i++) {
            j = P[i];
            bc[0] = st[j];
            st[j] = ROTL64(t, R[i]);
            t = bc[0];
        }
        // Chi: within each row of five words,
        // st[i] ^= ~st[i+1] & st[i+2] (indices mod 5), using a row snapshot.
        for (j = 0; j < 25; j += 5) {
            for (i = 0; i < 5; i++)
                bc[i] = st[j + i];
            for (i = 0; i < 5; i++)
                st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
        }
        // Iota
        st[0] ^= RC[round];
    }
 }
 // Read a 64-bit value stored little-endian in the 8 bytes at `src`
 // (src[0] is the least-significant byte).
 __device__ inline uint64_t load64_le(const uint8_t *src) {
    uint64_t value = 0;
    // Walk from the most-significant byte down, shifting earlier bytes up.
    #pragma unroll
    for (int b = 7; b >= 0; b--) {
        value = (value << 8) | (uint64_t)src[b];
    }
    return value;
 }
 // Write the 64-bit value `x` little-endian into the 8 bytes at `dst`
 // (dst[0] receives the least-significant byte).
 __device__ inline void store64_le(uint8_t *dst, uint64_t x) {
    #pragma unroll
    for (int b = 0; b < 8; b++) {
        dst[b] = (uint8_t)x;   // emit the current low byte
        x >>= 8;               // then expose the next one
    }
 }
 /*
  __device__ function sha3_256_device
    - Absorbs the `inlen` bytes at `input`, applies the SHA3-256 padding and
      the Keccak-f[1600] permutation, and writes the first 32 bytes
      (4 words) of the state to `hash_out` in little-endian form.
    - The rate (absorbed block size) for SHA3-256 is 136 bytes.
    - Any input length is accepted. The previous version absorbed exactly
      32 bytes in the final block and padded at byte 32, so it was only
      correct when inlen % 136 == 32 (the kernel in this project passes 32).
 */
 __device__ void sha3_256_device(const uint8_t *input, size_t inlen, uint8_t *hash_out) {
    const size_t rate = 136; // absorbed block size in bytes
    uint64_t st[25] = {0};   // state: 25 words = 1600 bits
    // Absorb every full rate-sized block (skipped when inlen < rate).
    while (inlen >= rate) {
        for (size_t i = 0; i < rate / 8; i++) {
            st[i] ^= load64_le(input + i * 8);
        }
        keccakf(st);
        input += rate;
        inlen -= rate;
    }
    // Absorb the remaining 0..rate-1 bytes: whole words first, then the
    // leftover bytes one at a time into their position inside the word.
    const size_t full_words = inlen / 8;
    for (size_t i = 0; i < full_words; i++) {
        st[i] ^= load64_le(input + i * 8);
    }
    for (size_t i = full_words * 8; i < inlen; i++) {
        st[i / 8] ^= (uint64_t)input[i] << (8 * (i % 8));
    }
    // Padding: 0x06 right after the message, 0x80 in the last byte of the
    // rate. Done with shifts so it does not depend on how st is laid out
    // in memory; both land on the same byte when inlen == rate - 1.
    st[inlen / 8] ^= (uint64_t)0x06 << (8 * (inlen % 8));
    st[(rate - 1) / 8] ^= (uint64_t)0x80 << (8 * ((rate - 1) % 8));
    keccakf(st);  // final permutation
    // Squeeze: 32 output bytes
    for (int i = 0; i < 4; i++) {
        store64_le(hash_out + i * 8, st[i]);
    }
 }
--- a/rin/miner/readme.md
+++ b/rin/miner/readme.md
@@ -20,4 +20,5 @@ cd cpuminer-opt-rinhash
 make -j$(nproc)
 # Test the newly built binary
-./cpuminer -a rinhash -o stratum+tcp://192.168.0.188:3333 -u username.workername -p x -t 4
+./cpuminer -a rinhash -o stratum+tcp://192.168.0.188:3333 -u db.win -p x -t 4
 cpuminer-rinhash.exe -a rinhash -o stratum+tcp://192.168.0.188:3334 -u db.win -p x -t 4
		`@@ -1,2 +0,0 @@`
			`# Auto detect text files and perform LF normalization`
			`* text=auto`
		`@@ -1 +0,0 @@`
			`See git repository ('git log') for full changelog.`
		`@@ -1,4 +0,0 @@`
			`Please consult the wiki for Windows compile instructions.`

			`https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source`
		`@@ -1,3 +0,0 @@`
			`cpuminer is available under the terms of the GNU Public License version 2.`

			`See COPYING for details.`