A bit of cleanup of BalticHash. Added WIP SIMD version.

This commit is contained in:
Szum123321 2022-11-27 23:12:25 +01:00
parent 300fe18b10
commit febbb95b97
2 changed files with 110 additions and 16 deletions

View File

@ -20,6 +20,7 @@ package net.szum123321.textile_backup.core.digest;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
/*
This algorithm copies construction of SeaHash (https://ticki.github.io/blog/seahash-explained/) including its IV
@ -27,33 +28,30 @@ import java.nio.ByteOrder;
I don't think it matters that much, honestly. One advantage the xorshift has is that it should be
easier to implement with AVX. Java should soon ship its Vector API by default.
*/
public class XorSeaHash implements Hash {
public class BalticHash implements Hash {
protected final static long[] IV = { 0x16f11fe89b0d677cL, 0xb480a793d8e6c86cL, 0x6fe2e5aaf078ebc9L, 0x14f994a4c5259381L};
//SeaHash IV
private final long[] state = { 0x16f11fe89b0d677cL, 0xb480a793d8e6c86cL, 0x6fe2e5aaf078ebc9L, 0x14f994a4c5259381L};
private final int buffer_size = (state.length + 1) * Long.BYTES;
private final int buffer_limit = state.length * Long.BYTES;
private final byte[] _byte_buffer = new byte[buffer_size];
private final long[] state = Arrays.copyOf(IV, IV.length);
protected final int buffer_limit = state.length * Long.BYTES;
protected final byte[] _byte_buffer = new byte[(state.length + 1) * Long.BYTES];
//Enforce endianness
private final ByteBuffer buffer = ByteBuffer.wrap(_byte_buffer).order(ByteOrder.LITTLE_ENDIAN);
protected final ByteBuffer buffer = ByteBuffer.wrap(_byte_buffer).order(ByteOrder.LITTLE_ENDIAN);
private long hashed_data_length = 0;
protected long hashed_data_length = 0;
@Override
public void update(byte b) {
buffer.put(b);
hashed_data_length += 1;
if (buffer.position() >= buffer_limit) round();
}
@Override
public void update(long b) {
buffer.putLong(b);
hashed_data_length += Long.BYTES;
if(buffer.position() >= buffer_limit) round();
}
public void update(byte [] data) { update(data, 0, data.length); }
public void update(byte[] data, int off, int len) {
int pos = off;
while(pos < len) {
@ -67,9 +65,11 @@ public class XorSeaHash implements Hash {
hashed_data_length += len;
}
@Override
public long getValue() {
if(buffer.position() != 0) round();
if(buffer.position() != 0) {
while(buffer.position() < buffer_limit) buffer.put((byte)0);
round();
}
long result = state[0];
result ^= state[1];
@ -80,8 +80,7 @@ public class XorSeaHash implements Hash {
return xorshift64star(result);
}
private void round() {
while(buffer.position() < buffer_limit) buffer.put((byte)0);
protected void round() {
int p = buffer.position();
buffer.rewind();
@ -91,7 +90,7 @@ public class XorSeaHash implements Hash {
if(p > buffer_limit) {
System.arraycopy(_byte_buffer, buffer_limit, _byte_buffer, 0, buffer.limit() - p);
buffer.position(buffer.limit() - p);
}
} else buffer.rewind();
}
long xorshift64star(long s) {

View File

@ -0,0 +1,95 @@
/*
* A simple backup mod for Fabric
* Copyright (C) 2022 Szum123321
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package net.szum123321.textile_backup.core.digest;
//import jdk.incubator.vector.*;
import net.szum123321.textile_backup.core.digest.BalticHash;
/* Mostly working XorSeaHash implementation using SIMD. In principle it should speed up calculation on most systems currently in use.
In practice it's actually slower: I tested it by comparing runtimes while hashing a directly opened FileInputStream.
My CPU is an AMD Ryzen 5 3500U.
Two possible reasons come to mind: either vector construction simply takes too much time, or the JVM auto-vectorizes the scalar version better than this hand-written code.
Hashing is still probably far from being the slowest part of the code, so I don't expect any major slowdowns either way.
I will keep this code here for possible future work.
*/
/**
 * Experimental SIMD (incubator Vector API) variant of {@code BalticHash}.
 *
 * <p>The whole implementation is intentionally commented out and the
 * constructor throws, because benchmarking showed this version to be slower
 * than the scalar {@code BalticHash} (see the header comment of this file).
 * The code is preserved verbatim for possible future work.
 *
 * <p>NOTE(review): the commented-out {@code xorshift64star(LongVector)} below
 * uses {@code ASHR} (arithmetic shift right) for the 12- and 27-bit shifts,
 * while the canonical xorshift64* step uses an unsigned shift — confirm
 * against the scalar implementation before re-enabling, since LSHR vs ASHR
 * produces different results for lanes with the sign bit set.
 */
public class BalticHashSIMD extends BalticHash {
    // Guard: while the SIMD body is disabled, instantiation must fail loudly
    // rather than silently falling back to inherited scalar behavior.
    public BalticHashSIMD() { throw new UnsupportedOperationException(); } //For safety

    /* private LongVector state = LongVector.fromArray(LongVector.SPECIES_256, IV, 0);

    @Override
    public long getValue() {
        // Flush any partial block (zero-padded to a full 32-byte block) before finalizing.
        if(buffer.position() != 0) {
            while(buffer.position() < buffer_limit) buffer.put((byte)0);
            round();
        }
        long result = state.reduceLanesToLong(VectorOperators.XOR);
        result ^= hashed_data_length;
        return xorshift64star(result);
    }

    @Override
    public void update(byte[] data, int off, int len) {
        int pos = off;
        while(pos < len) {
            int n = Math.min(len - pos, buffer_limit - buffer.position());
            if(n == 32) {
                // Full, buffer-aligned 32-byte block: mix straight from the input array.
                var v = ByteVector.fromArray(ByteVector.SPECIES_256, data, pos).reinterpretAsLongs();
                state = state.lanewise(VectorOperators.XOR, v);
                state = xorshift64star(state);
            } else {
                // Partial block: stage into _byte_buffer and run a round once it fills.
                System.arraycopy(data, pos, _byte_buffer, buffer.position(), n);
                buffer.position(buffer.position() + n);
                if(buffer.position() == buffer_limit) round();
            }
            pos += n;
        }
        hashed_data_length += len;
    }

    @Override
    protected void round() {
        var s = ByteVector.fromArray(ByteVector.SPECIES_256, _byte_buffer, 0).reinterpretAsLongs();
        state = state.lanewise(VectorOperators.XOR, s);
        state = xorshift64star(state);
        int p = buffer.position();
        // Carry any bytes past the block boundary back to the start of the buffer.
        if(p > buffer_limit) {
            System.arraycopy(_byte_buffer, buffer_limit, _byte_buffer, 0, buffer.limit() - p);
            buffer.position(buffer.limit() - p);
        } else buffer.rewind();
    }

    // Per-lane xorshift64* step; see the class-level NOTE(review) about ASHR vs LSHR.
    LongVector xorshift64star(LongVector v) {
        v = v.lanewise(VectorOperators.XOR, v.lanewise(VectorOperators.ASHR, 12));
        v = v.lanewise(VectorOperators.XOR, v.lanewise(VectorOperators.LSHL, 25));
        v = v.lanewise(VectorOperators.XOR, v.lanewise(VectorOperators.ASHR, 27));
        v = v.lanewise(VectorOperators.MUL, 0x2545F4914F6CDD1DL);
        return v;
    }*/
}