Selected hashing algorithm. It's a custom design that merges SeaHash's construction with the xorshift64* diffusion function. It should be fast and correct enough for this use case. I hope to speed it up with SIMD once Java ships the Vector API (part of Project Panama); a hypothetical sketch of how the diffusion step might vectorize follows below.

Szum123321 2022-11-27 13:55:34 +01:00
parent 9c37affacd
commit f6cd361fff
10 changed files with 161 additions and 38 deletions
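
For context, the per-round diffusion (applying xorshift64* to the four state words) is the part that maps naturally onto SIMD lanes. Below is a hypothetical sketch of what that might look like with the incubating Vector API (jdk.incubator.vector, available behind --add-modules jdk.incubator.vector); the class and method names are made up for illustration, and none of this is part of the commit.

    import jdk.incubator.vector.LongVector;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    class VectorDiffusionSketch {
        private static final VectorSpecies<Long> SPECIES = LongVector.SPECIES_256; // 4 x 64-bit lanes

        // Applies xorshift64* to all four SeaHash state words at once.
        static void diffuse(long[] state) {
            LongVector v = LongVector.fromArray(SPECIES, state, 0);
            v = v.lanewise(VectorOperators.XOR, v.lanewise(VectorOperators.LSHR, 12));
            v = v.lanewise(VectorOperators.XOR, v.lanewise(VectorOperators.LSHL, 25));
            v = v.lanewise(VectorOperators.XOR, v.lanewise(VectorOperators.LSHR, 27));
            v = v.mul(0x2545F4914F6CDD1DL); // xorshift64* multiplier
            v.intoArray(state, 0);
        }
    }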

Globals.java

@@ -20,7 +20,9 @@
 package net.szum123321.textile_backup;

 import net.minecraft.server.MinecraftServer;
+import net.szum123321.textile_backup.core.Hash;
 import net.szum123321.textile_backup.core.Utilities;
+import net.szum123321.textile_backup.core.XorSeaHash;
 import net.szum123321.textile_backup.core.create.MakeBackupRunnable;
 import net.szum123321.textile_backup.core.restore.AwaitThread;
 import org.apache.commons.io.FileUtils;
@@ -34,11 +36,13 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.Supplier;

 public class Globals {
     public static final Globals INSTANCE = new Globals();
-    private final static TextileLogger log = new TextileLogger(TextileBackup.MOD_NAME);
-    public final static DateTimeFormatter defaultDateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd_HH.mm.ss");
+    private static final TextileLogger log = new TextileLogger(TextileBackup.MOD_NAME);
+    public static final DateTimeFormatter defaultDateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd_HH.mm.ss");
+    public static final Supplier<Hash> CHECKSUM_SUPPLIER = XorSeaHash::new;

     private ExecutorService executorService = null;// = Executors.newSingleThreadExecutor();
     public final AtomicBoolean globalShutdownBackupFlag = new AtomicBoolean(true);

FileTreeHashBuilder.java

@@ -18,35 +18,26 @@
 package net.szum123321.textile_backup.core;

+import net.szum123321.textile_backup.Globals;
+
 import java.io.IOException;
-import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.function.Supplier;
-import java.util.zip.Checksum;

 public class FileTreeHashBuilder {
-    private final static ThreadLocal<byte[]> buff =
-            ThreadLocal.withInitial(() -> new byte[Long.BYTES]);
     private final Object lock = new Object();
-    private final Supplier<Checksum> hasherProvider;
     private long hash = 0, filesProcessed = 0, filesTotalSize = 0;

-    public FileTreeHashBuilder(Supplier<Checksum> provider) { hasherProvider = provider; }
-
     public void update(Path path, long newHash) throws IOException {
-        byte[] raw = buff.get();
-        var hasher = hasherProvider.get();
+        var hasher = Globals.CHECKSUM_SUPPLIER.get();
         long size = Files.size(path);
-        hasher.update(ByteBuffer.wrap(raw).putLong(size).array());
         hasher.update(path.toString().getBytes(StandardCharsets.UTF_8));
-        hasher.update(ByteBuffer.wrap(raw).putLong(hash).array());
+        hasher.update(newHash);

         synchronized (lock) {
-            //This way exact order of files processed doesn't matter.
+            //This way, the exact order of files processed doesn't matter.
            this.hash ^= hasher.getValue();
            filesProcessed++;
            filesTotalSize += size;
@@ -54,12 +45,11 @@ public class FileTreeHashBuilder {
     }

     public long getValue() {
-        var hasher = hasherProvider.get();
-        byte[] raw = buff.get();
-        hasher.update(ByteBuffer.wrap(raw).putLong(hash).array());
-        hasher.update(ByteBuffer.wrap(raw).putLong(filesProcessed).array());
-        hasher.update(ByteBuffer.wrap(raw).putLong(filesTotalSize).array());
+        var hasher = Globals.CHECKSUM_SUPPLIER.get();
+        hasher.update(hash);
+        hasher.update(filesProcessed);
+        hasher.update(filesTotalSize);
         return hasher.getValue();
     }
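
Since XOR is commutative and associative, the fold in update() makes the aggregate hash independent of the order in which worker threads finish individual files. A tiny illustration with hypothetical per-file hash values:

    long h1 = 0x16f1L, h2 = 0xb480L, h3 = 0x6fe2L; // hypothetical per-file hashes
    assert (h1 ^ h2 ^ h3) == (h3 ^ h1 ^ h2);       // any completion order yields the same tree hash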

Hash.java (new file)

@@ -0,0 +1,30 @@
/*
* A simple backup mod for Fabric
* Copyright (C) 2022 Szum123321
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package net.szum123321.textile_backup.core;

public interface Hash {
    void update(byte b);
    void update(long b);

    default void update(byte[] b) {
        update(b, 0, b.length);
    }

    void update(byte[] b, int off, int len);

    long getValue();
}
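
Call sites obtain a fresh instance through Globals.CHECKSUM_SUPPLIER and feed it data incrementally. A minimal hypothetical usage sketch (the path and size are made-up values; java.nio.charset.StandardCharsets is assumed imported):

    Hash hasher = Globals.CHECKSUM_SUPPLIER.get(); // currently XorSeaHash::new
    hasher.update("world/level.dat".getBytes(StandardCharsets.UTF_8)); // hypothetical file path
    hasher.update(8192L); // e.g. a file size
    long digest = hasher.getValue();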

XorSeaHash.java (new file)

@@ -0,0 +1,103 @@
/*
* A simple backup mod for Fabric
* Copyright (C) 2022 Szum123321
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package net.szum123321.textile_backup.core;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/*
    This algorithm copies the construction of SeaHash (https://ticki.github.io/blog/seahash-explained/), including its IV.
    Where it differs is in using xorshift64* instead of PCG as the diffusion function. Although that might lower the
    output quality, I don't think it matters that much, honestly. One advantage xorshift has is that it should be
    easier to implement with AVX. Java should soon ship its Vector API by default.
*/
public class XorSeaHash implements Hash {
    //SeaHash IV
    private final long[] state = { 0x16f11fe89b0d677cL, 0xb480a793d8e6c86cL, 0x6fe2e5aaf078ebc9L, 0x14f994a4c5259381L };

    private final int buffer_size = (state.length + 1) * Long.BYTES;
    private final int buffer_limit = state.length * Long.BYTES;
    private final byte[] _byte_buffer = new byte[buffer_size];
    //Enforce endianness
    private final ByteBuffer buffer = ByteBuffer.wrap(_byte_buffer).order(ByteOrder.LITTLE_ENDIAN);

    private long hashed_data_length = 0;

    @Override
    public void update(byte b) {
        buffer.put(b);
        hashed_data_length += 1;
        if (buffer.position() >= buffer_limit) round();
    }

    @Override
    public void update(long b) {
        buffer.putLong(b);
        hashed_data_length += Long.BYTES;
        if (buffer.position() >= buffer_limit) round();
    }

    @Override
    public void update(byte[] data) { update(data, 0, data.length); }

    @Override
    public void update(byte[] data, int off, int len) {
        int pos = off;
        int end = off + len; //fix: the loop previously ran to len, which drops data whenever off != 0
        while (pos < end) {
            int n = Math.min(end - pos, buffer_limit - buffer.position());
            System.arraycopy(data, pos, _byte_buffer, buffer.position(), n);
            pos += n;
            buffer.position(buffer.position() + n);
            if (buffer.position() >= buffer_limit) round();
        }
        hashed_data_length += len;
    }

    @Override
    public long getValue() {
        if (buffer.position() != 0) round();

        long result = state[0];
        result ^= state[1];
        result ^= state[2];
        result ^= state[3];
        result ^= hashed_data_length;

        return xorshift64star(result);
    }

    private void round() {
        //Zero-pad a partial block (this can only happen on finalization)
        while (buffer.position() < buffer_limit) buffer.put((byte) 0);
        int p = buffer.position();
        buffer.rewind();

        for (int i = 0; i < 4; i++) state[i] ^= buffer.getLong();
        for (int i = 0; i < 4; i++) state[i] = xorshift64star(state[i]);

        if (p > buffer_limit) {
            //Move the bytes past the 32-byte block to the front of the buffer
            //fix: both the copy length and the new position were computed as buffer.limit() - p
            System.arraycopy(_byte_buffer, buffer_limit, _byte_buffer, 0, p - buffer_limit);
            buffer.position(p - buffer_limit);
        } else {
            buffer.rewind(); //fix: without this, an exactly-full block left the position at the limit, so the next round re-read stale data
        }
    }

    private static long xorshift64star(long s) {
        //fix: xorshift64* is defined with unsigned shifts (>>>), not arithmetic ones (>>)
        s ^= (s >>> 12);
        s ^= (s << 25);
        s ^= (s >>> 27);
        return s * 0x2545F4914F6CDD1DL;
    }
}
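
One property worth noting: byte-wise and bulk updates fill the same internal buffer and advance the length counter identically, so both paths should yield the same digest. A small hypothetical sanity check (input value is made up):

    byte[] data = "textile".getBytes(StandardCharsets.UTF_8); // hypothetical input
    XorSeaHash a = new XorSeaHash();
    a.update(data);

    XorSeaHash b = new XorSeaHash();
    for (byte x : data) b.update(x);

    assert a.getValue() == b.getValue(); // bulk and byte-wise updates must agree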

FileInputStreamSupplier.java

@@ -34,8 +34,7 @@ public record FileInputStreamSupplier(Path path, String name, FileTreeHashBuilder
     @Override
     public InputStream getInputStream() throws IOException {
         try {
-            //TODO: select hashing algorithm!
-            return new HashingInputStream(Files.newInputStream(path), path, null, hashTreeBuilder, brokenFileHandler);
+            return new HashingInputStream(Files.newInputStream(path), path, hashTreeBuilder, brokenFileHandler);
         } catch (IOException e) {
             brokenFileHandler.handle(path, e);
             throw e;

HashingInputStream.java

@@ -18,27 +18,26 @@
 package net.szum123321.textile_backup.core.create;

+import net.szum123321.textile_backup.Globals;
 import net.szum123321.textile_backup.core.DataLeftException;
 import net.szum123321.textile_backup.core.FileTreeHashBuilder;
+import net.szum123321.textile_backup.core.Hash;
 import org.jetbrains.annotations.NotNull;

 import java.io.*;
 import java.nio.file.Path;
-import java.util.zip.Checksum;

 //This class calculates a hash of the file on the input stream, submits it to FileTreeHashBuilder.
 //In case the whole underlying stream hasn't been read, it also puts the file into BrokenFileHandler
 public class HashingInputStream extends FilterInputStream {
     private final Path path;
-    private final Checksum hasher;
+    private final Hash hasher = Globals.CHECKSUM_SUPPLIER.get();
     private final FileTreeHashBuilder hashBuilder;
     private final BrokenFileHandler brokenFileHandler;

-    public HashingInputStream(InputStream in, Path path, Checksum hasher, FileTreeHashBuilder hashBuilder, BrokenFileHandler brokenFileHandler) {
+    public HashingInputStream(InputStream in, Path path, FileTreeHashBuilder hashBuilder, BrokenFileHandler brokenFileHandler) {
         super(in);
         this.path = path;
-        this.hasher = hasher;
         this.hashBuilder = hashBuilder;
         this.brokenFileHandler = brokenFileHandler;
     }
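
The hunk ends before the read overrides. Presumably they feed every byte that is actually read into the hasher; a hypothetical sketch of what such an override would look like (not shown in this diff):

    @Override
    public int read(byte @NotNull [] b, int off, int len) throws IOException {
        int n = super.read(b, off, len);
        if (n != -1) hasher.update(b, off, n); // hash only the bytes actually read
        return n;
    }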

AbstractCompressor.java

@@ -44,7 +44,7 @@ public abstract class AbstractCompressor {
     public void createArchive(Path inputFile, Path outputFile, BackupContext ctx, int coreLimit) throws IOException, ExecutionException, InterruptedException {
         Instant start = Instant.now();

-        FileTreeHashBuilder fileHashBuilder = new FileTreeHashBuilder(() -> null); //TODO: select hashing algorithm
+        FileTreeHashBuilder fileHashBuilder = new FileTreeHashBuilder();
         BrokenFileHandler brokenFileHandler = new BrokenFileHandler();

         try (OutputStream outStream = Files.newOutputStream(outputFile);

HashingOutputStream.java

@@ -18,29 +18,27 @@
 package net.szum123321.textile_backup.core.restore;

+import net.szum123321.textile_backup.Globals;
 import net.szum123321.textile_backup.core.FileTreeHashBuilder;
+import net.szum123321.textile_backup.core.Hash;
 import org.jetbrains.annotations.NotNull;

 import java.io.FilterOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.file.Path;
-import java.util.zip.Checksum;

 public class HashingOutputStream extends FilterOutputStream {
     private final Path path;
-    private final Checksum hasher;
+    private final Hash hasher = Globals.CHECKSUM_SUPPLIER.get();
     private final FileTreeHashBuilder hashBuilder;

-    public HashingOutputStream(OutputStream out, Path path, Checksum hasher, FileTreeHashBuilder hashBuilder) {
+    public HashingOutputStream(OutputStream out, Path path, FileTreeHashBuilder hashBuilder) {
         super(out);
         this.path = path;
-        this.hasher = hasher;
         this.hashBuilder = hashBuilder;
     }

     @Override
     public void write(int b) throws IOException {
         super.write(b);
@@ -48,7 +46,7 @@ public class HashingOutputStream extends FilterOutputStream {
     }

     @Override
-    public void write(byte[] @NotNull b, int off, int len) throws IOException {
+    public void write(byte @NotNull [] b, int off, int len) throws IOException {
         super.write(b, off, len);
         hasher.update(b, off, len);
     }
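
The diff doesn't show how this stream hands its result off; presumably, on close() it submits the per-file hash to the shared tree builder, roughly like this hypothetical sketch:

    @Override
    public void close() throws IOException {
        super.close();
        hashBuilder.update(path, hasher.getValue()); // fold this file into the tree hash
    }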

GenericTarDecompressor.java

@@ -40,7 +40,7 @@ public class GenericTarDecompressor {
     public static long decompress(Path input, Path target) throws IOException {
         Instant start = Instant.now();

-        FileTreeHashBuilder treeBuilder = new FileTreeHashBuilder(() -> null);
+        FileTreeHashBuilder treeBuilder = new FileTreeHashBuilder();

         try (InputStream fileInputStream = Files.newInputStream(input);
              InputStream bufferedInputStream = new BufferedInputStream(fileInputStream);

ZipDecompressor.java

@@ -40,7 +40,7 @@ public class ZipDecompressor {
     public static long decompress(Path inputFile, Path target) throws IOException {
         Instant start = Instant.now();

-        FileTreeHashBuilder hashBuilder = new FileTreeHashBuilder(() -> null);
+        FileTreeHashBuilder hashBuilder = new FileTreeHashBuilder();

         try(ZipFile zipFile = new ZipFile(inputFile.toFile())) {
             for (Iterator<ZipArchiveEntry> it = zipFile.getEntries().asIterator(); it.hasNext(); ) {