Merge pull request #9 from Szum123321/multiple_compression_format_dev

Multiple compression format dev
This commit is contained in:
Szum123321 2020-04-19 10:46:12 +02:00 committed by GitHub
commit 434a16011a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
61 changed files with 8269 additions and 250 deletions

10
Copyright_Notice.txt Normal file
View File

@ -0,0 +1,10 @@
This project uses third-party libraries as dependencies and bundles them in its jar. These are:
Apache Commons Compress licensed under Apache License Version 2.0 which can be found at http://www.apache.org/licenses/
Cotton config, Cotton logging, and Jankson-Fabric all by Cotton team licensed under MIT license which can be found at https://github.com/CottonMC/Cotton
XZ for Java by Tukaani released as public domain. https://tukaani.org/xz/java.html
parallelgzip by shevek under Apache 2.0 http://www.apache.org/licenses/
Parallel BZip2 compression by Karl Gustafsson at http://at4j.sourceforge.net/ under the GNU Lesser General Public License (LGPL) v3
Some code was partially or fully inspired by:
Parallel zip compression: https://stackoverflow.com/questions/54624695/how-to-implement-parallel-zip-creation-with-scatterzipoutputstream-with-zip64-su
answer by: https://stackoverflow.com/users/2987755/dkb

View File

@ -15,14 +15,14 @@ Available operations are:
* whitelist - here you can add, remove and list players who are allowed to run any operation within this mod despite not having a high enough permission level*
* blacklist - here you can add, remove and list players who are not allowed to run any operation within this mod despite having a high enough permission level*
All of above can only be done by server admins(permission level 4 - configurable*) / player on single player with cheats on.
All of the above can only be done by server admins(permission level 4 - configurable*) or player on a single player.
Feel free to use this mod in your modpack or on server!
Feel free to use this mod in your modpack or on a server!
### Important
* The default time format used by this mod is dd.MM.yyyy_HH-mm-ss, although it is configurable*.
* _This mod contains **Cotton Config** and its dependencies as jars in jar, which are property of **CottonMC**_.
* _This mod contains **Cotton Config** and its dependencies as jars in a jar, which are property of **CottonMC**_.
\* - feature available since 1.1.0

View File

@ -15,6 +15,7 @@ minecraft {
repositories{
maven { url 'http://server.bbkr.space:8081/artifactory/libs-release' }
maven { url 'https://jitpack.io' }
}
@ -33,11 +34,14 @@ dependencies {
include "io.github.cottonmc.cotton:cotton-logging:1.0.0-rc.4"
include "io.github.cottonmc.cotton:cotton-config:1.0.0-rc.7"
include "org.apache.commons:commons-compress:1.9"
include "org.apache.commons:commons-io:1.3.2"
modCompile "org.apache.commons:commons-compress:1.13"
include "org.apache.commons:commons-compress:1.13"
// PSA: Some older mods, compiled on Loom 0.2.1, might have outdated Maven POMs.
// You may need to force-disable transitiveness on them.
modCompile "org.tukaani:xz:1.8"
include "org.tukaani:xz:1.8"
modCompile 'com.github.shevek:parallelgzip:master-SNAPSHOT'
include 'com.github.shevek:parallelgzip:master-SNAPSHOT'
}
processResources {

View File

@ -1,14 +1,14 @@
# Done to increase the memory available to gradle.
org.gradle.jvmargs=-Xmx1G
minecraft_version=20w14a
yarn_mappings=20w14a+build.1
loader_version=0.7.9+build.190
minecraft_version=1.15.2
yarn_mappings=1.15.2+build.15
loader_version=0.8.2+build.194
#Fabric api
fabric_version=0.5.7+build.314-1.16
fabric_version=0.5.1+build.294-1.15
# Mod Properties
mod_version = 1.1.1-1.15
maven_group = net.szum123321
archives_base_name = textile_backup
mod_version = 1.2.0-1.15
maven_group = net.szum123321
archives_base_name = textile_backup

View File

@ -54,8 +54,15 @@ public class ConfigHandler {
@Comment("\nMaximum size of backup folder in kilo bytes. \n")
public int maxSize = 0;
@Comment("\nCompression level \n0 - 9\n")
public int compression = 1;
@Comment("\nCompression level \n0 - 9\n Only available for zip compression.\n")
public int compression = 6;
// Archive format selected in the config file. The comment text below is shown to the
// user, so every listed name must match an ArchiveFormat constant exactly
// (the second entry previously read "GIZP").
@Comment(value = "\nAvailable formats are:\n" +
        "ZIP - normal zip archive using standard deflate compression\n" +
        "GZIP - tar.gz using gzip compression\n" +
        "BZIP2 - tar.bz2 archive using bzip2 compression\n" +
        "LZMA - tar.xz using lzma compression\n")
public ArchiveFormat format = ArchiveFormat.ZIP;
@Comment("\nPrint info to game out\n")
public boolean log = true;
@ -74,4 +81,21 @@ public class ConfigHandler {
@Comment("\nFormat of date&time used to name backup files.\n")
public String dateTimeFormat = "dd.MM.yyyy_HH-mm-ss";
/**
 * Archive formats a backup can be stored in, each paired with the file-name
 * suffix used for archives of that format.
 */
public enum ArchiveFormat {
    ZIP(".zip"),
    GZIP(".tar.gz"),
    BZIP2(".tar.bz2"),
    LZMA(".tar.xz");

    /** File-name suffix of this format, leading dot included. */
    private final String extension;

    ArchiveFormat(String suffix) {
        extension = suffix;
    }

    /**
     * @return the file-name suffix of this format, leading dot included
     */
    public String getExtension() {
        return extension;
    }
}
}

View File

@ -21,7 +21,6 @@ package net.szum123321.textile_backup.commands;
import com.mojang.brigadier.builder.LiteralArgumentBuilder;
import net.minecraft.server.command.CommandManager;
import net.minecraft.server.command.ServerCommandSource;
import net.minecraft.text.LiteralText;
import net.minecraft.world.dimension.DimensionType;
import net.szum123321.textile_backup.core.BackupHelper;
@ -33,7 +32,6 @@ public class CleanupCommand {
private static int execute(ServerCommandSource source){
BackupHelper.executeFileLimit(source, source.getMinecraftServer().getWorld(DimensionType.OVERWORLD).getLevelProperties().getLevelName());
source.sendFeedback(new LiteralText("Done"), false);
return 1;
}

View File

@ -21,124 +21,152 @@ package net.szum123321.textile_backup.core;
import net.fabricmc.loader.api.FabricLoader;
import net.minecraft.server.MinecraftServer;
import net.minecraft.server.command.ServerCommandSource;
import net.szum123321.textile_backup.ConfigHandler;
import net.szum123321.textile_backup.TextileBackup;
import org.apache.commons.io.FileUtils;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.attribute.FileTime;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.Arrays;
import java.util.Objects;
public class BackupHelper {
public static void create(MinecraftServer server, ServerCommandSource ctx, boolean save, String comment) {
LocalDateTime now = LocalDateTime.now();
public static Thread create(MinecraftServer server, ServerCommandSource ctx, boolean save, String comment) {
LocalDateTime now = LocalDateTime.now();
StringBuilder builder = new StringBuilder();
builder.append("Backup started by: ");
StringBuilder builder = new StringBuilder();
builder.append("Backup started by: ");
if( ctx != null )
builder.append(ctx.getName());
else
builder.append("SERVER");
if (ctx != null)
builder.append(ctx.getName());
else
builder.append("SERVER");
builder.append(" on: ");
builder.append(Utilities.getDateTimeFormatter().format(now));
builder.append(" on: ");
builder.append(Utilities.getDateTimeFormatter().format(now));
Utilities.log(builder.toString(), null);
Utilities.log(builder.toString(), null);
Utilities.log("Saving server...", ctx);
Utilities.log("Saving server...", ctx);
if(save)
server.save(true, false, false);
if (save)
server.save(true, true, false);
Thread thread = new Thread(new MakeBackupThread(server, ctx, comment));
Thread thread = new Thread(new MakeBackupThread(server, ctx, comment));
thread.start();
}
thread.start();
public static void executeFileLimit(ServerCommandSource ctx, String worldName){
File root = getBackupRootPath(worldName);
return thread;
}
FileFilter filter = f -> f.getName().endsWith("zip");
public static void executeFileLimit(ServerCommandSource ctx, String worldName) {
File root = getBackupRootPath(worldName);
if(root.isDirectory() && root.exists()){
if(TextileBackup.config.maxAge > 0){
LocalDateTime now = LocalDateTime.now();
if (root.isDirectory() && root.exists()) {
if (TextileBackup.config.maxAge > 0) {
LocalDateTime now = LocalDateTime.now();
Arrays.stream(root.listFiles()).forEach(f ->{
if(f.exists() && f.isFile()){
LocalDateTime creationTime;
Arrays.stream(root.listFiles()).filter(f -> f.exists() && f.isFile()).forEach(f -> {
LocalDateTime creationTime;
try {
creationTime = LocalDateTime.from(
Utilities.getDateTimeFormatter().parse(
f.getName().split(".zip")[0].split("#")[0]
)
);
}catch(Exception e){
creationTime = LocalDateTime.from(
Utilities.getBackupDateTimeFormatter().parse(
f.getName().split(".zip")[0].split("#")[0]
)
);
try {
try {
FileTime fileTime = (FileTime) Files.getAttribute(f.toPath(), "creationTime");
}
creationTime = LocalDateTime.ofInstant(fileTime.toInstant(), ZoneOffset.UTC);
} catch (IOException ignored) {
try {
creationTime = LocalDateTime.from(
Utilities.getDateTimeFormatter().parse(
f.getName().split(Objects.requireNonNull(getFileExtension(f)))[0].split("#")[0]
)
);
} catch (Exception ignored2) {
creationTime = LocalDateTime.from(
Utilities.getBackupDateTimeFormatter().parse(
f.getName().split(Objects.requireNonNull(getFileExtension(f)))[0].split("#")[0]
)
);
}
}
if(now.toEpochSecond(ZoneOffset.UTC) - creationTime.toEpochSecond(ZoneOffset.UTC) > TextileBackup.config.maxAge) {
Utilities.log("Deleting: " + f.getName(), ctx);
f.delete();
}
}
});
}
if (now.toEpochSecond(ZoneOffset.UTC) - creationTime.toEpochSecond(ZoneOffset.UTC) > TextileBackup.config.maxAge) {
Utilities.log("Deleting: " + f.getName(), ctx);
f.delete();
}
} catch (NullPointerException ignored3) {}
});
}
if(TextileBackup.config.backupsToKeep > 0 && root.listFiles().length > TextileBackup.config.backupsToKeep){
int var1 = root.listFiles().length - TextileBackup.config.backupsToKeep;
if (TextileBackup.config.backupsToKeep > 0 && root.listFiles().length > TextileBackup.config.backupsToKeep) {
int var1 = root.listFiles().length - TextileBackup.config.backupsToKeep;
File[] files = root.listFiles(filter);
assert files != null;
File[] files = root.listFiles();
assert files != null;
Arrays.sort(files);
Arrays.sort(files);
for(int i = 0; i < var1; i++) {
Utilities.log("Deleting: " + files[i].getName(), ctx);
files[i].delete();
}
}
for (int i = 0; i < var1; i++) {
Utilities.log("Deleting: " + files[i].getName(), ctx);
files[i].delete();
}
}
if(TextileBackup.config.maxSize > 0 && FileUtils.sizeOfDirectory(root) / 1024 > TextileBackup.config.maxSize){
Arrays.stream(root.listFiles()).sorted().forEach(e -> {
if(FileUtils.sizeOfDirectory(root) / 1024 > TextileBackup.config.maxSize){
Utilities.log("Deleting: " + e.getName(), ctx);
e.delete();
}
});
}
}
}
if (TextileBackup.config.maxSize > 0 && FileUtils.sizeOfDirectory(root) / 1024 > TextileBackup.config.maxSize) {
Arrays.stream(root.listFiles()).filter(File::isFile).sorted().forEach(e -> {
if (FileUtils.sizeOfDirectory(root) / 1024 > TextileBackup.config.maxSize) {
Utilities.log("Deleting: " + e.getName(), ctx);
e.delete();
}
});
}
}
}
public static File getBackupRootPath(String worldName){
File path = new File(TextileBackup.config.path);
private static String getFileExtension(File f) {
String[] parts = f.getName().split("\\.");
if(TextileBackup.config.perWorldBackup)
path = path.toPath().resolve(worldName).toFile();
switch (parts[parts.length - 1]) {
case "zip":
return ConfigHandler.ArchiveFormat.ZIP.getExtension();
case "bz2":
return ConfigHandler.ArchiveFormat.BZIP2.getExtension();
case "gz":
return ConfigHandler.ArchiveFormat.GZIP.getExtension();
case "xz":
return ConfigHandler.ArchiveFormat.LZMA.getExtension();
if(!path.exists()){
try{
path.mkdirs();
}catch(Exception e){
TextileBackup.logger.error(e.getMessage());
default:
return null;
}
}
return FabricLoader
.getInstance()
.getGameDirectory()
.toPath()
.resolve(TextileBackup.config.path)
.toFile();
}
}
return path;
}
}
public static File getBackupRootPath(String worldName) {
File path = new File(TextileBackup.config.path).getAbsoluteFile();
if (TextileBackup.config.perWorldBackup)
path = path.toPath().resolve(worldName).toFile();
if (!path.exists()) {
try {
path.mkdirs();
} catch (Exception e) {
TextileBackup.logger.error(e.getMessage());
return FabricLoader
.getInstance()
.getGameDirectory()
.toPath()
.resolve(TextileBackup.config.path)
.toFile();
}
}
return path;
}
}

View File

@ -1,69 +0,0 @@
/*
A simple backup mod for Fabric
Copyright (C) 2020 Szum123321
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package net.szum123321.textile_backup.core;
import net.minecraft.server.command.ServerCommandSource;
import net.szum123321.textile_backup.TextileBackup;
import org.apache.commons.compress.utils.IOUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.time.LocalDateTime;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
 * Packs a directory tree into a single zip archive using {@link ZipOutputStream}.
 */
public class Compressor {
    /**
     * Writes every regular file found below {@code in} into the zip archive {@code out}.
     * <p>
     * Entry names are the file paths relative to the canonical form of {@code in}. Files
     * whose relative path is contained in {@code TextileBackup.config.fileBlacklist} are
     * skipped. I/O failures are logged through {@code TextileBackup.logger} and are not
     * propagated to the caller.
     *
     * @param in  root directory whose files are archived
     * @param out zip file to write
     * @param ctx command source handed to {@code Utilities.log} for progress messages
     */
    public static void createArchive(File in, File out, ServerCommandSource ctx){
        Utilities.log("Starting compression...", ctx);

        try {
            File input = in.getCanonicalFile();
            // +1 also strips the separator that follows the root directory name.
            int rootPathLength = input.toString().length() + 1;

            // Both the archive and the directory walk hold OS handles, so both are
            // closed here even when compression fails half-way.
            try (ZipOutputStream arc = new ZipOutputStream(new FileOutputStream(out));
                 java.util.stream.Stream<java.nio.file.Path> files = Files.walk(input.toPath())) {
                arc.setLevel(TextileBackup.config.compression);
                arc.setComment("Created on: " + Utilities.getDateTimeFormatter().format(LocalDateTime.now()));

                files.filter(path -> !path.equals(input.toPath()) && path.toFile().isFile() && !TextileBackup.config.fileBlacklist.contains(path.toString().substring(rootPathLength))).forEach(path -> {
                    try{
                        File file = path.toAbsolutePath().toFile();

                        ZipEntry entry = new ZipEntry(file.getAbsolutePath().substring(rootPathLength));
                        arc.putNextEntry(entry);
                        entry.setSize(file.length());

                        // Close each source file as soon as it has been copied.
                        try (FileInputStream fin = new FileInputStream(file)) {
                            IOUtils.copy(fin, arc);
                        }

                        arc.closeEntry();
                    }catch (IOException e){
                        // One unreadable file must not abort the whole backup.
                        TextileBackup.logger.error(e.getMessage());
                    }
                });
            }
        } catch (IOException e) {
            TextileBackup.logger.error(e.getMessage());
        }

        Utilities.log("Compression finished", ctx);
    }
}

View File

@ -21,15 +21,22 @@ package net.szum123321.textile_backup.core;
import net.minecraft.server.MinecraftServer;
import net.minecraft.server.command.ServerCommandSource;
import net.minecraft.world.dimension.DimensionType;
import net.szum123321.textile_backup.TextileBackup;
import net.szum123321.textile_backup.core.compressors.GenericTarCompressor;
import net.szum123321.textile_backup.core.compressors.ParallelBZip2Compressor;
import net.szum123321.textile_backup.core.compressors.ParallelZipCompressor;
import org.anarres.parallelgzip.ParallelGZIPOutputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream;
import org.at4j.comp.bzip2.BZip2OutputStream;
import java.io.File;
import java.io.IOException;
import java.time.LocalDateTime;
public class MakeBackupThread implements Runnable {
private MinecraftServer server;
private ServerCommandSource ctx;
private String comment;
private final MinecraftServer server;
private final ServerCommandSource ctx;
private final String comment;
public MakeBackupThread(MinecraftServer server, ServerCommandSource ctx, String comment){
this.server = server;
@ -59,7 +66,28 @@ public class MakeBackupThread implements Runnable {
return;
}
ZipCompressor.createArchive(world, outFile, ctx);
switch (TextileBackup.config.format) {
case ZIP:
ParallelZipCompressor.createArchive(world, outFile, ctx);
break;
case BZIP2:
ParallelBZip2Compressor.createArchive(world, outFile, ctx);
break;
case GZIP:
GenericTarCompressor.createArchive(world, outFile, ParallelGZIPOutputStream.class, ctx);
break;
case LZMA:
GenericTarCompressor.createArchive(world, outFile, XZCompressorOutputStream.class, ctx);
break;
default:
Utilities.log("Error! No correct compression format specified! using default compressor!", ctx);
ParallelZipCompressor.createArchive(world, outFile, ctx);
break;
}
BackupHelper.executeFileLimit(ctx, server.getWorld(DimensionType.OVERWORLD).getLevelProperties().getLevelName());
@ -69,6 +97,6 @@ public class MakeBackupThread implements Runnable {
private String getFileName(){
LocalDateTime now = LocalDateTime.now();
return Utilities.getDateTimeFormatter().format(now) + (comment != null ? "#" + comment.replace("#", ""): "") + ".zip";
return Utilities.getDateTimeFormatter().format(now) + (comment != null ? "#" + comment.replace("#", "") : "") + TextileBackup.config.format.getExtension();
}
}

View File

@ -4,6 +4,7 @@ import net.minecraft.server.command.ServerCommandSource;
import net.minecraft.text.LiteralText;
import net.szum123321.textile_backup.TextileBackup;
import java.nio.file.Path;
import java.time.format.DateTimeFormatter;
public class Utilities {
@ -12,6 +13,15 @@ public class Utilities {
return os.toLowerCase().startsWith("win");
}
/**
 * Tells whether {@code path} starts with any entry of
 * {@code TextileBackup.config.fileBlacklist}, using {@link Path#startsWith(String)}.
 *
 * @param path path to test
 * @return {@code true} if at least one blacklist entry is a prefix of {@code path}
 */
public static boolean isBlacklisted(Path path) {
    boolean blacklisted = false;

    for (String prefix : TextileBackup.config.fileBlacklist) {
        if (path.startsWith(prefix)) {
            blacklisted = true;
            break;
        }
    }

    return blacklisted;
}
public static DateTimeFormatter getDateTimeFormatter(){
if(!TextileBackup.config.dateTimeFormat.equals(""))
return DateTimeFormatter.ofPattern(TextileBackup.config.dateTimeFormat);

View File

@ -1,68 +0,0 @@
/*
A simple backup mod for Fabric
Copyright (C) 2020 Szum123321
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package net.szum123321.textile_backup.core;
import net.minecraft.server.command.ServerCommandSource;
import net.szum123321.textile_backup.TextileBackup;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.apache.commons.compress.utils.IOUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.time.LocalDateTime;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
 * Packs a directory tree into a single zip archive using {@link ZipOutputStream}.
 */
public class ZipCompressor {
    /**
     * Writes every regular file found below {@code in} into the zip archive {@code out}.
     * <p>
     * Entry names are the file paths relative to the canonical form of {@code in}. Files
     * whose relative path is contained in {@code TextileBackup.config.fileBlacklist} are
     * skipped. I/O failures are logged through {@code TextileBackup.logger} and are not
     * propagated to the caller.
     *
     * @param in  root directory whose files are archived
     * @param out zip file to write
     * @param ctx command source handed to {@code Utilities.log} for progress messages
     */
    public static void createArchive(File in, File out, ServerCommandSource ctx){
        Utilities.log("Starting compression...", ctx);

        try (ZipOutputStream arc = new ZipOutputStream(new FileOutputStream(out))){
            arc.setLevel(TextileBackup.config.compression);
            arc.setComment("Created on: " + Utilities.getDateTimeFormatter().format(LocalDateTime.now()));

            File input = in.getCanonicalFile();
            // +1 also strips the separator that follows the root directory name.
            int rootPathLength = input.toString().length() + 1;

            // Files.walk keeps directory handles open until its stream is closed.
            try (java.util.stream.Stream<java.nio.file.Path> files = Files.walk(input.toPath())) {
                files.filter(path -> !path.equals(input.toPath()) && path.toFile().isFile() && !TextileBackup.config.fileBlacklist.contains(path.toString().substring(rootPathLength))).forEach(path -> {
                    try{
                        File file = path.toAbsolutePath().toFile();

                        ZipEntry entry = new ZipEntry(file.getAbsolutePath().substring(rootPathLength));
                        arc.putNextEntry(entry);
                        entry.setSize(file.length());

                        // Close each source file as soon as it has been copied.
                        try (FileInputStream fin = new FileInputStream(file)) {
                            IOUtils.copy(fin, arc);
                        }

                        arc.closeEntry();
                    }catch (IOException e){
                        // One unreadable file must not abort the whole backup.
                        TextileBackup.logger.error(e.getMessage());
                    }
                });
            }
        } catch (IOException e) {
            TextileBackup.logger.error(e.getMessage());
        }

        Utilities.log("Compression finished", ctx);
    }
}

View File

@ -0,0 +1,60 @@
package net.szum123321.textile_backup.core.compressors;
import net.minecraft.server.command.ServerCommandSource;
import net.szum123321.textile_backup.TextileBackup;
import net.szum123321.textile_backup.core.Utilities;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.utils.IOUtils;
import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.nio.file.Files;
/**
 * Creates tar archives whose byte stream is passed through a caller-chosen
 * compressing {@link OutputStream}.
 */
public class GenericTarCompressor {
    /**
     * Writes every regular, non-blacklisted file below {@code in} into a tar archive that is
     * compressed by an instance of {@code CompressorStreamClass} and stored in {@code out}.
     * <p>
     * The compressor is created reflectively through its constructor taking a single
     * {@link OutputStream}. Entry names are paths relative to the canonical form of
     * {@code in}. Failures are logged through {@code TextileBackup.logger} and are not
     * propagated; the elapsed time is reported through {@code Utilities.log} either way.
     *
     * @param in                    root directory whose files are archived
     * @param out                   archive file to write
     * @param CompressorStreamClass compressing stream type; must declare a constructor
     *                              accepting one {@link OutputStream}
     * @param ctx                   command source handed to {@code Utilities.log}
     */
    public static void createArchive(File in, File out, Class<? extends OutputStream> CompressorStreamClass, ServerCommandSource ctx) {
        Utilities.log("Starting compression...", ctx);

        long start = System.nanoTime();

        try (FileOutputStream outStream = new FileOutputStream(out);
             BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(outStream);
             OutputStream compressorStream = CompressorStreamClass.getDeclaredConstructor(OutputStream.class).newInstance(bufferedOutputStream);
             TarArchiveOutputStream arc = new TarArchiveOutputStream(compressorStream)) {

            // POSIX modes lift the classic tar limits on name length and numeric fields.
            arc.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
            arc.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);

            File input = in.getCanonicalFile();

            // Files.walk keeps directory handles open until its stream is closed.
            try (java.util.stream.Stream<java.nio.file.Path> files = Files.walk(input.toPath())) {
                files.filter(path -> !path.equals(input.toPath()) &&
                        path.toFile().isFile() &&
                        !Utilities.isBlacklisted(input.toPath().relativize(path))
                ).forEach(path -> {
                    File file = path.toAbsolutePath().toFile();

                    try (FileInputStream fin = new FileInputStream(file);
                         BufferedInputStream bfin = new BufferedInputStream(fin)) {
                        ArchiveEntry entry = arc.createArchiveEntry(file, input.toPath().relativize(path).toString());

                        arc.putArchiveEntry(entry);
                        IOUtils.copy(bfin, arc);
                        arc.closeArchiveEntry();
                    } catch (IOException e) {
                        // One unreadable file must not abort the whole backup.
                        TextileBackup.logger.error(e.getMessage());
                    }
                });
            }

            arc.finish();
        } catch (IOException | IllegalAccessException | NoSuchMethodException | InstantiationException | InvocationTargetException e) {
            TextileBackup.logger.error(e.toString());
        }

        long end = System.nanoTime();

        Utilities.log("Compression took: " + ((end - start) / 1000000000.0) + "s", ctx);
    }
}

View File

@ -0,0 +1,62 @@
package net.szum123321.textile_backup.core.compressors;
import net.minecraft.server.command.ServerCommandSource;
import net.szum123321.textile_backup.TextileBackup;
import net.szum123321.textile_backup.core.Utilities;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.at4j.comp.bzip2.BZip2OutputStream;
import org.at4j.comp.bzip2.BZip2OutputStreamSettings;
import java.io.*;
import java.nio.file.Files;
public class ParallelBZip2Compressor {
public static void createArchive(File in, File out, ServerCommandSource ctx) {
Utilities.log("Starting compression...", ctx);
BZip2OutputStreamSettings settings = new BZip2OutputStreamSettings().setNumberOfEncoderThreads(Runtime.getRuntime().availableProcessors());
long start = System.nanoTime();
try (FileOutputStream fileOutputStream = new FileOutputStream(out);
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream);
BZip2OutputStream bZip2OutputStream = new BZip2OutputStream(bufferedOutputStream, settings);
TarArchiveOutputStream arc = new TarArchiveOutputStream(bZip2OutputStream)) {
arc.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
arc.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
File input = in.getCanonicalFile();
Files.walk(input.toPath()
).filter(path -> !path.equals(input.toPath()) &&
path.toFile().isFile() &&
!Utilities.isBlacklisted(input.toPath().relativize(path))
).forEach(path -> {
File file = path.toAbsolutePath().toFile();
try (FileInputStream fin = new FileInputStream(file);
BufferedInputStream bfin = new BufferedInputStream(fin)) {
ArchiveEntry entry = arc.createArchiveEntry(file, input.toPath().relativize(path).toString());
arc.putArchiveEntry(entry);
IOUtils.copy(bfin, arc);
arc.closeArchiveEntry();
} catch (IOException e) {
TextileBackup.logger.error(e.getMessage());
}
});
arc.finish();
} catch (IOException e) {
e.printStackTrace();
}
long end = System.nanoTime();
Utilities.log("Compression took: " + ((end - start) / 1000000000.0) + "s", ctx);
}
}

View File

@ -0,0 +1,82 @@
package net.szum123321.textile_backup.core.compressors;
import net.minecraft.server.command.ServerCommandSource;
import net.szum123321.textile_backup.TextileBackup;
import net.szum123321.textile_backup.core.Utilities;
import org.apache.commons.compress.archivers.zip.*;
import org.apache.commons.compress.parallel.InputStreamSupplier;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDateTime;
import java.util.concurrent.*;
import java.util.zip.ZipEntry;
/*
This part of code is based on:
https://stackoverflow.com/questions/54624695/how-to-implement-parallel-zip-creation-with-scatterzipoutputstream-with-zip64-su
answer by:
https://stackoverflow.com/users/2987755/dkb
*/
public class ParallelZipCompressor {
public static void createArchive(File in, File out, ServerCommandSource ctx) {
Utilities.log("Starting compression...", ctx);
long start = System.nanoTime();
try (FileOutputStream fileOutputStream = new FileOutputStream(out);
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream);
ZipArchiveOutputStream arc = new ZipArchiveOutputStream(bufferedOutputStream)) {
ParallelScatterZipCreator scatterZipCreator = new ParallelScatterZipCreator();
arc.setMethod(ZipArchiveOutputStream.DEFLATED);
arc.setUseZip64(Zip64Mode.AsNeeded);
arc.setLevel(TextileBackup.config.compression);
arc.setComment("Created on: " + Utilities.getDateTimeFormatter().format(LocalDateTime.now()));
File input = in.getCanonicalFile();
Files.walk(input.toPath()
).filter(path -> !path.equals(input.toPath()) &&
path.toFile().isFile() &&
!Utilities.isBlacklisted(input.toPath().relativize(path))
).forEach(p -> {
ZipArchiveEntry entry = new ZipArchiveEntry(input.toPath().relativize(p).toString());
entry.setMethod(ZipEntry.DEFLATED);
FileInputStreamSupplier supplier = new FileInputStreamSupplier(p);
scatterZipCreator.addArchiveEntry(entry, supplier);
});
scatterZipCreator.writeTo(arc);
arc.finish();
} catch (IOException | InterruptedException | ExecutionException e) {
TextileBackup.logger.error(e.getMessage());
}
long end = System.nanoTime();
Utilities.log("Compression took: " + ((end - start) / 1000000000.0) + "s", ctx);
}
static class FileInputStreamSupplier implements InputStreamSupplier {
private final Path sourceFile;
private InputStream stream;
FileInputStreamSupplier(Path sourceFile) {
this.sourceFile = sourceFile;
}
public InputStream get() {
try {
stream = Files.newInputStream(sourceFile);
} catch (IOException e) {
e.printStackTrace();
}
return stream;
}
}
}

View File

@ -52,7 +52,12 @@ public abstract class MinecraftServerMixin {
@Inject(method = "shutdown", at = @At(value = "INVOKE_ASSIGN", target = "Lnet/minecraft/server/MinecraftServer;save(ZZZ)Z"))
public void onShutdown(CallbackInfo ci){
if(TextileBackup.config.shutdownBackup)
BackupHelper.create((MinecraftServer)(Object)this, null, false, null);
if(TextileBackup.config.shutdownBackup) {
try {
BackupHelper.create((MinecraftServer) (Object) this, null, false, "shutdown").join();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
}

View File

@ -0,0 +1,43 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp;
/**
 * Generic compression levels that some of At4J's compression algorithms
 * support.
 * @author Karl Gustafsson
 * @since 1.0.2
 */
public enum CompressionLevel
{
	BEST("best"),
	DEFAULT("default"),
	FASTEST("fastest");

	/** Short lower-case name of the level; used to build {@link #toString()}. */
	private final String m_name;

	CompressionLevel(String name)
	{
		m_name = name;
	}

	/**
	 * @return the level's short name followed by {@code " compression"}.
	 */
	@Override
	public String toString()
	{
		return m_name.concat(" compression");
	}
}

View File

@ -0,0 +1,50 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
/**
 * Handle to an executor service that distributes the encoding of bzip2 blocks
 * over several threads in order to speed up bzip2 encoding.
 * <p>
 * Work is spread over all threads available to the executor. When a
 * {@link BZip2OutputStream} submits work while every thread is busy, the
 * submitting call blocks until a thread becomes free.
 * <p>
 * Instances are obtained from
 * {@link BZip2OutputStream#createExecutorService(int)}. A client that is done
 * with the executor must call {@link #shutdown()} so that its resources are
 * released.
 * <p>
 * {@link #shutdown()} is the only method exposed here, and custom
 * implementations of this interface are not supported.
 * @author Karl Gustafsson
 * @since 1.1
 */
public interface BZip2EncoderExecutorService
{
	/**
	 * Terminates all threads of this executor service and releases every other
	 * resource associated with it. Call this once the executor is no longer
	 * needed.
	 */
	void shutdown();
}

View File

@ -0,0 +1,86 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.RejectedExecutionHandler;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
/**
* This is the only implementation of {@link BZip2EncoderExecutorService}. All
* objects that are using that interface assume that it is implemented by this
* class.
* @author Karl Gustafsson
* @since 1.1
*/
final class BZip2EncoderExecutorServiceImpl implements BZip2EncoderExecutorService
{
/**
* This rejected execution handler shoehorns in a job in an
* {@link ExecutorService}'s job queue if it is rejected by the service.
* This requires that the service's job queue has an upper bound and that it
* blocks when trying to insert more elements than the bound.
* @author Karl Gustafsson
* @since 1.1
*/
private static class ShoehornInJobRejectedExecutionHandler implements RejectedExecutionHandler
{
private static final ShoehornInJobRejectedExecutionHandler INSTANCE = new ShoehornInJobRejectedExecutionHandler();
public void rejectedExecution(Runnable r, ThreadPoolExecutor executor)
{
// System.out.print("Shoehorning... ");
try
{
executor.getQueue().put(r);
}
catch (InterruptedException e)
{
throw new RuntimeException(e);
}
// System.out.println("done");
}
}
private final ThreadPoolExecutor m_executor;
private final ErrorState m_errorState;
BZip2EncoderExecutorServiceImpl(int noThreads, ErrorState es)
{
m_executor = new ThreadPoolExecutor(noThreads, noThreads, 100, TimeUnit.SECONDS, new ArrayBlockingQueue<Runnable>(1), new EncodingThreadFactory(es), ShoehornInJobRejectedExecutionHandler.INSTANCE);
m_errorState = es;
}
ErrorState getErrorState()
{
return m_errorState;
}
void execute(BlockEncoderRunnable r)
{
m_executor.execute(r);
}
public void shutdown()
{
m_executor.shutdown();
}
}

View File

@ -0,0 +1,306 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.IOException;
import java.io.OutputStream;
import java.util.concurrent.atomic.AtomicInteger;
import org.at4j.support.io.LittleEndianBitOutputStream;
/**
* This is an {@link OutputStream} for bzip2 compressing data.
* <p>
* For more information on the inner workings of bzip2, see <a
* href="http://en.wikipedia.org/wiki/Bzip2">the Wikipedia article on bzip2</a>.
* <p>
* This stream is <i>not</i> safe for concurrent access by several writing
* threads. A client must provide external synchronization to use this from
* several threads.
* @author Karl Gustafsson
* @since 1.1
* @see BZip2OutputStreamSettings
*/
public class BZip2OutputStream extends OutputStream
{
    // The magic number that starts the end of stream block
    private static final byte[] EOS_MAGIC = new byte[] { 0x17, 0x72, 0x45, 0x38, 0x50, (byte) 0x90 };

    // This is used to generate unique hash codes for each created stream
    // object.
    private static final AtomicInteger HASH_CODE_GENERATOR = new AtomicInteger(0);

    // The bit stream that all compressed data is written to
    private final LittleEndianBitOutputStream m_wrapped;

    // The block size in bytes
    private final int m_blockSize;

    // Data stream that writes to the block currently being filled with data.
    private final BlockOutputStream m_blockOutputStream;

    // If several threads are used to encode the data, this is used to write the
    // encoded blocks in the right order. This is null when the encoding is
    // done in the thread writing to the stream.
    private final EncodedBlockWriter m_encodedBlockWriter;

    // This is null when the encoding is done in the thread writing to the
    // stream.
    private final BZip2EncoderExecutorServiceImpl m_executorService;

    // True if this stream created m_executorService itself and therefore is
    // responsible for shutting it down.
    private final boolean m_iCreatedExecutor;

    private final int m_hashCode = HASH_CODE_GENERATOR.getAndIncrement();

    private boolean m_closed;

    // The number of uncompressed bytes written to this stream so far
    private long m_pos = 0;

    private static void writeFileHeader(OutputStream os, int blockSize) throws IOException
    {
        // File header
        os.write('B');
        os.write('Z');
        // File version
        os.write('h');
        // Block size as a character. The ASCII code for 0 is 48.
        os.write(blockSize + 48);
    }

    /**
     * Create a new bzip2 compressing output stream with default settings.
     * @param wrapped Compressed data is written to this stream.
     * @throws IOException On errors writing the file header.
     * @see #BZip2OutputStream(OutputStream, BZip2OutputStreamSettings)
     */
    public BZip2OutputStream(OutputStream wrapped) throws IOException
    {
        this(wrapped, new BZip2OutputStreamSettings());
    }

    /**
     * Create a new bzip2 compressing output stream.
     * @param wrapped Compressed data is written to this stream.
     * @param settings Compression settings.
     * @throws IOException On errors writing the file header.
     * @see #BZip2OutputStream(OutputStream)
     */
    public BZip2OutputStream(OutputStream wrapped, BZip2OutputStreamSettings settings) throws IOException
    {
        // Null checks
        wrapped.getClass();
        settings.getClass();

        m_wrapped = new LittleEndianBitOutputStream(wrapped);
        // bzip2 uses 1kb == 1000b
        m_blockSize = settings.getBlockSize() * 100 * 1000;
        writeFileHeader(wrapped, settings.getBlockSize());

        EncodingScratchpad sp;
        if (settings.getExecutorService() != null)
        {
            // Use the supplied executor service
            // There is only one allowed implementation for now.
            m_executorService = (BZip2EncoderExecutorServiceImpl) settings.getExecutorService();
            m_iCreatedExecutor = false;
            m_encodedBlockWriter = new EncodedBlockWriter(m_wrapped);
            // Each encoder thread has its own scratchpad
            sp = null;
        }
        else if (settings.getNumberOfEncoderThreads() > 0)
        {
            // Use separate encoder threads.
            m_executorService = new BZip2EncoderExecutorServiceImpl(settings.getNumberOfEncoderThreads(), new SingleObserverErrorState());
            m_iCreatedExecutor = true;
            m_encodedBlockWriter = new EncodedBlockWriter(m_wrapped);
            // Each encoder thread has its own scratchpad
            sp = null;
        }
        else
        {
            // Encode in the thread writing to the stream.
            m_executorService = null;
            m_iCreatedExecutor = false;
            sp = new EncodingScratchpad();
            m_encodedBlockWriter = null;
        }
        m_blockOutputStream = new BlockOutputStream(m_wrapped, m_blockSize, settings.getNumberOfHuffmanTreeRefinementIterations(), m_executorService, this, m_encodedBlockWriter, sp);
    }

    private void assertNotClosed() throws IOException
    {
        if (m_closed)
        {
            throw new IOException("This stream is closed");
        }
    }

    // Let the executor's error state report (and clear) any errors that it has
    // registered for this stream. This does nothing when no executor is used.
    private void checkErrorState() throws IOException, RuntimeException
    {
        if (m_executorService != null)
        {
            m_executorService.getErrorState().checkAndClearErrors(this);
        }
    }

    // Diagnostic hook. Messages are currently discarded.
    private void debug(String msg)
    {
    }

    private void writeEosBlock() throws IOException
    {
        // Write the end of stream magic
        for (int i = 0; i < EOS_MAGIC.length; i++)
        {
            m_wrapped.writeBitsLittleEndian(EOS_MAGIC[i] & 0xFF, 8);
        }
        // Write file checksum
        m_wrapped.writeBitsLittleEndian(m_blockOutputStream.getFileChecksum(), 32);
        m_wrapped.padToByteBoundary();
    }

    @Override
    public void write(int b) throws IOException
    {
        assertNotClosed();
        checkErrorState();
        m_pos++;
        m_blockOutputStream.write(b & 0xFF);
    }

    @Override
    public void write(byte[] data) throws IOException
    {
        assertNotClosed();
        checkErrorState();
        m_pos += data.length;
        m_blockOutputStream.write(data);
    }

    @Override
    public void write(byte[] data, int offset, int len) throws IOException, IndexOutOfBoundsException
    {
        assertNotClosed();
        checkErrorState();
        if (offset < 0)
        {
            throw new IndexOutOfBoundsException("Offset: " + offset);
        }
        if (len < 0)
        {
            throw new IndexOutOfBoundsException("Length: " + len);
        }
        // Compare against the space left after offset. Adding offset and len
        // could overflow and let an invalid range through.
        if (len > data.length - offset)
        {
            throw new IndexOutOfBoundsException("Offset: " + offset + " + Length: " + len + " > length of data: " + data.length);
        }
        m_pos += len;
        m_blockOutputStream.write(data, offset, len);
    }

    /**
     * Finish the bzip2 stream and close the wrapped stream. If this stream
     * created its own encoder threads, they are shut down too.
     * <p>
     * Closing an already closed stream has no effect. If closing fails, the
     * stream is still regarded as closed and its resources are released; the
     * written data is then most likely incomplete.
     * @throws IOException On errors encoding or writing the remaining data, or
     * if the closing thread is interrupted while waiting for the encoder
     * threads. In the latter case the thread's interrupt status is restored.
     */
    @Override
    public void close() throws IOException
    {
        if (m_closed)
        {
            return;
        }

        boolean completed = false;
        try
        {
            checkErrorState();
            // This writes out any remaining run length encoding data and closes
            // the block output stream.
            m_blockOutputStream.close();
            if ((m_pos > 0) && (m_encodedBlockWriter != null))
            {
                // Wait for all blocks to be written.
                try
                {
                    m_encodedBlockWriter.waitFor();
                }
                catch (InterruptedException e)
                {
                    // Repackage, but keep both the interrupt status and the
                    // cause for the caller.
                    Thread.currentThread().interrupt();
                    throw new IOException("Interrupted. The output file is most likely corrupted.", e);
                }
                checkErrorState();
            }
            writeEosBlock();
            m_wrapped.close();
            completed = true;
            debug("Original size: " + m_pos + ", compressed size: " + m_wrapped.getNumberOfBytesWritten());
        }
        finally
        {
            // Release everything even if something above failed. Otherwise
            // the encoder threads created by this stream would never be
            // stopped.
            m_closed = true;
            try
            {
                if (!completed)
                {
                    try
                    {
                        m_wrapped.close();
                    }
                    catch (IOException ignored)
                    {
                        // The failure that aborted the close is the one that
                        // is reported to the caller.
                    }
                }
            }
            finally
            {
                if (m_iCreatedExecutor && (m_executorService != null))
                {
                    m_executorService.shutdown();
                }
            }
        }
        super.close();
    }

    @Override
    public int hashCode()
    {
        return m_hashCode;
    }

    @Override
    public boolean equals(Object o)
    {
        return this == o;
    }

    /**
     * Close the stream if the client has been sloppy about it.
     */
    @Override
    protected void finalize() throws Throwable
    {
        close();
        super.finalize();
    }

    /**
     * Create a {@link BZip2EncoderExecutorService} that can be shared between
     * several {@link BZip2OutputStream}:s to spread the bzip2 encoding work
     * over several threads. The created executor service can be passed to the
     * {@link BZip2OutputStream} constructor in a
     * {@link BZip2OutputStreamSettings} object.
     * @param noThreads The number of threads available to the executor.
     * @return The executor service.
     */
    public static BZip2EncoderExecutorService createExecutorService(int noThreads)
    {
        return new BZip2EncoderExecutorServiceImpl(noThreads, new MultipleObserverErrorState());
    }

    /**
     * Create a {@link BZip2EncoderExecutorService} that can be shared between
     * several {@link BZip2OutputStream}:s to spread the bzip2 encoding work
     * over several threads. The created executor service can be passed to the
     * {@link BZip2OutputStream} constructor in a
     * {@link BZip2OutputStreamSettings} object.
     * <p>
     * The created executor will have as many threads available to it as there
     * are CPU:s available to the JVM.
     * @return The executor service.
     */
    public static BZip2EncoderExecutorService createExecutorService()
    {
        return createExecutorService(Runtime.getRuntime().availableProcessors());
    }
}

View File

@ -0,0 +1,223 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import org.at4j.support.lang.At4JException;
/**
* This object contains settings for the {@link BZip2OutputStream}.
* <p>
* When created, this object contains the default settings. Modify the settings
* by calling setter methods on this object.
* @author Karl Gustafsson
* @since 1.1
* @see BZip2OutputStream
*/
public class BZip2OutputStreamSettings implements Cloneable
{
    /**
     * The smallest permitted size of an encoded data block, in hundreds of
     * kilobytes. Small blocks are compressed faster, but not as well.
     */
    public static final int MIN_BLOCK_SIZE = 1;

    /**
     * The largest permitted size of an encoded data block, in hundreds of
     * kilobytes. Large blocks are compressed better, but more slowly.
     */
    public static final int MAX_BLOCK_SIZE = 9;

    /**
     * The block size used unless another one is set.
     */
    public static final int DEFAULT_BLOCK_SIZE = MAX_BLOCK_SIZE;

    /**
     * The number of Huffman tree refinement iterations used unless another
     * number is set. More iterations improve the compression, but each
     * additional iteration improves it less than the previous one.
     */
    public static final int DEFAULT_NO_OF_HUFFMAN_TREE_REFINEMENT_ITERATIONS = 5;

    /**
     * The number of encoder threads used unless another number is set.
     */
    public static final int DEFAULT_NO_OF_ENCODER_THREADS = 0;

    private int m_blockSize = DEFAULT_BLOCK_SIZE;
    private int m_numberOfHuffmanTreeRefinementIterations = DEFAULT_NO_OF_HUFFMAN_TREE_REFINEMENT_ITERATIONS;
    private int m_numberOfEncoderThreads = DEFAULT_NO_OF_ENCODER_THREADS;
    private BZip2EncoderExecutorService m_executorService;

    /**
     * Choose the size of the compressed data blocks. Larger blocks compress
     * better but more slowly; smaller blocks compress faster but worse.
     * <p>
     * Unless changed, the block size is 9, which is the largest allowed value.
     * @param bs The block size in hundreds of kilobytes, from 1 to 9
     * (inclusive).
     * @return {@code this}
     * @throws IllegalArgumentException If the block size is outside of the
     * permitted range.
     */
    public BZip2OutputStreamSettings setBlockSize(int bs) throws IllegalArgumentException
    {
        final boolean withinRange = (MIN_BLOCK_SIZE <= bs) && (bs <= MAX_BLOCK_SIZE);
        if (!withinRange)
        {
            throw new IllegalArgumentException("Invalid block size " + bs + ". It must be between " + MIN_BLOCK_SIZE + " and " + MAX_BLOCK_SIZE + " (inclusive)");
        }
        m_blockSize = bs;
        return this;
    }

    /**
     * Get the configured block size.
     * @return The block size for a compressed data block, in hundreds of
     * kilobytes.
     */
    public int getBlockSize()
    {
        return m_blockSize;
    }

    /**
     * Choose how many tree refinement iterations to run when the Huffman
     * trees for a compressed data block are created.
     * <p>
     * A higher number should make the compression better but slower, with
     * diminishing returns as the number grows.
     * <p>
     * Unless changed, five refinement iterations are run.
     * @param no The number of Huffman tree refinement iterations. It must be
     * at least one.
     * @return {@code this}
     * @throws IllegalArgumentException If the number is less than one.
     */
    public BZip2OutputStreamSettings setNumberOfHuffmanTreeRefinementIterations(int no) throws IllegalArgumentException
    {
        if (no <= 0)
        {
            throw new IllegalArgumentException("Invalid value " + no + ". It must be greater than zero");
        }
        m_numberOfHuffmanTreeRefinementIterations = no;
        return this;
    }

    /**
     * Get the configured number of Huffman tree refinement iterations.
     * @return The number of Huffman tree refinement iterations.
     */
    public int getNumberOfHuffmanTreeRefinementIterations()
    {
        return m_numberOfHuffmanTreeRefinementIterations;
    }

    /**
     * Set a log adapter for diagnostic output.
     * <p>
     * This implementation ignores the supplied object and changes no setting.
     * The method is presumably kept so that existing callers still compile.
     * @param la A log adapter. It is not used.
     * @return {@code this}
     */
    public BZip2OutputStreamSettings setLogAdapter(Object la)
    {
        return this;
    }

    /**
     * Choose how many encoder threads to use for bzip2 compressing data.
     * bzip2 encoding is CPU intensive, so more encoder threads can shorten
     * the encoding time drastically. The price is a higher memory
     * consumption, since every encoder thread keeps its data in memory.
     * <p>
     * Unless changed, the number of encoder threads is zero. The encoding is
     * then done by the thread that writes data to the
     * {@link BZip2OutputStream}.
     * <p>
     * The encoding time is shortest when there are as many threads as there
     * are available CPU:s in the system.
     * @param no The number of encoder threads to use. With {@code 0}, the
     * encoding is done in the thread writing to the stream.
     * @return {@code this}
     * @throws IllegalArgumentException If {@code no} is negative.
     * @see #setExecutorService(BZip2EncoderExecutorService)
     */
    public BZip2OutputStreamSettings setNumberOfEncoderThreads(int no) throws IllegalArgumentException
    {
        if (no >= 0)
        {
            m_numberOfEncoderThreads = no;
            return this;
        }
        throw new IllegalArgumentException("Invalid number of encoder threads " + no + ". The number must be zero or greater");
    }

    /**
     * Get the configured number of encoder threads.
     * @return The number of encoder threads. {@code 0} means that the
     * encoding is done in the thread writing to the stream.
     */
    public int getNumberOfEncoderThreads()
    {
        return m_numberOfEncoderThreads;
    }

    /**
     * Supply an executor service that the {@link BZip2OutputStream} uses to
     * spread the encoding over several threads. Several
     * {@link BZip2OutputStream} objects may share the same executor.
     * <p>
     * When an executor service has been set, the encoding uses all threads
     * available to the executor, and any value set with
     * {@link #setNumberOfEncoderThreads(int)} is ignored.
     * <p>
     * Executor services are created by the
     * {@link BZip2OutputStream#createExecutorService()} and the
     * {@link BZip2OutputStream#createExecutorService(int)} methods.
     * @param executorService The executor service.
     * @return {@code this}
     * @see #setNumberOfEncoderThreads(int)
     */
    public BZip2OutputStreamSettings setExecutorService(BZip2EncoderExecutorService executorService)
    {
        m_executorService = executorService;
        return this;
    }

    /**
     * Get the configured executor service.
     * @return The executor service, or {@code null} if none has been set.
     */
    public BZip2EncoderExecutorService getExecutorService()
    {
        return m_executorService;
    }

    /**
     * Create a copy of this settings object.
     * @return The copy.
     */
    @Override
    public BZip2OutputStreamSettings clone()
    {
        final Object copy;
        try
        {
            copy = super.clone();
        }
        catch (CloneNotSupportedException e)
        {
            throw new At4JException("Bug", e);
        }
        return (BZip2OutputStreamSettings) copy;
    }
}

View File

@ -0,0 +1,29 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
/**
 * Marker interface for a bzip2 data block. The {@link BlockDecoder} uses it
 * as the common type of the blocks that it returns.
 * @author Karl Gustafsson
 * @since 1.1
 */
interface Block
{
    // Marker interface without any members
}

View File

@ -0,0 +1,422 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.IOException;
import java.util.Arrays;
import org.at4j.support.comp.ByteMoveToFront;
import org.at4j.support.comp.IntMoveToFront;
import org.at4j.support.io.LittleEndianBitInputStream;
import org.at4j.support.lang.At4JException;
import org.at4j.support.lang.UnsignedInteger;
/**
* This is used by the {@link BZip2InputStream} to decode data blocks.
* @author Karl Gustafsson
* @since 1.1
*/
final class BlockDecoder
{
    // The magic number identifying a block of compressed data
    private static final byte[] COMPRESSED_BLOCK_MAGIC = new byte[] { (byte) 0x31, (byte) 0x41, (byte) 0x59, (byte) 0x26, (byte) 0x53, (byte) 0x59 };

    // The magic number identifying the end of stream block
    private static final byte[] EOS_BLOCK_MAGIC = new byte[] { (byte) 0x17, (byte) 0x72, (byte) 0x45, (byte) 0x38, (byte) 0x50, (byte) 0x90 };

    // The number of symbols to read from each Huffman tree before switching
    private static final int SYMBOLS_TO_READ_FROM_EACH_TREE = 50;

    // The symbol value of the special RUNA symbol.
    private static final int RUNA_SYMBOL = 0;

    // The symbol value of the special RUNB symbol.
    private static final int RUNB_SYMBOL = 1;

    private static final int MAX_NO_OF_MTF_SYMBOLS = 258;

    // The alphabet that the move-to-front transformer starts with for every
    // block. Values above 255 wrap around when cast to byte.
    private static final byte[] INITIAL_MOVE_TO_FRONT_ALPHABET = new byte[MAX_NO_OF_MTF_SYMBOLS];
    static
    {
        for (int i = 0; i < MAX_NO_OF_MTF_SYMBOLS; i++)
        {
            INITIAL_MOVE_TO_FRONT_ALPHABET[i] = (byte) i;
        }
    }

    private final LittleEndianBitInputStream m_in;
    private final int m_blockSize;

    // Data read from the block header

    // Block checksum (CRC)
    private int m_readBlockChecksum;
    // The pointer to the original data used in the BW transform
    private int m_originalDataPointer;
    // The Huffman trees used for decompression
    private HighValueBranchHuffmanTree[] m_huffmanTrees;
    // The EOB (End Of Block) symbol index.
    private int m_endOfBlockSymbol;
    // The number of times that the Huffman trees are switched in the input.
    // The trees are switched every 50 bytes.
    private int m_numberOfTimesHuffmanTreesAreSwitched;
    // For each tree switch, the index of the Huffman tree to switch to
    private int[] m_treeUse;
    // Mapping between symbol values and byte values.
    private byte[] m_symbolSequenceNos;
    // Frequency of each byte in the pre-BW data
    private int[] m_byteFrequencies;

    // State variables

    // The number of the currently selected Huffman tree
    private HighValueBranchHuffmanTree m_curTree;
    // The number of symbols left to read from the current Huffman tree
    private int m_symbolsLeftToReadFromCurTree;
    // The current number of Huffman tree switches
    private int m_switchNo;
    // A counter for the number of bytes decoded in this block.
    private int m_noBytesDecoded;
    private ByteMoveToFront m_mtfTransformer;
    // This will hold the decoded data (before the Burrows Wheeler decoding)
    private final byte[] m_decoded;

    /**
     * @param in The stream to read encoded blocks from.
     * @param blockSize The block size in bytes. A buffer of this size is
     * allocated for the decoded data of a block.
     */
    BlockDecoder(LittleEndianBitInputStream in, int blockSize)
    {
        m_in = in;
        m_blockSize = blockSize;
        m_decoded = new byte[blockSize];
    }

    // Always throws an IOException with the current position in the input
    // appended to the message.
    private void throwIOException(String msg) throws IOException
    {
        throw new IOException(msg + ". Position in input stream: " + m_in.getNumberOfBytesRead());
    }

    // Note that Thread.interrupted() clears the thread's interrupt status.
    private void checkInterrupted() throws InterruptedException
    {
        if (Thread.interrupted())
        {
            throw new InterruptedException();
        }
    }

    // Diagnostic output. Messages are printed to standard out.
    private void trace(String s)
    {
        System.out.println(s);
    }

    /**
     * Read the bit lengths for all symbols of a Huffman tree and build the
     * tree from them.
     * @param totalNumberOfSymbols The number of symbols in the tree.
     * @param in The stream to read the tree definition from.
     * @return The Huffman tree.
     * @throws IOException On read errors or if a bit length is not in the
     * range 1 to 20.
     */
    static HighValueBranchHuffmanTree decodeHuffmanTree(final int totalNumberOfSymbols, final LittleEndianBitInputStream in) throws IOException
    {
        int[] symbolLengths = new int[totalNumberOfSymbols];

        // Starting bit length for Huffman deltas in this tree
        int currentBitLength = in.readBits(5);
        if (currentBitLength > 20)
        {
            throw new IOException("Invalid starting bit length for Huffman deltas: " + currentBitLength + ". Must be <= 20");
        }

        // Initialize min and max lengths per tree with values that
        // will certainly be overwritten.
        int minBitLengthPerTree = 20;
        int maxBitLengthPerTree = 0;

        for (int j = 0; j < totalNumberOfSymbols; j++)
        {
            // Each set bit announces a delta. The bit following it selects
            // whether the length is decreased (1) or increased (0) by one.
            while (in.readBit())
            {
                currentBitLength += in.readBit() ? -1 : 1;
                if ((currentBitLength < 1) || (currentBitLength > 20))
                {
                    throw new IOException("Invalid bit length " + currentBitLength);
                }
            }
            symbolLengths[j] = currentBitLength;
            if (currentBitLength < minBitLengthPerTree)
            {
                minBitLengthPerTree = currentBitLength;
            }
            if (currentBitLength > maxBitLengthPerTree)
            {
                maxBitLengthPerTree = currentBitLength;
            }
        }
        return new HighValueBranchHuffmanTree(symbolLengths, minBitLengthPerTree, maxBitLengthPerTree, false);
    }

    // Read the header of a compressed data block: the checksum, the BW
    // pointer, the used byte values, the Huffman tree selectors and the
    // Huffman trees. The results are stored in the member variables.
    private void readCompressedBlockHeader() throws IOException
    {
        byte[] barr = new byte[4];

        // Block checksum
        m_readBlockChecksum = (int) UnsignedInteger.fromLittleEndianByteArrayToLong(m_in.readBytes(barr, 0, 4), 0);

        // Randomized block?
        if (m_in.readBit())
        {
            throwIOException("Randomized block mode is not supported");
        }

        // Starting pointer into BWT
        m_in.readBytes(barr, 1, 3);
        barr[0] = 0;
        m_originalDataPointer = (int) UnsignedInteger.fromLittleEndianByteArrayToLong(barr, 0);
        if (m_originalDataPointer > m_blockSize)
        {
            throw new IOException("Invalid starting pointer " + m_originalDataPointer + ". It must be less than the block size " + m_blockSize);
        }

        // Huffman used codes
        boolean[] usedSymbols = new boolean[256];
        int numberOfUsedSymbols = 0;

        boolean[] inUseBlocks = new boolean[16];
        for (int i = 0; i < 16; i++)
        {
            inUseBlocks[i] = m_in.readBit();
        }

        for (int i = 0; i < 16; i++)
        {
            if (inUseBlocks[i])
            {
                for (int j = 0; j < 16; j++)
                {
                    if (m_in.readBit())
                    {
                        usedSymbols[i * 16 + j] = true;
                        numberOfUsedSymbols++;
                    }
                }
            }
        }
        if (numberOfUsedSymbols == 0)
        {
            throwIOException("No symbols used in table");
        }

        // Create a mapping for the sequence numbers of all used bytes
        m_symbolSequenceNos = new byte[numberOfUsedSymbols];
        int useIndex = 0;
        for (int i = 0; i < 256; i++)
        {
            if (usedSymbols[i])
            {
                m_symbolSequenceNos[useIndex++] = (byte) (i & 0xFF);
            }
        }
        assert useIndex == numberOfUsedSymbols;

        m_byteFrequencies = new int[256];

        // The number of Huffman trees to use
        int numberOfHuffmanTrees = m_in.readBits(3);
        if (numberOfHuffmanTrees < 2 || numberOfHuffmanTrees > 6)
        {
            throwIOException("Invalid number of Huffman trees " + numberOfHuffmanTrees + ". Must be between 2 and 6 (inclusive)");
        }

        // The number of times the trees to use are swapped in the input.
        // The trees are swapped each 50 bytes.
        m_numberOfTimesHuffmanTreesAreSwitched = m_in.readBitsLittleEndian(15);
        if (m_numberOfTimesHuffmanTreesAreSwitched < 1)
        {
            throwIOException("Invalid number of times the Huffman trees are switched in the input: " + m_numberOfTimesHuffmanTreesAreSwitched);
        }

        // Zero-terminated bit runs for each tree switch
        int[] treeUseMtf = new int[m_numberOfTimesHuffmanTreesAreSwitched];
        for (int i = 0; i < m_numberOfTimesHuffmanTreesAreSwitched; i++)
        {
            treeUseMtf[i] = 0;
            while (m_in.readBit())
            {
                treeUseMtf[i]++;
            }
            // The value is an index into an alphabet with numberOfHuffmanTrees
            // entries, so the number of trees itself is already out of range.
            if (treeUseMtf[i] >= numberOfHuffmanTrees)
            {
                throwIOException("Invalid Huffman tree use MTF " + treeUseMtf[i] + ". Must be less than the number of Huffman trees, " + numberOfHuffmanTrees);
            }
        }

        // Decode the tree use MTF values
        m_treeUse = new int[m_numberOfTimesHuffmanTreesAreSwitched];
        // The "alphabet" for the MTF encoding -- the indices of the different
        // tree uses.
        int[] treeUseIndices = new int[numberOfHuffmanTrees];
        for (int i = 0; i < numberOfHuffmanTrees; i++)
        {
            treeUseIndices[i] = i;
        }
        new IntMoveToFront(treeUseIndices).decode(treeUseMtf, m_treeUse);

        // Settings for the Huffman trees

        // The total number of used symbols is the value we calculated above - 1
        // + RUNA, RUNB and an end of stream marker.
        int totalNumberOfSymbols = numberOfUsedSymbols + 2;

        m_huffmanTrees = new HighValueBranchHuffmanTree[numberOfHuffmanTrees];
        for (int i = 0; i < numberOfHuffmanTrees; i++)
        {
            m_huffmanTrees[i] = decodeHuffmanTree(totalNumberOfSymbols, m_in);
        }

        // The symbol value for the end of the data block.
        m_endOfBlockSymbol = totalNumberOfSymbols - 1;
    }

    // Switch to the Huffman tree that the block header selected for the next
    // run of symbols.
    private void selectNewHuffmanTree() throws IOException
    {
        if (m_switchNo >= m_numberOfTimesHuffmanTreesAreSwitched)
        {
            throwIOException("One Huffman tree switch too many: " + m_switchNo);
        }
        m_symbolsLeftToReadFromCurTree = SYMBOLS_TO_READ_FROM_EACH_TREE;
        m_curTree = m_huffmanTrees[m_treeUse[m_switchNo]];
        m_switchNo++;
    }

    // Read the next symbol, switching Huffman trees first if all symbols have
    // been read from the current tree.
    private int readSymbol() throws IOException
    {
        if (m_symbolsLeftToReadFromCurTree == 0)
        {
            selectNewHuffmanTree();
        }
        final int symbol = m_curTree.readNext(m_in);
        m_symbolsLeftToReadFromCurTree--;
        return symbol;
    }

    // Decode a symbol that stands for a single byte and append the byte to the
    // decoded data.
    private void decodeSingleByte(final int symbolMtf) throws IOException
    {
        // Corrupt input must be reported as an I/O error, not as an array
        // index error.
        if (m_noBytesDecoded >= m_decoded.length)
        {
            throwIOException("The block contains more data than the block size " + m_blockSize);
        }
        // Move To Front decode the symbol
        final int byteIndex = m_mtfTransformer.decode(symbolMtf - 1) & 0xFF;
        final byte value = m_symbolSequenceNos[byteIndex];
        m_decoded[m_noBytesDecoded++] = value;
        m_byteFrequencies[value & 0xFF]++;
    }

    // Decode a sequence of RUNA and RUNB symbols, which encodes a number of
    // repetitions of the byte at the front of the MTF list.
    // returns the next symbol
    private int handleRunaAndRunb(int symbol) throws IOException
    {
        int n = 1;
        int multiplier = 0;
        while (symbol == RUNA_SYMBOL || symbol == RUNB_SYMBOL)
        {
            if (symbol == RUNA_SYMBOL)
            {
                multiplier += n;
            }
            else
            {
                multiplier += 2 * n;
            }
            // Reject runs that do not fit in the block as soon as they are
            // detected. Corrupt input is then reported as an I/O error instead
            // of an array index error.
            if (multiplier > m_decoded.length - m_noBytesDecoded)
            {
                throwIOException("A run of " + multiplier + " bytes does not fit in the block size " + m_blockSize);
            }
            // Multiply n with 2
            n <<= 1;
            symbol = readSymbol();
        }

        // The repeated value is at the front of the MTF list
        final int byteIndex = m_mtfTransformer.decode(0) & 0xFF;
        final byte value = m_symbolSequenceNos[byteIndex];
        if (multiplier == 1)
        {
            m_decoded[m_noBytesDecoded++] = value;
            m_byteFrequencies[value & 0xFF]++;
        }
        else
        {
            Arrays.fill(m_decoded, m_noBytesDecoded, m_noBytesDecoded + multiplier, value);
            m_noBytesDecoded += multiplier;
            m_byteFrequencies[value & 0xFF] += multiplier;
        }
        return symbol;
    }

    /**
     * Read a block of compressed data. The block's magic number must already
     * have been read from the input.
     * @return The data block.
     * @throws IOException On read errors or invalid data.
     * @throws InterruptedException If the thread is interrupted while
     * decoding.
     */
    CompressedDataBlock readCompressedDataBlock() throws IOException, InterruptedException
    {
        readCompressedBlockHeader();

        int symbol = readSymbol();
        while (true)
        {
            checkInterrupted();

            if (symbol == RUNA_SYMBOL || symbol == RUNB_SYMBOL)
            {
                symbol = handleRunaAndRunb(symbol);
            }
            else if (symbol == m_endOfBlockSymbol)
            {
                BurrowsWheelerDecoder bwd = new BurrowsWheelerDecoder(m_decoded, m_noBytesDecoded, m_byteFrequencies, m_originalDataPointer);
                return new CompressedDataBlock(new RLEDecodingInputStream(bwd.decode(), m_readBlockChecksum), m_readBlockChecksum);
            }
            else
            {
                decodeSingleByte(symbol);
                symbol = readSymbol();
            }
        }
    }

    // Reset the state variables before a new block is decoded.
    private void initDecoderState()
    {
        // Initialize the MTF alphabet
        final byte[] moveToFrontAlphabet = new byte[MAX_NO_OF_MTF_SYMBOLS];
        System.arraycopy(INITIAL_MOVE_TO_FRONT_ALPHABET, 0, moveToFrontAlphabet, 0, MAX_NO_OF_MTF_SYMBOLS);
        m_mtfTransformer = new ByteMoveToFront(moveToFrontAlphabet);

        m_curTree = null;
        m_symbolsLeftToReadFromCurTree = 0;
        m_switchNo = 0;
        m_noBytesDecoded = 0;
    }

    /**
     * Read the next block from the input. This is either a block of
     * compressed data or the end of stream block.
     * @return The block.
     * @throws IOException On read errors or if the input does not continue
     * with a valid block.
     * @throws At4JException If the thread is interrupted while decoding. The
     * thread's interrupt status is restored before the exception is thrown.
     */
    Block getNextBlock() throws IOException
    {
        initDecoderState();

        byte[] barr = new byte[6];
        m_in.readBytes(barr, 0, 6);
        if (Arrays.equals(COMPRESSED_BLOCK_MAGIC, barr))
        {
            trace("Found block of compressed data");
            try
            {
                return readCompressedDataBlock();
            }
            catch (InterruptedException e)
            {
                // The interrupt status was cleared when the interruption was
                // detected. Set it again so that the caller can see it.
                Thread.currentThread().interrupt();
                throw new At4JException(e);
            }
        }
        else if (Arrays.equals(EOS_BLOCK_MAGIC, barr))
        {
            trace("Found end of stream block");
            m_in.readBytes(barr, 0, 4);
            int readCrc32 = (int) UnsignedInteger.fromLittleEndianByteArrayToLong(barr, 0);
            return new EosBlock(readCrc32);
        }
        else
        {
            throwIOException("Invalid block header " + Arrays.toString(barr) + ". Expected compressed data block or end of stream block");
            // Never reached
            return null;
        }
    }
}

View File

@ -0,0 +1,54 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.at4j.support.io.BitOutput;
/**
* This callback is called by the {@link BlockEncoder} when it has encoded its
* block.
* @author Karl Gustafsson
* @since 1.1
*/
final class BlockEncodedCallback
{
    // The number of the encoded block, as given to the constructor
    private final int m_blockNo;
    // Receives the encoded block when it is done
    private final EncodedBlockWriter m_writer;
    // Supplies the encoded block's complete bytes
    private final ByteArrayOutputStream m_byteOut;
    // Supplies the trailing, unfinished byte of the encoded block
    private final BitOutput m_bitOut;

    /**
     * @param blockNo The number of the block being encoded.
     * @param byteOut The stream that the complete bytes of the encoded block
     * are collected from.
     * @param bitOut The bit output that the unfinished last byte is collected
     * from.
     * @param writer The writer that the encoded block is handed to.
     */
    BlockEncodedCallback(final int blockNo, final ByteArrayOutputStream byteOut, final BitOutput bitOut, final EncodedBlockWriter writer)
    {
        m_byteOut = byteOut;
        m_bitOut = bitOut;
        m_writer = writer;
        m_blockNo = blockNo;
    }

    /**
     * The {@link BlockEncoder} calls this when it has finished encoding its
     * block. The encoded data is packaged and handed to the block writer.
     * @throws IOException Declared for errors from the block writer.
     */
    void reportBlockDone() throws IOException
    {
        final byte[] completeBytes = m_byteOut.toByteArray();
        final EncodedBlockData encodedBlock = new EncodedBlockData(completeBytes, m_bitOut.getNumberOfBitsInUnfinishedByte(), m_bitOut.getUnfinishedByte());
        m_writer.writeBlock(m_blockNo, encodedBlock);
    }
}

View File

@ -0,0 +1,893 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.IOException;
import java.util.Arrays;
import org.at4j.comp.bzip2.BurrowsWheelerEncoder.BurrowsWheelerEncodingResult;
import org.at4j.support.comp.IntMoveToFront;
import org.at4j.support.io.BitOutput;
/**
* This is used by the thread encoding a bzip2 block.
* @author Karl Gustafsson
* @since 1.1
*/
final class BlockEncoder
{
// The magic number that starts every block header. Written one byte at a
// time by writeBlockHeader. The hex digits read 3.14159265359.
private static final byte[] BLOCK_MAGIC = new byte[] { 0x31, 0x41, 0x59, 0x26, 0x53, 0x59 };
// The maximum Huffman tree depth. Passed as the upper limit every time code
// lengths are created.
private static final int MAX_HUFFMAN_BIT_LENGTH = 17;
// The values of the RUNA and RUNB symbols
private static final int RUNA_SYMBOL = 0;
private static final int RUNB_SYMBOL = 1;
// At least this many trees are always put in the block header, even when a
// single tree would be enough for the data.
private static final int MIN_NO_OF_HUFFMAN_TREES = 2;
// The largest number of trees for a block; equal to the largest value
// returned by getNumberOfHuffmanTrees.
static final int MAX_NO_OF_HUFFMAN_TREES = 6;
// The maximum number of different MTF symbols: 256 bytes + RUNA + RUNB +
// EOB - one byte (the first symbol does not have to be encoded thanks to
// MTF and RLE)
static final int MAX_NO_OF_MTF_SYMBOLS = 258;
// Write 50 symbols, then swap Huffman trees.
static final int NO_OF_SYMBOLS_PER_SEGMENT = 50;
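// Categories used when optimizing Huffman trees.
// There is one row per number of trees (row 0 is for two trees, row 4 is for
// six trees). Each row is indexed by the encoded length of a segment,
// expressed as a percentage (0..100) of the distance between the shortest
// and the longest encoded segment, and gives the category of that segment.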
static final int[][] CATEGORY_PER_NO_OF_TREES_AND_PERCENTAGE = new int[][] {
// Two trees: cutoff at 30%
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
// Three trees: cutoff at 18% and 45%
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
// Four trees: cutoff at 15%, 30% and 55%
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
// Five trees: cutoff at 12%, 25%, 40% and 60%
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
// Six trees: cutoff at 8%, 25%, 36%, 51% and 63%
{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 } };
private static final byte[] INITIAL_MTF_ALPHABET = new byte[MAX_NO_OF_MTF_SYMBOLS];
static
{
for (int i = 0; i < INITIAL_MTF_ALPHABET.length; i++)
{
INITIAL_MTF_ALPHABET[i] = (byte) (i & 0xFF);
}
}
// The data of the block to encode. encode() copies bytes from the start of
// the array to the positions following the block data, so the array must
// have room for ThreeWayRadixQuicksort.DATA_OVERSHOOT bytes after the first
// m_blockSize bytes.
private final byte[] m_block;
// The number of this block. Stored by the constructor; not read by any
// method of this class.
private final int m_blockNo;
// The number of bytes of block data in m_block.
private final int m_blockSize;
// The checksum that is written to the block header.
private final int m_blockChecksum;
// Bit flags indicating which bytes that occur at least once in this block
private final boolean[] m_seenDifferentBytes;
// The number of different bytes seen in this block
private final int m_numberOfSeenDifferentBytes;
// How many times the Huffman trees are refined after the initial trees have
// been created.
private final int m_numberOfHuffmanTreeRefinementIterations;
// Sink to write encoded data to.
private final BitOutput m_out;
// This callback is called when the block encoder is done. It may be null.
private final BlockEncodedCallback m_blockEncoderCallback;
// This is set by the encoding thread before calling encode
private EncodingScratchpad m_scratchpad;
/**
 * Create an encoder for one block. The constructor only stores its
 * arguments; all work is done by {@link #encode()}.
 * @param block The block data.
 * @param blockNo The number of the block.
 * @param blockSize The number of bytes of data in {@code block}.
 * @param blockChecksum The checksum to write to the block header.
 * @param seenDifferentBytes Flags for the byte values that occur in the
 * block.
 * @param numberOfSeenDifferentBytes The number of flags that are set in
 * {@code seenDifferentBytes}.
 * @param numberOfHuffmanTreeRefinementIterations The number of times to
 * refine the Huffman trees.
 * @param out The output to write the encoded block to.
 * @param bec Called when the block has been encoded. May be {@code null}.
 */
BlockEncoder(final byte[] block, final int blockNo, final int blockSize, final int blockChecksum, final boolean[] seenDifferentBytes, final int numberOfSeenDifferentBytes, final int numberOfHuffmanTreeRefinementIterations,
        final BitOutput out, final BlockEncodedCallback bec)
{
    // The block and its metadata
    this.m_block = block;
    this.m_blockSize = blockSize;
    this.m_blockNo = blockNo;
    this.m_blockChecksum = blockChecksum;
    // Byte statistics
    this.m_numberOfSeenDifferentBytes = numberOfSeenDifferentBytes;
    this.m_seenDifferentBytes = seenDifferentBytes;
    // Encoder settings and collaborators
    this.m_numberOfHuffmanTreeRefinementIterations = numberOfHuffmanTreeRefinementIterations;
    this.m_blockEncoderCallback = bec;
    this.m_out = out;
}
/**
 * Set the scratchpad holding the preallocated work arrays that this encoder
 * uses. This must be called before {@link #encode()}.
 * @param sp The scratchpad.
 */
void setScratchpad(EncodingScratchpad sp)
{
    this.m_scratchpad = sp;
}
/**
 * Collect the byte values that are flagged as seen in the current block.
 * @return The seen byte values in ascending order of their unsigned value.
 */
private byte[] getSeenByteValues()
{
    final byte[] seenValues = new byte[m_numberOfSeenDifferentBytes];
    int count = 0;
    for (int value = 0; value < 256; value++)
    {
        if (!m_seenDifferentBytes[value])
        {
            continue;
        }
        seenValues[count] = (byte) value;
        count++;
    }
    assert count == m_numberOfSeenDifferentBytes;
    return seenValues;
}
/**
 * Write the RUNA and RUNB symbols that represent {@code no} repetitions of
 * the previous symbol to {@code res}, starting at {@code outIndex}.
 * <p>
 * The run length is written with the least significant digit first, where
 * RUNA stands for the digit one and RUNB for the digit two.
 * <p>
 * This method is declared package-protected for the unit tests.
 * @param res The array to write to.
 * @param outIndex The position of the first written symbol.
 * @param no The run length. Nothing is written if it is zero or negative.
 * @return The number of symbols added. outIndex should be incremented by
 * this value by the caller.
 */
static int addRunaAndRunb(int[] res, int outIndex, int no)
{
    int written = 0;
    int remaining = no;
    while (remaining > 0)
    {
        final int target = outIndex + written;
        written++;
        if ((remaining & 1) == 1)
        {
            // Odd: the digit one
            res[target] = RUNA_SYMBOL;
            remaining -= 1;
        }
        else
        {
            // Even: the digit two
            res[target] = RUNB_SYMBOL;
            remaining -= 2;
        }
        remaining >>>= 1;
    }
    return written;
}
/**
 * Fill in the scratchpad's sequence map so that it maps each symbol to its
 * position in {@code symbols}.
 * @param symbols The symbols.
 * @return The scratchpad's sequence map. It is indexed by the unsigned
 * symbol value. Only the entries for the values in {@code symbols} are
 * written.
 */
private byte[] createSequenceMap(byte[] symbols)
{
    final byte[] positionBySymbol = m_scratchpad.m_sequenceMap;
    byte position = 0;
    for (final byte symbol : symbols)
    {
        positionBySymbol[symbol & 0xFF] = position;
        position++;
    }
    return positionBySymbol;
}
/**
 * The result of the move to front and run length encoding of a block.
 */
private static class MTFAndRLEResult
{
    // The encoded data as MTF symbols.
    private final int[] m_encodedData;
    // The number of symbols in m_encodedData that are part of the result.
    private final int m_dataLen;
    // The number of different symbol values, as counted by the encoder.
    private final int m_noSeenDifferentSymbols;

    private MTFAndRLEResult(int[] symbols, int dataLen, int noSeenDifferentSymbols)
    {
        this.m_noSeenDifferentSymbols = noSeenDifferentSymbols;
        this.m_dataLen = dataLen;
        this.m_encodedData = symbols;
    }
}
/**
* Run MTF and RLE encoding of the data in {@code data}.
* <p>
* Each input byte is translated to its index in {@code symbols}. An index
* that is equal to the previous one (the first index is compared with zero)
* only extends the current run. Any other index first flushes the pending
* run as RUNA and RUNB symbols and is then written as its position in the
* move to front alphabet plus one.
* <p>
* The result is written to the scratchpad's encoded data array; the returned
* object refers to that array and not to a copy of it.
* @param data The data to encode.
* @param dataLen The data length.
* @param symbols An array containing all different symbols that occur in
* {@code data}.
* @return MTF and RLE encoded data.
*/
private MTFAndRLEResult moveToFrontAndRunLengthEncode(final byte[] data, final int dataLen, final byte[] symbols)
{
// Flags, indexed by symbol value, for the symbols that have been written
// to the result. Used for counting the number of different symbols.
boolean[] seenSymbols = new boolean[259];
// RUNA and RUNB are always seen (even when they are not...)
seenSymbols[0] = true;
seenSymbols[1] = true;
int noSeenSymbols = 2;
// Initialize the move to front alphabet
final byte[] mtfAlphabet = m_scratchpad.m_mtfAlphabet;
System.arraycopy(INITIAL_MTF_ALPHABET, 0, mtfAlphabet, 0, mtfAlphabet.length);
// The array to store the encoded data in. The result will probably be
// shorter than dataLen thanks to the run length encoding, but dataLen (+ 1
// for the EOB symbol) is the worst case length.
// NOTE(review): the array is allocated by the scratchpad; presumably it
// is sized for the worst case -- confirm in EncodingScratchpad.
final int[] encodedData = m_scratchpad.m_encodedData;
// Create a mapping between a symbol and its index number in the array
// of symbols
final byte[] sequenceMap = createSequenceMap(symbols);
int lastSymbolIndex = 0;
int curOutArrayIndex = 0;
// A counter to keep track of the number of equal symbols in a row for
// the run length encoding
int noSame = 0;
for (int curInArrayIndex = 0; curInArrayIndex < dataLen; curInArrayIndex++)
{
final byte curSymbolIndex = sequenceMap[data[curInArrayIndex] & 0xFF];
if (curSymbolIndex == lastSymbolIndex)
{
noSame++;
}
else
{
if (noSame > 0)
{
// Run length encode. (m_scratchpad.m_encodedData is the same array
// as encodedData.)
curOutArrayIndex += addRunaAndRunb(m_scratchpad.m_encodedData, curOutArrayIndex, noSame);
noSame = 0;
}
// Search for the current symbol in the MTF alphabet and count
// the distance. The search starts at position 1: the symbol at
// position 0 is the previous symbol, which is handled by the run
// length branch above. Every symbol that is passed is shifted one
// step back while searching.
int j = 0;
byte lastMtf = mtfAlphabet[0];
while (mtfAlphabet[++j] != curSymbolIndex)
{
final byte nextLastMtf = mtfAlphabet[j];
mtfAlphabet[j] = lastMtf;
lastMtf = nextLastMtf;
}
// Finish the shift and put the current symbol first in the MTF
// alphabet.
mtfAlphabet[j] = lastMtf;
mtfAlphabet[0] = curSymbolIndex;
// Output the distance. Distance 1 gets the value 2 since
// RUNA and RUNB have the values 0 and 1.
int symbolVal = j + 1;
encodedData[curOutArrayIndex++] = symbolVal;
if (!seenSymbols[symbolVal])
{
seenSymbols[symbolVal] = true;
noSeenSymbols++;
}
lastSymbolIndex = curSymbolIndex;
}
}
if (noSame > 0)
{
// One last run length encoding
curOutArrayIndex += addRunaAndRunb(encodedData, curOutArrayIndex, noSame);
}
return new MTFAndRLEResult(encodedData, curOutArrayIndex, noSeenSymbols);
}
/**
* The outcome of encoding all segments with a set of Huffman trees. All
* fields are set by encodeAllSegmentsWithAllTrees.
*/
private static class EncodeAllSegmentsResult
{
// The shortest encoded segment length for all segments. The last segment
// is not taken into account (unless it is the only segment).
private int m_shortestLength;
// The longest encoded segment length for all segments.
private int m_longestLength;
// A list with encoding results (the bit length) for each segment and
// tree. The first index is the segment number, the second the tree index.
private int[][] m_encodingResults;
// For each segment, the index of the tree that gave the shortest
// encoded block.
private int[] m_treesUsed;
}
/**
 * Measure how many bits every segment of 50 symbols takes when it is
 * encoded with each one of the supplied Huffman trees. The tree that gives
 * the fewest bits is selected for each segment. For equally good trees, the
 * one with the lowest index wins.
 * @param data The symbols to encode.
 * @param dataLen The number of symbols to encode. (This may be less than
 * the length of the {@code data} array.)
 * @param codeLengths For each tree, the code length of each symbol.
 * @param numberOfHuffmanSegments The number of segments in the block.
 * @param numberOfDifferentSymbols Not used by this method.
 * @param res Receives the results: the bit lengths per segment and tree,
 * the selected tree per segment and the shortest and the longest of the
 * selected lengths.
 */
private void encodeAllSegmentsWithAllTrees(final int[] data, final int dataLen, final int[][] codeLengths, final int numberOfHuffmanSegments, final int numberOfDifferentSymbols, final EncodeAllSegmentsResult res) throws IOException
{
    final int treeCount = codeLengths.length;
    final int lastSegment = numberOfHuffmanSegments - 1;
    final int[][] bitsPerSegmentAndTree = m_scratchpad.m_encodingResults;
    // The winning tree for each segment
    final int[] winners = new int[numberOfHuffmanSegments];
    // The extremes of the winning lengths over all segments
    int overallShortest = Integer.MAX_VALUE;
    int overallLongest = 0;
    for (int segment = 0; segment < numberOfHuffmanSegments; segment++)
    {
        final int from = segment * NO_OF_SYMBOLS_PER_SEGMENT;
        final int to = Math.min(from + NO_OF_SYMBOLS_PER_SEGMENT, dataLen);
        final int[] bitsPerTree = new int[treeCount];
        int winner = 0;
        int winnerBits = Integer.MAX_VALUE;
        for (int tree = 0; tree < treeCount; tree++)
        {
            final int[] lengths = codeLengths[tree];
            int bits = 0;
            for (int pos = from; pos < to; pos++)
            {
                bits += lengths[data[pos]];
            }
            bitsPerTree[tree] = bits;
            // The first tree is always the initial winner. A later tree
            // must be strictly better to replace it.
            if (tree == 0 || bits < winnerBits)
            {
                winnerBits = bits;
                winner = tree;
            }
        }
        if (segment == 0)
        {
            overallShortest = winnerBits;
            overallLongest = winnerBits;
        }
        // The last segment is likely to contain less than 50 symbols, so it
        // is never allowed to lower the shortest length.
        else if (segment < lastSegment && winnerBits < overallShortest)
        {
            overallShortest = winnerBits;
        }
        else if (winnerBits > overallLongest)
        {
            overallLongest = winnerBits;
        }
        bitsPerSegmentAndTree[segment] = bitsPerTree;
        winners[segment] = winner;
    }
    res.m_treesUsed = winners;
    res.m_shortestLength = overallShortest;
    res.m_longestLength = overallLongest;
    res.m_encodingResults = bitsPerSegmentAndTree;
}
/**
 * Divide all segments into x categories based on how well they were encoded
 * by the globally optimal Huffman tree. An optimal Huffman tree is created
 * for each category that contains at least one segment.
 * <p>
 * The last segment is likely to be shorter than the other segments.
 * Therefore it is not counted when deciding which categories that contain
 * segments. It is still given a category of its own, computed in the same
 * way as for the other segments, so that its symbols are counted in a well
 * defined category.
 * @param data The data to encode.
 * @param dataLen The length of the data.
 * @param eobSymbol The value of the special EOB symbol. This is the highest
 * used symbol value.
 * @param numberOfHuffmanTrees The number of Huffman trees to create.
 * @param numberOfSegments The number of 50-symbol segments in the block.
 * @param easr The encoding results from encoding the data with the globally
 * optimal Huffman tree.
 * @param globallyOptimalTree The symbol code lengths for the globally
 * optimal Huffman tree.
 * @return The symbols code lengths for each created tree. If all segments
 * were encoded to the same length, this is just the globally optimal tree.
 */
private int[][] createNewTrees(final int[] data, final int dataLen, final int eobSymbol, final int numberOfHuffmanTrees, final int numberOfSegments, final EncodeAllSegmentsResult easr, final int[] globallyOptimalTree)
{
    // Clear the frequencies array
    final int[][] frequencies = m_scratchpad.m_frequencies2d;
    for (int i = 0; i < numberOfHuffmanTrees; i++)
    {
        Arrays.fill(frequencies[i], 0);
    }
    // How big difference in number of bits is there between the shortest
    // and the longest encoded segment?
    final int maxDistance = easr.m_longestLength - easr.m_shortestLength;
    if (maxDistance == 0)
    {
        // Nothing to do. We're as optimal as can be.
        return new int[][] { globallyOptimalTree };
    }
    final int numberOfCategories = numberOfHuffmanTrees;
    // Which category does each segment fall into?
    final int[] categoryPerSegment = m_scratchpad.m_categoriesPerSegment;
    // How many segments fall into each category?
    final int[] noSegmentsPerCategory = new int[numberOfCategories];
    // This array is used to determine which category a segment falls into
    // based on its encoded length.
    final int[] catArray = CATEGORY_PER_NO_OF_TREES_AND_PERCENTAGE[numberOfHuffmanTrees - 2];
    final int lastSegment = numberOfSegments - 1;
    // Don't include the last segment in the statistics since that is likely
    // to be shorter
    for (int i = 0; i < lastSegment; i++)
    {
        // The shortest length for this segment.
        final int segmentLen = easr.m_encodingResults[i][easr.m_treesUsed[i]];
        final int percentage = (100 * (segmentLen - easr.m_shortestLength)) / maxDistance;
        assert percentage >= 0;
        assert percentage <= 100;
        final int catNo = catArray[percentage];
        noSegmentsPerCategory[catNo]++;
        categoryPerSegment[i] = catNo;
    }
    if (lastSegment >= 0)
    {
        // The category array belongs to the scratchpad, so the entry for the
        // last segment must be assigned here too. Otherwise the frequency
        // counting below would use whatever value the array already
        // contained at that position. The length of the last segment does
        // not take part in the shortest length, so the percentage may fall
        // outside of the table and has to be clamped.
        final int lastLen = easr.m_encodingResults[lastSegment][easr.m_treesUsed[lastSegment]];
        final int rawPercentage = (100 * (lastLen - easr.m_shortestLength)) / maxDistance;
        categoryPerSegment[lastSegment] = catArray[Math.max(0, Math.min(100, rawPercentage))];
    }
    // Count the symbol frequencies per category. This includes the symbols
    // in the last segment.
    for (int i = 0; i < numberOfSegments; i++)
    {
        final int segmentStart = i * NO_OF_SYMBOLS_PER_SEGMENT;
        final int segmentEnd = Math.min(segmentStart + NO_OF_SYMBOLS_PER_SEGMENT, dataLen);
        final int[] curCatFreqs = frequencies[categoryPerSegment[i]];
        for (int j = segmentStart; j < segmentEnd; j++)
        {
            curCatFreqs[data[j]]++;
        }
    }
    // Only the categories that contain segments get a tree.
    int noNewTrees = 0;
    for (int i = 0; i < numberOfCategories; i++)
    {
        if (noSegmentsPerCategory[i] > 0)
        {
            noNewTrees++;
        }
    }
    assert noNewTrees > 0;
    int[][] res = new int[noNewTrees][];
    int treeNo = 0;
    for (int i = 0; i < numberOfCategories; i++)
    {
        if (noSegmentsPerCategory[i] > 0)
        {
            // Create a new Huffman tree for this category.
            res[treeNo++] = HighValueBranchHuffmanTree.createCodeLengths(frequencies[i], eobSymbol + 1, MAX_HUFFMAN_BIT_LENGTH, m_scratchpad);
        }
    }
    return res;
}
/**
 * Create a new set of Huffman trees from the results of encoding the data
 * with the current set. Each new tree is made optimal for the symbols in
 * the segments that the corresponding current tree was selected for.
 * @param data The data to encode.
 * @param dataLen The length of the data to encode.
 * @param codeLengths The code length for each symbol for each tree.
 * @param easr The results when encoding the data with this set of trees.
 * @param eobSymbol The value of the EOB symbol. This is the highest symbol
 * value.
 * @return Symbol code lengths for the refined trees, one for each tree in
 * {@code codeLengths}.
 */
private int[][] refineTreesBasedOnEncodingResults(final int[] data, final int dataLen, final int[][] codeLengths, final EncodeAllSegmentsResult easr, final int eobSymbol)
{
    final int treeCount = codeLengths.length;
    // Start from empty frequency counters for the trees that we have
    final int[][] frequencies = m_scratchpad.m_frequencies2d;
    for (int tree = 0; tree < treeCount; tree++)
    {
        Arrays.fill(frequencies[tree], 0);
    }
    // Walk through the data one segment at a time and count each symbol
    // for the tree that was selected for the segment that the symbol is in.
    final int[] treesUsed = easr.m_treesUsed;
    int treeForSegment = treesUsed[0];
    int segment = 0;
    for (int from = 0; from < dataLen; from += NO_OF_SYMBOLS_PER_SEGMENT)
    {
        // Data beyond the last known segment is counted for the tree of
        // the last known segment.
        if (segment < treesUsed.length)
        {
            treeForSegment = treesUsed[segment];
        }
        final int[] counters = frequencies[treeForSegment];
        final int to = Math.min(from + NO_OF_SYMBOLS_PER_SEGMENT, dataLen);
        for (int pos = from; pos < to; pos++)
        {
            counters[data[pos]]++;
        }
        segment++;
    }
    // Build the new trees from the counted frequencies
    final int[][] refined = new int[treeCount][];
    for (int tree = 0; tree < treeCount; tree++)
    {
        refined[tree] = HighValueBranchHuffmanTree.createCodeLengths(frequencies[tree], eobSymbol + 1, MAX_HUFFMAN_BIT_LENGTH, m_scratchpad);
    }
    return refined;
}
/**
 * Decide how many Huffman trees to use for a block with the given number
 * of 50-symbol segments.
 * @param noSegments The number of segments in the block.
 * @return A number between 2 and 6. More segments give more trees.
 */
private byte getNumberOfHuffmanTrees(int noSegments)
{
    // The limits are the ones used by bzip2
    if (noSegments >= 2400)
    {
        return 6;
    }
    if (noSegments >= 1200)
    {
        return 5;
    }
    if (noSegments >= 600)
    {
        return 4;
    }
    if (noSegments >= 200)
    {
        return 3;
    }
    return 2;
}
/**
 * Find the shortest and the longest code length in the array.
 * @param codeLengths The code lengths. There must be at least one.
 * @return An int array containing the minimum and the maximum code lengths,
 * in that order.
 */
private int[] getMinAndMaxCodeLengths(final int[] codeLengths)
{
    int shortest = codeLengths[0];
    int longest = shortest;
    for (int pos = 1; pos < codeLengths.length; pos++)
    {
        final int length = codeLengths[pos];
        shortest = Math.min(shortest, length);
        longest = Math.max(longest, length);
    }
    return new int[] { shortest, longest };
}
/**
* Create the Huffman trees that should be used for encoding the current
* block. First, an globally optimal tree is created. Then new trees are
* created from information on how well the globally optimal tree encoded
* different segments. Lastly, the created trees are optimized based on the
* data in the segments that they are used to encode. This last step is
* repeated a configurable number of times ({@code
* m_numberOfHuffmanTreeRefinementIterations}).
* <p>
* As a side effect, the EOB symbol is written to {@code data[dataLen]}.
* @param data The data that should be encoded using the created Huffman
* trees. It must have room for one more symbol after the data.
* @param dataLen The length of the data, excluding the trailing EOB symbol.
* @param noSymbolsUsed The number of different symbols used in the data.
* Not used by this method.
* @return The trees, the tree to use for each segment, the number of
* segments and the value of the EOB symbol.
*/
private HuffmanTreesAndUsage createHuffmanTrees(final int[] data, final int dataLen, final int noSymbolsUsed) throws IOException
{
HuffmanTreesAndUsage res = new HuffmanTreesAndUsage();
// The number of 50-symbol segments needed for the data and the EOB
// symbol.
// +1 == EOB symbol
res.m_noHuffmanSegments = ((dataLen - 1 + 1) / NO_OF_SYMBOLS_PER_SEGMENT) + 1;
// Create a Huffman tree for the entire input.
// Count the frequencies of the different bytes in the input.
int[] frequencies = m_scratchpad.m_frequencies;
Arrays.fill(frequencies, 0);
// The maximum symbol value used (before the EOB symbol) is at least 1
// (RUNB).
int maxSymbolValue = 1;
for (int j = 0; j < dataLen; j++)
{
int symbolVal = data[j];
frequencies[symbolVal]++;
if (symbolVal > maxSymbolValue)
{
maxSymbolValue = symbolVal;
}
}
// Now we can infer the value of the EOB (End Of Block) symbol. Add it
// to the end of the data. The data array is created so there should be
// room for it.
res.m_eobSymbol = maxSymbolValue + 1;
frequencies[res.m_eobSymbol] = 1;
data[dataLen] = res.m_eobSymbol;
final int dataLenIncEob = dataLen + 1;
// Maybe we're already done? This is the case when all the data fits in a
// single segment. (The constant compared with is the minimum number of
// trees, which has the value 2.)
if (res.m_noHuffmanSegments < MIN_NO_OF_HUFFMAN_TREES)
{
// We have to encode at least two trees anyway.
res.m_trees = new HighValueBranchHuffmanTree[MIN_NO_OF_HUFFMAN_TREES];
int[] codeLengths = HighValueBranchHuffmanTree.createCodeLengths(frequencies, res.m_eobSymbol + 1, MAX_HUFFMAN_BIT_LENGTH, m_scratchpad);
int[] minAndMaxLength = getMinAndMaxCodeLengths(codeLengths);
HighValueBranchHuffmanTree tree = new HighValueBranchHuffmanTree(codeLengths, minAndMaxLength[0], minAndMaxLength[1], true);
for (int i = 0; i < MIN_NO_OF_HUFFMAN_TREES; i++)
{
res.m_trees[i] = tree;
}
// Use tree #0 for all segments
res.m_treeUsage = new int[res.m_noHuffmanSegments];
}
else
{
// Index 0 holds the initial set of trees. Each following index holds
// the set of trees from one refinement iteration.
final int[][][] huffmanCodeLengths = new int[m_numberOfHuffmanTreeRefinementIterations + 1][][];
final int[] codeLengthsForGloballyOptimalTree = HighValueBranchHuffmanTree.createCodeLengths(frequencies, res.m_eobSymbol + 1, MAX_HUFFMAN_BIT_LENGTH, m_scratchpad);
final EncodeAllSegmentsResult easr = new EncodeAllSegmentsResult();
// NOTE(review): the two calls below are passed dataLen, which does not
// include the EOB symbol, while the calls in the loop further down
// are passed dataLenIncEob. Confirm that this is intended.
encodeAllSegmentsWithAllTrees(data, dataLen, new int[][] { codeLengthsForGloballyOptimalTree }, res.m_noHuffmanSegments, res.m_eobSymbol + 1, easr);
huffmanCodeLengths[0] = createNewTrees(data, dataLen, res.m_eobSymbol, getNumberOfHuffmanTrees(res.m_noHuffmanSegments), res.m_noHuffmanSegments, easr, codeLengthsForGloballyOptimalTree);
// Select the set of trees that gives the shortest total data length
int bestIndex = -1;
int bestLength = Integer.MAX_VALUE;
int[] bestTreeUsage = null;
for (int i = 0; i < huffmanCodeLengths.length; i++)
{
if (i > 0)
{
// Refine the trees. At this point easr holds the results for the
// previous set of trees.
huffmanCodeLengths[i] = refineTreesBasedOnEncodingResults(data, dataLenIncEob, huffmanCodeLengths[i - 1], easr, res.m_eobSymbol);
}
encodeAllSegmentsWithAllTrees(data, dataLenIncEob, huffmanCodeLengths[i], res.m_noHuffmanSegments, res.m_eobSymbol + 1, easr);
int totLen = 0;
for (int j = 0; j < easr.m_treesUsed.length; j++)
{
totLen += easr.m_encodingResults[j][easr.m_treesUsed[j]];
}
// Previously the length of each encoded tree was added to the
// total length. That had negligible effect on the total encoded
// length and a small impact on the performance.
if (totLen < bestLength)
{
bestIndex = i;
bestLength = totLen;
// Every call to encodeAllSegmentsWithAllTrees creates a new array
// for the trees used, so this reference is not affected by later
// iterations.
bestTreeUsage = easr.m_treesUsed;
}
}
int noTrees = huffmanCodeLengths[bestIndex].length;
if (noTrees < MIN_NO_OF_HUFFMAN_TREES)
{
// Only one tree was created. Fill up with copies of it to get the
// minimum number of trees.
res.m_trees = new HighValueBranchHuffmanTree[MIN_NO_OF_HUFFMAN_TREES];
int[] minAndMaxLength = getMinAndMaxCodeLengths(huffmanCodeLengths[bestIndex][0]);
for (int i = 0; i < MIN_NO_OF_HUFFMAN_TREES; i++)
{
res.m_trees[i] = new HighValueBranchHuffmanTree(huffmanCodeLengths[bestIndex][0], minAndMaxLength[0], minAndMaxLength[1], true);
}
}
else
{
res.m_trees = new HighValueBranchHuffmanTree[huffmanCodeLengths[bestIndex].length];
for (int i = 0; i < huffmanCodeLengths[bestIndex].length; i++)
{
int[] minAndMaxLengths = getMinAndMaxCodeLengths(huffmanCodeLengths[bestIndex][i]);
res.m_trees[i] = new HighValueBranchHuffmanTree(huffmanCodeLengths[bestIndex][i], minAndMaxLengths[0], minAndMaxLengths[1], true);
}
}
res.m_treeUsage = bestTreeUsage;
}
return res;
}
/**
 * Write the code lengths of a Huffman tree to the output.
 * <p>
 * The code length of the first symbol is written as a five bit number.
 * After that, the following is written for every symbol, including the
 * first one: for each step that the length differs from the length of the
 * previous symbol, a one bit followed by a bit giving the direction (zero
 * for longer, one for shorter), and lastly a terminating zero bit.
 * @param tree The tree to encode.
 * @param numberOfDifferentSymbols The number of different symbols in the
 * tree.
 * @param out The output to write the tree to.
 * @throws IOException On errors writing to the output.
 */
static void encodeHuffmanTree(final HighValueBranchHuffmanTree tree, final int numberOfDifferentSymbols, final BitOutput out) throws IOException
{
    // The length that a reader has arrived at so far (0..17)
    int currentLength = tree.getBitLength(0);
    out.writeBitsLittleEndian(currentLength, 5);
    for (int symbol = 0; symbol < numberOfDifferentSymbols; symbol++)
    {
        final int wantedLength = tree.getBitLength(symbol);
        while (currentLength != wantedLength)
        {
            // Alter length, one step at a time
            out.writeBit(true);
            final boolean makeShorter = currentLength > wantedLength;
            out.writeBit(makeShorter);
            currentLength += makeShorter ? -1 : 1;
        }
        // We are at the right length
        out.writeBit(false);
    }
}
/**
* Write the block header for an encoded data block.
* <p>
* The header consists of, in order: the block magic, the block checksum,
* the randomized flag, the Burrows Wheeler start pointer, the flags for the
* byte values that are used in the block, the number of Huffman trees, the
* number of segments, the tree selected for each segment and lastly the
* Huffman trees themselves.
* @param blockChecksum The block checksum.
* @param bwFirstPointer The pointer to the first element in the Burrows
* Wheeler encoded data.
* @param seenDifferentBytes Bit flags that are switched on for all bytes
* that are seen in the written data.
* @param mtfrle Results from the MTF and RLE encodings. Not used by this
* method.
* @param htau The different Huffman trees and information on when they are
* used.
*/
private void writeBlockHeader(final int blockChecksum, int bwFirstPointer, boolean[] seenDifferentBytes, MTFAndRLEResult mtfrle, HuffmanTreesAndUsage htau) throws IOException
{
// Block magic
for (int i = 0; i < BLOCK_MAGIC.length; i++)
{
m_out.writeBitsLittleEndian(BLOCK_MAGIC[i] & 0xFF, 8);
}
// Checksum
m_out.writeBitsLittleEndian(blockChecksum, 32);
// Randomized? (no)
m_out.writeBit(false);
// Starting pointer into Burrows Wheeler matrix (24 bits)
m_out.writeBitsLittleEndian(bwFirstPointer, 24);
// Split the 256 possible byte values into 16 groups of 16 values each.
// segmentsWithData tells which groups that contain at least one seen
// value, seenData which values that are seen within each group.
boolean[] segmentsWithData = new boolean[16];
boolean[][] seenData = new boolean[16][16];
for (int i = 0; i < 256; i++)
{
if (seenDifferentBytes[i])
{
segmentsWithData[i / 16] = true;
seenData[i / 16][i % 16] = true;
}
}
// Write a flag for each group of 16 byte values that has at least one
// value occurring in the encoded data.
for (int i = 0; i < 16; i++)
{
m_out.writeBit(segmentsWithData[i]);
}
// For each group used, write a flag for each of the 16 byte values in
// that group.
for (int i = 0; i < 16; i++)
{
if (segmentsWithData[i])
{
for (int j = 0; j < 16; j++)
{
m_out.writeBit(seenData[i][j]);
}
}
}
// The number of Huffman trees used (2..6)
// NOTE(review): this is the only multiple bit value in the header that
// is written with writeBits instead of writeBitsLittleEndian. Confirm
// that this is intended.
m_out.writeBits(htau.m_trees.length, 3);
// The number of times the Huffman trees are switched (each 50 symbols)
m_out.writeBitsLittleEndian(htau.m_noHuffmanSegments, 15);
// Which Huffman tree is selected at each switch? Use a zero-terminated
// bit run of MTF:ed index values
// Init the MTF alphabet
int[] mtfAlpha = new int[htau.m_trees.length];
for (int i = 0; i < htau.m_trees.length; i++)
{
mtfAlpha[i] = i;
}
int[] treeUsageMtf = new int[htau.m_noHuffmanSegments];
new IntMoveToFront(mtfAlpha).encode(htau.m_treeUsage, treeUsageMtf);
for (int i = 0; i < htau.m_noHuffmanSegments; i++)
{
// A zero-terminated bit run for the values 0..5: as many one bits as
// the value, followed by a zero bit.
int val = 0;
while (val < treeUsageMtf[i])
{
m_out.writeBit(true);
val++;
}
m_out.writeBit(false);
}
// Encode each Huffman tree
for (int i = 0; i < htau.m_trees.length; i++)
{
encodeHuffmanTree(htau.m_trees[i], htau.m_eobSymbol + 1, m_out);
}
}
/**
* The Huffman trees created for a block together with information on how
* they are used. Objects of this class are created by createHuffmanTrees.
*/
private static class HuffmanTreesAndUsage
{
// The trees to write to the block header. There are always at least two.
private HighValueBranchHuffmanTree[] m_trees;
// The number of segments of 50 symbols in the block, counting the EOB
// symbol.
private int m_noHuffmanSegments;
// For each segment, the index in m_trees of the tree to encode it with.
private int[] m_treeUsage;
// The value of the EOB (End Of Block) symbol. This is one more than the
// highest symbol value found in the data.
private int m_eobSymbol;
}
/**
 * Encode the block and write the result to the output that was given to the
 * constructor.
 * <p>
 * The block is padded with overshoot data for the sorting, Burrows Wheeler
 * encoded and then move to front and run length encoded. After that, the
 * Huffman trees are created and the block header and the Huffman encoded
 * symbols, ending with the EOB symbol, are written. Lastly, the callback is
 * notified if there is one.
 * <p>
 * The scratchpad must have been set with
 * {@link #setScratchpad(EncodingScratchpad)} before this method is called.
 * @throws IOException On errors writing to the output or reporting to the
 * callback.
 * @throws IllegalStateException If the block does not contain any data.
 */
void encode() throws IOException
{
    // An empty block cannot be encoded. It has to be rejected here since
    // the copy loop below would never finish if there is nothing to copy.
    if (m_blockSize <= 0)
    {
        throw new IllegalStateException("Cannot encode a block without data. Block size: " + m_blockSize);
    }
    // Fix the block overshoot. Copy DATA_OVERSHOOT bytes to the end of the
    // array. Repeat the data if the block is shorter than DATA_OVERSHOOT
    // bytes.
    int noCopied = 0;
    while (noCopied < ThreeWayRadixQuicksort.DATA_OVERSHOOT)
    {
        int noToCopy = Math.min(ThreeWayRadixQuicksort.DATA_OVERSHOOT - noCopied, m_blockSize);
        System.arraycopy(m_block, 0, m_block, m_blockSize + noCopied, noToCopy);
        noCopied += noToCopy;
    }
    // Sort the data in the block (the Burrows Wheeler transformation).
    BurrowsWheelerEncodingResult burrWhee = new BurrowsWheelerEncoder(m_block, m_blockSize, m_scratchpad).encode();
    // Run Move to front and run length encoding transformations on the
    // Burrows Wheeler encoded data
    MTFAndRLEResult rleMtfSymbols = moveToFrontAndRunLengthEncode(burrWhee.m_lastColumn, m_blockSize, getSeenByteValues());
    int[] encodedData = rleMtfSymbols.m_encodedData;
    // Create the Huffman trees. This method also infers the value of the
    // EOB symbol and adds it to the end of the encodedData array.
    HuffmanTreesAndUsage htau = createHuffmanTrees(encodedData, rleMtfSymbols.m_dataLen, rleMtfSymbols.m_noSeenDifferentSymbols);
    writeBlockHeader(m_blockChecksum, burrWhee.m_firstPointer, m_seenDifferentBytes, rleMtfSymbols, htau);
    // Write the Huffman encoded data. The EOB symbol is last in the data.
    int swapNo = 0;
    // Starts at one to make the first symbol select the first tree.
    int noLeftUntilSwap = 1;
    HighValueBranchHuffmanTree curTree = null;
    // +1 == EOB symbol
    for (int i = 0; i < rleMtfSymbols.m_dataLen + 1; i++)
    {
        if (--noLeftUntilSwap == 0)
        {
            // A new segment starts here. Switch to its tree.
            curTree = htau.m_trees[htau.m_treeUsage[swapNo++]];
            noLeftUntilSwap = NO_OF_SYMBOLS_PER_SEGMENT;
        }
        curTree.write(m_out, encodedData[i]);
    }
    assert swapNo == htau.m_noHuffmanSegments;
    if (m_blockEncoderCallback != null)
    {
        m_blockEncoderCallback.reportBlockDone();
    }
}
}

View File

@ -0,0 +1,62 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.IOException;
/**
 * A job that encodes one block using a {@link BlockEncoder}. It is used by
 * the {@link BlockOutputStream} to encode a block in a separate encoding
 * thread.
 * <p>
 * The job must be run by an {@link EncodingThread}. The scratchpad of the
 * thread is handed to the encoder, and if the encoding fails, the error is
 * registered in the error state of the thread instead of being thrown.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class BlockEncoderRunnable implements Runnable
{
    // Does the actual encoding.
    private final BlockEncoder m_encoder;
    // Registered together with each error from the encoding.
    private final Object m_errorOwner;

    BlockEncoderRunnable(final BlockEncoder be, final Object errorOwner)
    {
        this.m_errorOwner = errorOwner;
        this.m_encoder = be;
    }

    /**
     * Encode the block. Errors are registered in the error state of the
     * running thread.
     */
    @Override
    public void run()
    {
        try
        {
            m_encoder.setScratchpad(currentEncodingThread().getScratchpad());
            m_encoder.encode();
        }
        catch (IOException e)
        {
            currentEncodingThread().getErrorState().registerError(e, m_errorOwner);
        }
        catch (RuntimeException e)
        {
            currentEncodingThread().getErrorState().registerError(e, m_errorOwner);
        }
        catch (Error e)
        {
            currentEncodingThread().getErrorState().registerError(e, m_errorOwner);
        }
    }

    /**
     * Get the thread that runs this job as an {@link EncodingThread}.
     */
    private static EncodingThread currentEncodingThread()
    {
        return (EncodingThread) Thread.currentThread();
    }
}

View File

@ -0,0 +1,355 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import org.at4j.support.io.BitOutput;
import org.at4j.support.io.LittleEndianBitOutputStream;
/**
 * Used by {@link BZip2OutputStream} to RLE encode data and then write it to
 * compressed blocks.
 * <p>
 * Bytes written to this stream are run length encoded into a block buffer.
 * Whenever the buffer is full, the block is either encoded in the calling
 * thread or handed off to an encoder executor, and a new block is started.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class BlockOutputStream extends OutputStream
{
    /**
     * The different states of the run length encoder.
     */
    private static enum RLEState
    {
        ENCODING_SINGLE, COUNTING_MULTIPLE;
    }

    // The maximum number of encoded repeated bytes.
    private static final int MAX_NO_OF_RLE_REPEATS = 251;

    // The state of the run length encoder.
    private RLEState m_rleState;

    // The last byte value that write was called with. Used to keep track of
    // the run length encoding. -1 means "no byte seen yet in this block".
    private int m_last = -1;

    // How many equal bytes in a row has write been called with. Used to keep
    // track of the run length encoding.
    private int m_numberOfSame;

    // Encoded data is written to this.
    private final BitOutput m_wrapped;

    // The size of a Burrows Wheeler block, in bytes.
    private final int m_blockSize;

    // How many times should the Huffman trees be refined before encoding data?
    private final int m_numberOfHuffmanTreeRefinementIterations;

    // Bit flags indicating which bytes that occur at least once in the current
    // block.
    private boolean[] m_seenDifferentBytesInCurBlock;

    // The data in the current block.
    private byte[] m_block;

    // If we are using separate encoding threads, this executor is used to
    // schedule blocks for execution. Otherwise it is null.
    private final BZip2EncoderExecutorServiceImpl m_encodingExecutor;

    // A token identifying who owns the errors that may be caused by jobs that
    // we might schedule in the executor. This is null if no executor is used.
    private final Object m_errorOwner;

    // Contains preallocated data structures. Used to reduce the number of
    // temporary objects that are created and thus avoid time spent gc:ing.
    // This is null if an executor is used for encoding.
    private final EncodingScratchpad m_scratchpad;

    // If we use several encoder threads, this object is used for writing the
    // encoded blocks in the right order. Otherwise it is null.
    private final EncodedBlockWriter m_encodedBlockWriter;

    // The checksum for the current block.
    private CRC m_blockChecksum;

    // The checksum for the entire file.
    private int m_fileChecksum = 0;

    // The number of different bytes seen in the current block.
    private int m_noSeenDifferentBytesInCurBlock;

    // The position in m_block where the next byte will be stored.
    private int m_blockPointer;

    // The number of the current block. Incremented for every written block.
    private int m_blockNo = 0;

    // Set by the first call to close() so that repeated calls are no-ops.
    private boolean m_closed;

    /**
     * @param wrapped The output that encoded data is written to when blocks
     * are encoded in the calling thread.
     * @param blockSize The size of a block, in bytes.
     * @param numberOfHuffmanTreeRefinementIterations Passed on to every
     * {@link BlockEncoder}.
     * @param ex The executor used for encoding blocks in other threads, or
     * {@code null} to encode in the calling thread.
     * @param errorOwner The error owner token passed along with jobs
     * scheduled in the executor. May be {@code null}.
     * @param ebw The writer that receives blocks encoded by other threads.
     * May be {@code null}.
     * @param sp The scratchpad used when encoding in the calling thread, or
     * {@code null} if an executor is used.
     */
    BlockOutputStream(BitOutput wrapped, int blockSize, int numberOfHuffmanTreeRefinementIterations, BZip2EncoderExecutorServiceImpl ex, Object errorOwner, EncodedBlockWriter ebw, EncodingScratchpad sp)
    {
        // Can only have one, not both.
        assert ex == null ^ sp == null;

        m_wrapped = wrapped;
        m_blockSize = blockSize;
        m_numberOfHuffmanTreeRefinementIterations = numberOfHuffmanTreeRefinementIterations;
        m_blockChecksum = new CRC();
        m_scratchpad = sp;
        // May be null.
        m_encodingExecutor = ex;
        // May be null
        m_errorOwner = errorOwner;
        // May be null.
        m_encodedBlockWriter = ebw;

        startNewBlock();
    }

    /**
     * Reset the block buffer, the seen-bytes bookkeeping and the run length
     * encoder state before data is collected for a new block.
     */
    private void startNewBlock()
    {
        m_blockPointer = 0;
        if (m_encodingExecutor != null)
        {
            // We use several threads for encoding. Create new instances for
            // data that may be used right now by an encoder.
            m_seenDifferentBytesInCurBlock = new boolean[256];
            m_block = new byte[m_blockSize + ThreeWayRadixQuicksort.DATA_OVERSHOOT];
        }
        else
        {
            // We encode in this thread. It is safe to reuse variables.
            if (m_seenDifferentBytesInCurBlock == null)
            {
                m_seenDifferentBytesInCurBlock = new boolean[256];
            }
            else
            {
                Arrays.fill(m_seenDifferentBytesInCurBlock, false);
            }
            if (m_block == null)
            {
                m_block = new byte[m_blockSize + ThreeWayRadixQuicksort.DATA_OVERSHOOT];
            }
        }
        m_noSeenDifferentBytesInCurBlock = 0;

        // Reset the run length encoder state
        m_last = -1;
        m_numberOfSame = 0;
        m_rleState = RLEState.ENCODING_SINGLE;
    }

    private boolean isFull()
    {
        return m_blockPointer == m_blockSize;
    }

    private boolean isEmpty()
    {
        return m_blockPointer == 0;
    }

    /**
     * Get the checksum accumulated over all blocks written so far.
     */
    int getFileChecksum()
    {
        return m_fileChecksum;
    }

    /**
     * Write a compressed data block. The block is encoded in the calling
     * thread if no executor is configured; otherwise it is scheduled in the
     * executor. The file checksum and the block number are updated in both
     * cases.
     */
    private void writeCurBlock() throws IOException
    {
        final int blockChecksum = m_blockChecksum.getValue();
        m_blockChecksum = new CRC();

        if (m_encodingExecutor == null)
        {
            // Encode the block in the current thread.
            BlockEncoder be = new BlockEncoder(m_block, m_blockNo, m_blockPointer, blockChecksum, m_seenDifferentBytesInCurBlock, m_noSeenDifferentBytesInCurBlock, m_numberOfHuffmanTreeRefinementIterations, m_wrapped, null);
            be.setScratchpad(m_scratchpad);
            be.encode();
        }
        else
        {
            // Hand off the block to another thread for encoding.
            // Allocate an output buffer that is 2/3rds of the size of the
            // written data.
            ByteArrayOutputStream baos = new ByteArrayOutputStream((2 * m_blockPointer) / 3);
            BitOutput out = new LittleEndianBitOutputStream(baos);
            BlockEncodedCallback bec = new BlockEncodedCallback(m_blockNo, baos, out, m_encodedBlockWriter);
            BlockEncoder be = new BlockEncoder(m_block, m_blockNo, m_blockPointer, blockChecksum, m_seenDifferentBytesInCurBlock, m_noSeenDifferentBytesInCurBlock, m_numberOfHuffmanTreeRefinementIterations, out, bec);
            m_encodingExecutor.execute(new BlockEncoderRunnable(be, m_errorOwner));
        }

        // Update the file checksum
        m_fileChecksum = (m_fileChecksum << 1) | (m_fileChecksum >>> 31);
        m_fileChecksum ^= blockChecksum;

        m_blockNo++;
    }

    /**
     * Store a single, already run length encoded, byte in the current block.
     * If that fills the block, the block is written and a new one is started.
     * @param b A value in the interval 0..255.
     */
    private void writeByte(final int b) throws IOException
    {
        m_block[m_blockPointer++] = (byte) (b & 0xFF);
        if (!m_seenDifferentBytesInCurBlock[b])
        {
            m_seenDifferentBytesInCurBlock[b] = true;
            m_noSeenDifferentBytesInCurBlock++;
        }

        if (isFull())
        {
            writeCurBlock();
            startNewBlock();
        }
    }

    /**
     * Run length encode one byte and add it to the current block.
     * <p>
     * As specified by {@link OutputStream#write(int)}, only the eight
     * low-order bits of the argument are used.
     * @param b The byte to write.
     */
    @Override
    public void write(final int b) throws IOException
    {
        // Honor the OutputStream contract: ignore the 24 high-order bits.
        // Everything below (the seen-bytes table, the CRC) requires 0..255.
        final int value = b & 0xFF;

        // Run length encode
        switch (m_rleState)
        {
            case ENCODING_SINGLE:
                if (value == m_last)
                {
                    m_numberOfSame++;
                    if (m_numberOfSame == 4)
                    {
                        if (m_blockPointer == m_blockSize - 1)
                        {
                            // Corner case. bzip2 cannot handle blocks that end
                            // with four equal bytes. End this block one byte
                            // earlier.
                            writeCurBlock();
                            startNewBlock();
                            write(value);
                            return;
                        }
                        else
                        {
                            // Four equal in a row. Change state
                            m_rleState = RLEState.COUNTING_MULTIPLE;
                            m_numberOfSame = 0;
                        }
                    }
                }
                else
                {
                    m_last = value;
                    m_numberOfSame = 1;
                }
                m_blockChecksum.update(value);
                writeByte(value);
                break;

            case COUNTING_MULTIPLE:
                if (value == m_last)
                {
                    m_numberOfSame++;
                    if (m_numberOfSame == MAX_NO_OF_RLE_REPEATS)
                    {
                        // Cannot repeat this anymore. Update checksum, write
                        // and switch state.
                        for (int i = 0; i < MAX_NO_OF_RLE_REPEATS; i++)
                        {
                            m_blockChecksum.update(value);
                        }
                        writeByte(MAX_NO_OF_RLE_REPEATS);
                        m_rleState = RLEState.ENCODING_SINGLE;
                        m_numberOfSame = 0;
                    }
                }
                else
                {
                    // A byte that is not same as the last. Stop counting,
                    // update the checksum and change state.
                    for (int i = 0; i < m_numberOfSame; i++)
                    {
                        m_blockChecksum.update(m_last);
                    }
                    writeByte(m_numberOfSame);
                    m_blockChecksum.update(value);
                    writeByte(value);
                    // These assignments must come after the writeByte calls:
                    // writeByte may start a new block, which resets the
                    // encoder state.
                    m_numberOfSame = 1;
                    m_last = value;
                    m_rleState = RLEState.ENCODING_SINGLE;
                }
                break;

            default:
                throw new RuntimeException("Unknown encoding state " + m_rleState + ". This is a bug");
        }
    }

    /**
     * Write all bytes in the array, one at a time, through
     * {@link #write(int)}.
     */
    @Override
    public void write(final byte[] data) throws IOException
    {
        write(data, 0, data.length);
    }

    /**
     * Write {@code len} bytes from the array starting at {@code offset}, one
     * at a time, through {@link #write(int)}.
     */
    @Override
    public void write(final byte[] data, final int offset, final int len) throws IOException
    {
        // Range validation is done by BZip2OutputStream
        final int end = offset + len;
        for (int i = offset; i < end; i++)
        {
            write(data[i] & 0xFF);
        }
    }

    /**
     * Flush any pending run length count, write the last block if it is not
     * empty and, if an encoded block writer is used, tell it that there are
     * no more blocks. The wrapped {@link BitOutput} is not closed.
     * <p>
     * Calling this method more than once has no effect.
     */
    @Override
    public void close() throws IOException
    {
        if (m_closed)
        {
            // Closing again would re-emit the last block and the end marker.
            return;
        }
        m_closed = true;

        if (m_rleState == RLEState.COUNTING_MULTIPLE)
        {
            // Update the checksum and write the current count.
            for (int i = 0; i < m_numberOfSame; i++)
            {
                m_blockChecksum.update(m_last & 0xFF);
            }
            writeByte(m_numberOfSame);
        }

        if (!isEmpty())
        {
            writeCurBlock();
        }

        if (m_encodedBlockWriter != null)
        {
            // Tell the encoded block writer that we're done.
            m_encodedBlockWriter.writeBlock(m_blockNo, null);
        }

        // Don't close the wrapped BitOutput. It will be used later on to write
        // the EOF block.
        super.close();
    }
}

View File

@ -0,0 +1,120 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.IOException;
import java.io.InputStream;
/**
 * Decode Burrows Wheeler encoded data.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class BurrowsWheelerDecoder
{
    /**
     * A stream that yields the decoded bytes by following the pointer chain
     * built by {@link BurrowsWheelerDecoder#decode()}.
     */
    static class BWInputStream extends InputStream
    {
        private final byte[] m_decoded;
        private final int[] m_ptr;
        private int m_curPointer;
        private boolean m_eof;
        private int m_noLeftToRead;

        /**
         * @param decoded The Burrows Wheeler encoded bytes.
         * @param ptr The pointer chain. Its length is the number of bytes
         * that this stream will return.
         * @param originalDataPointer The index in {@code ptr} where the chain
         * starts. Must be a valid index in {@code ptr}.
         */
        BWInputStream(byte[] decoded, int[] ptr, int originalDataPointer)
        {
            m_decoded = decoded;
            m_ptr = ptr;
            m_curPointer = ptr[originalDataPointer];
            m_noLeftToRead = ptr.length;
        }

        /**
         * @return The next decoded byte as a value in the interval 0..255, or
         * {@code -1} when all bytes have been read.
         */
        @Override
        public int read() throws IOException
        {
            if (m_eof)
            {
                return -1;
            }

            final int res = m_decoded[m_curPointer] & 0xFF;
            m_eof = --m_noLeftToRead == 0;
            m_curPointer = m_ptr[m_curPointer];
            return res;
        }
    }

    private final byte[] m_decoded;
    private final int m_noBytesDecoded;
    private final int[] m_byteFrequencies;
    private final int m_originalDataPointer;

    /**
     * @param encoded The encoded data. This array may be longer than the actual
     * amount of encoded data. The {@code noBytesEncoded} parameter determines
     * how much of the array that will be used.
     * @param noBytesEncoded The length of the encoded data.
     * @param byteFrequencies The number of times each byte occur in the data.
     * @param originalDataPointer The row number of the original data in the
     * Burrows Wheeler matrix. It must be in the interval
     * {@code 0..noBytesEncoded - 1}.
     * @throws IOException If the pointer to the original data is outside of
     * the encoded data.
     */
    BurrowsWheelerDecoder(byte[] encoded, int noBytesEncoded, int[] byteFrequencies, int originalDataPointer) throws IOException
    {
        // The pointer is used as an index into an array of noBytesEncoded
        // elements, so noBytesEncoded itself is already out of range.
        if ((originalDataPointer < 0) || (originalDataPointer >= noBytesEncoded))
        {
            throw new IOException("Invalid pointer to original data in block header " + originalDataPointer + ". It must be non-negative and less than the size of data in the block " + noBytesEncoded);
        }
        m_decoded = encoded;
        m_noBytesDecoded = noBytesEncoded;
        m_byteFrequencies = byteFrequencies;
        m_originalDataPointer = originalDataPointer;
    }

    /**
     * Build the transformation vector for the encoded data and return a
     * stream that reads the decoded bytes.
     * @return A stream containing the decoded data.
     */
    InputStream decode()
    {
        // The byte frequency array contains the frequency of each byte in the
        // data. Create an array that, for each byte value, specifies how many
        // bytes of lower value that occur in the data.
        final int[] noOfLowerBytes = new int[256];
        int runningTotal = 0;
        for (int value = 0; value < 256; value++)
        {
            noOfLowerBytes[value] = runningTotal;
            if (value < 255)
            {
                // The frequency of the last byte value is never needed.
                runningTotal += m_byteFrequencies[value];
            }
        }

        // The ptr array will contain a chain of positions of the decoded bytes
        // in the decoded array.
        final int[] ptr = new int[m_noBytesDecoded];
        for (int i = 0; i < m_noBytesDecoded; i++)
        {
            final int value = m_decoded[i] & 0xFF;
            // Take the next free position for this byte value. Incrementing it
            // makes the next occurrence of the value end up in the position
            // after this one.
            ptr[noOfLowerBytes[value]++] = i;
        }

        return new BWInputStream(m_decoded, ptr, m_originalDataPointer);
    }
}

View File

@ -0,0 +1,99 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
/**
 * Burrows Wheeler encoder.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class BurrowsWheelerEncoder
{
    /**
     * The result of a Burrows Wheeler encoding: the last column of the sorted
     * rotation matrix and the row where the original data ended up.
     */
    static class BurrowsWheelerEncodingResult
    {
        // The values of the last column of the matrix
        final byte[] m_lastColumn;
        // The row number of the first row (the row which contains the incoming
        // data) in the sorted matrix
        final int m_firstPointer;

        private BurrowsWheelerEncodingResult(byte[] lastColumn, int firstPointer)
        {
            m_lastColumn = lastColumn;
            m_firstPointer = firstPointer;
        }
    }

    // The shortest length that will be quicksorted rather than shell sorted
    private static final int MIN_QUICKSORT_LENGTH = 18;

    // The data array containing the unencoded data.
    private final byte[] m_data;
    // The length of the data in the array. Data occupies the positions 0 to
    // m_length - 1 in the array.
    private final int m_length;
    // Contains preallocated data structures. Used to reduce the number of
    // temporary objects that are created and thus avoid time spent gc:ing.
    private final EncodingScratchpad m_scratchpad;

    /**
     * @param data This array should contain a 100 byte overshoot. See
     * {@link ThreeWayRadixQuicksort#ThreeWayRadixQuicksort(byte[], int, int, EncodingScratchpad)}
     * .
     * @param length The length of the data in the array.
     * @param sp The scratchpad that provides the sort structures and the
     * array that the last column is written to.
     * @throws IllegalArgumentException If {@code length} is greater than the
     * length of the data array.
     */
    BurrowsWheelerEncoder(byte[] data, int length, EncodingScratchpad sp)
    {
        if (length > data.length)
        {
            throw new IllegalArgumentException("Invalid data length " + length + ". It must be <= the length of the data array (" + data.length + ")");
        }
        m_data = data;
        m_length = length;
        m_scratchpad = sp;
    }

    /**
     * Run a Burrows Wheeler encoding.
     * @return The last column of the sorted matrix and the row number of the
     * original data. The last column array is the scratchpad's
     * {@code m_lastColumn} array, not a copy.
     */
    BurrowsWheelerEncodingResult encode()
    {
        // Create all rotations of m_data, put them in a matrix and sort the
        // first column. For each row in the matrix, ptr contains a pointer to
        // the first byte of the row's m_data rotation.
        final int[] ptr = new ThreeWayRadixQuicksort(m_data, m_length, MIN_QUICKSORT_LENGTH, m_scratchpad).sort();

        // Get the contents of the last column in the matrix. This, and the
        // pointer to the location of where the first byte in m_data is in the
        // last column, is the result from the Burrows Wheeler encoding.
        final byte[] lastColumn = m_scratchpad.m_lastColumn;
        int firstRow = -1;
        for (int row = 0; row < m_length; row++)
        {
            // The last byte of a rotation is the byte before its first byte.
            int lastBytePos = ptr[row] - 1;
            if (lastBytePos < 0)
            {
                // This rotation starts at position 0, so this is the row that
                // holds the original data. Wrap around to the end of the data.
                lastBytePos += m_length;
                firstRow = row;
            }
            lastColumn[row] = m_data[lastBytePos];
        }
        return new BurrowsWheelerEncodingResult(lastColumn, firstRow);
    }
}

View File

@ -0,0 +1,63 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
/**
 * Checksum algorithm used by bzip2.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class CRC
{
    // The generator polynomial. This is the value of table entry 1.
    private static final int POLYNOMIAL = 0x04c11db7;

    // Lookup table with one entry per possible byte value. The entries have
    // the same values as the table in bzip2's crctable.c.
    private static final int[] CRC_TABLE = createTable();

    private int m_crc = 0xFFFFFFFF;

    /**
     * Calculate the lookup table by running every byte value, placed in the
     * top eight bits, through eight shift-and-conditionally-xor steps.
     */
    private static int[] createTable()
    {
        final int[] table = new int[256];
        for (int value = 0; value < table.length; value++)
        {
            int entry = value << 24;
            for (int bit = 0; bit < 8; bit++)
            {
                final boolean topBitSet = entry < 0;
                entry <<= 1;
                if (topBitSet)
                {
                    entry ^= POLYNOMIAL;
                }
            }
            table[value] = entry;
        }
        return table;
    }

    /**
     * Update the checksum with one byte.
     * @param b An integer value in the interval 0..255.
     * @throws IllegalArgumentException If the value is outside of that
     * interval.
     */
    void update(int b)
    {
        final boolean inRange = (b >= 0) && (b <= 255);
        if (!inRange)
        {
            throw new IllegalArgumentException(String.valueOf(b));
        }
        final int tableIndex = (m_crc >>> 24) ^ b;
        m_crc = CRC_TABLE[tableIndex] ^ (m_crc << 8);
    }

    /**
     * Get the checksum for the bytes seen so far.
     * @return The bitwise complement of the internal checksum register.
     */
    int getValue()
    {
        return ~m_crc;
    }
}

View File

@ -0,0 +1,51 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.InputStream;
/**
 * A bzip2 block containing compressed data.
 * <p>
 * Holds the stream that the block's data is read from together with the
 * checksum for the block.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class CompressedDataBlock implements Block
{
    private final int m_blockChecksum;
    private final InputStream m_stream;

    /**
     * @param stream The stream with the block's data. May not be
     * {@code null}.
     * @param blockChecksum The checksum for the block.
     * @throws NullPointerException If {@code stream} is {@code null}.
     */
    CompressedDataBlock(InputStream stream, int blockChecksum)
    {
        // Dereference the stream to fail right away on null.
        stream.getClass();

        m_blockChecksum = blockChecksum;
        m_stream = stream;
    }

    /**
     * Get the stream that was passed to the constructor.
     */
    InputStream getStream()
    {
        return m_stream;
    }

    /**
     * Get the checksum that was passed to the constructor.
     */
    int getBlockChecksum()
    {
        return m_blockChecksum;
    }
}

View File

@ -0,0 +1,38 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
/**
 * This object contains data for an encoded bzip2 block: a number of whole
 * bytes followed by an optional number of trailing bits.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class EncodedBlockData
{
    // The whole bytes of the encoded block.
    final byte[] m_bytes;
    // The number of trailing bits.
    final int m_noBits;
    // The value of the trailing bits.
    final int m_bitValue;

    /**
     * @param bytes The whole bytes of the encoded block. The array is stored
     * as is; it is not copied.
     * @param noBits The number of trailing bits.
     * @param bitValue The value of the trailing bits.
     */
    EncodedBlockData(byte[] bytes, int noBits, int bitValue)
    {
        this.m_bitValue = bitValue;
        this.m_noBits = noBits;
        this.m_bytes = bytes;
    }
}

View File

@ -0,0 +1,146 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import org.at4j.support.io.BitOutput;
/**
 * This is used to write encoded blocks in the right order when several encoding
 * threads are used with the {@link BZip2OutputStream}.
 * <p>
 * Blocks may be handed to {@link #writeBlock(int, EncodedBlockData)} in any
 * order. A block that arrives before its turn is saved and written as soon
 * as all blocks before it have been written.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class EncodedBlockWriter
{
    // All variables are protected by this object's intrinsic lock

    private final BitOutput m_out;
    // Blocks that have arrived before their turn, keyed by block number. A
    // null value is the end of stream marker.
    private final Map<Integer, EncodedBlockData> m_savedBlocks = new HashMap<Integer, EncodedBlockData>();
    // This latch is used to signal to the bzip2 output stream when this writer
    // is finished.
    private final CountDownLatch m_doneLatch = new CountDownLatch(1);
    private int m_nextBlockToWrite = 0;
    private boolean m_hasError;

    /**
     * @param out The output that the encoded blocks are written to.
     */
    EncodedBlockWriter(BitOutput out)
    {
        m_out = out;
    }

    /**
     * Write the bytes of the block followed by its trailing bits, if any.
     */
    private void writeEncodedBlockData(final EncodedBlockData bd) throws IOException
    {
        m_out.writeBytes(bd.m_bytes, 0, bd.m_bytes.length);
        if (bd.m_noBits > 0)
        {
            m_out.writeBits(bd.m_bitValue, bd.m_noBits);
        }
    }

    /**
     * Write the block that is next in turn and then all saved blocks that
     * directly follow it. The latch is released when the end of stream marker
     * is reached.
     */
    private void writeBlockInternal(final int blockNo, final EncodedBlockData blockData) throws IOException
    {
        if (blockData == null)
        {
            // We're done
            m_doneLatch.countDown();
            return;
        }

        writeEncodedBlockData(blockData);
        while (m_savedBlocks.containsKey(++m_nextBlockToWrite))
        {
            // Remove the block from the map so that its data can be garbage
            // collected once it has been written. containsKey is needed
            // above since the end of stream marker is stored as a null value.
            final EncodedBlockData savedBd = m_savedBlocks.remove(m_nextBlockToWrite);
            if (savedBd == null)
            {
                // The end of stream marker.
                m_doneLatch.countDown();
                break;
            }
            writeEncodedBlockData(savedBd);
        }
    }

    /**
     * It is not time to write this block just yet. Save it until it is time.
     * @param blockNo The block number.
     * @param blockData The block data.
     */
    private void saveBlock(final int blockNo, EncodedBlockData blockData)
    {
        m_savedBlocks.put(blockNo, blockData);
    }

    /**
     * Remember that writing has failed and release anyone waiting in
     * {@link #waitFor()}.
     */
    private void registerFailure()
    {
        m_hasError = true;
        m_doneLatch.countDown();
    }

    /**
     * Write the block data to the output if it is the next block to write. If
     * not, queue it for later writing.
     * <p>
     * After a failed call, all subsequent calls return without doing
     * anything.
     * @param blockNo The block number.
     * @param blockData The block data or {@code null} as an end of stream
     * marker.
     * @throws IOException On I/O errors.
     */
    synchronized void writeBlock(final int blockNo, final EncodedBlockData blockData) throws IOException
    {
        if (m_hasError)
        {
            return;
        }

        try
        {
            if (blockNo == m_nextBlockToWrite)
            {
                writeBlockInternal(blockNo, blockData);
            }
            else
            {
                saveBlock(blockNo, blockData);
            }
        }
        catch (Error e)
        {
            registerFailure();
            throw e;
        }
        catch (RuntimeException e)
        {
            registerFailure();
            throw e;
        }
        catch (IOException e)
        {
            registerFailure();
            throw e;
        }
    }

    /**
     * Wait until the end of stream marker has been reached or until writing
     * has failed.
     * @throws InterruptedException If the calling thread is interrupted while
     * waiting.
     */
    void waitFor() throws InterruptedException
    {
        m_doneLatch.await();
    }
}

View File

@ -0,0 +1,107 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
/**
 * A collection of preallocated arrays and objects for a bzip2 encoder thread.
 * Reusing them between blocks reduces the number of object and array
 * allocations.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class EncodingScratchpad
{
    private static final int MAX_BLOCK_LENGTH = BZip2OutputStreamSettings.MAX_BLOCK_SIZE * 100 * 1000;
    private static final int MAX_NO_OF_SEGMENTS = MAX_BLOCK_LENGTH / BlockEncoder.NO_OF_SYMBOLS_PER_SEGMENT;

    /** May hold the frequency of each symbol in the data. */
    final int[] m_frequencies = new int[BlockEncoder.MAX_NO_OF_MTF_SYMBOLS];

    /** A move to front alphabet. */
    final byte[] m_mtfAlphabet = new byte[BlockEncoder.MAX_NO_OF_MTF_SYMBOLS];

    /**
     * May hold the frequencies of the different symbols encoded by the
     * different trees (up to six trees).
     */
    final int[][] m_frequencies2d = new int[BlockEncoder.MAX_NO_OF_HUFFMAN_TREES][BlockEncoder.MAX_NO_OF_MTF_SYMBOLS];

    /**
     * MTF and RL encoded data before the Huffman encoding. Sized for a
     * maximum size block plus the EOB symbol; the space actually used will
     * probably be significantly less than that.
     */
    final int[] m_encodedData = new int[MAX_BLOCK_LENGTH + 1];

    /**
     * Frequencies of each two-byte combination, used for the radix sort. Has
     * an overshoot of one position.
     */
    final int[] m_twoByteFrequencies = new int[65536 + 1];

    /** Pointers created by the 3-way radix quicksort. */
    final int[] m_ptrs = new int[MAX_BLOCK_LENGTH];

    /** A cache for sort results. */
    final int[] m_sortCache = new int[MAX_BLOCK_LENGTH + ThreeWayRadixQuicksort.DATA_OVERSHOOT];

    /**
     * Array for temporary data. It is replaced by a larger array by
     * {@link #getTemp(int)} as the need arises.
     */
    int[] m_tempArea = new int[1024];

    /** Stack for block sorting. */
    final ThreeWayRadixQuicksort.QuickSortRangeInfo[] m_sortStack = new ThreeWayRadixQuicksort.QuickSortRangeInfo[ThreeWayRadixQuicksort.SORT_STACK_SIZE];

    /**
     * The results when all segments of a block are encoded with all available
     * Huffman trees.
     */
    final int[][] m_encodingResults = new int[MAX_NO_OF_SEGMENTS][BlockEncoder.MAX_NO_OF_HUFFMAN_TREES];

    final int[] m_categoriesPerSegment = new int[MAX_NO_OF_SEGMENTS];

    /** The last column after Burrows Wheeler encoding. */
    final byte[] m_lastColumn = new byte[MAX_BLOCK_LENGTH];

    /** The bucket sorting order. */
    final int[] m_sortOrder = new int[256];

    /** Used when scanning pointers. */
    final int[] m_copyStart = new int[256];
    final int[] m_copyEnd = new int[256];

    /**
     * Mapping between a symbol and its index number in the array of symbols
     * used by the run length encoder.
     */
    final byte[] m_sequenceMap = new byte[256];

    /** Heap used when calculating Huffman tree code lengths. */
    final int[] m_htHeap = new int[BlockEncoder.MAX_NO_OF_MTF_SYMBOLS + 2];
    final int[] m_htWeight = new int[BlockEncoder.MAX_NO_OF_MTF_SYMBOLS * 2];
    final int[] m_htParent = new int[BlockEncoder.MAX_NO_OF_MTF_SYMBOLS * 2];

    /** Flags for all sorted large buckets. */
    final boolean[] m_sortedLargeBuckets = new boolean[256];

    /** Flags for all sorted small buckets. */
    final boolean[] m_sortedSmallBuckets = new boolean[256 * 256];

    /**
     * Get a temporary integer array with room for at least {@code len}
     * integers.
     * <p>
     * The current temporary array is returned if it is long enough.
     * Otherwise it is replaced by a new array that is 100 elements longer
     * than requested; the contents of the old array are not copied.
     * @param len The minimum length of the array.
     * @return The temporary array. It may be longer than {@code len}.
     */
    int[] getTemp(final int len)
    {
        final boolean tooSmall = len > m_tempArea.length;
        if (tooSmall)
        {
            m_tempArea = new int[len + 100];
        }
        return m_tempArea;
    }
}

View File

@ -0,0 +1,49 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
/**
 * This is the kind of thread used for encoding bzip2 blocks. Every thread
 * carries its own {@link EncodingScratchpad} and a reference to an
 * {@link ErrorState}.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class EncodingThread extends Thread
{
    private final ErrorState m_errorState;
    private final EncodingScratchpad m_scratchpad;

    /**
     * @param r The runnable that the thread will run.
     * @param es The error state returned by {@link #getErrorState()}.
     */
    EncodingThread(Runnable r, ErrorState es)
    {
        super(r);
        m_scratchpad = new EncodingScratchpad();
        m_errorState = es;
    }

    /**
     * Get this thread's scratchpad. The same instance is returned every time.
     */
    EncodingScratchpad getScratchpad()
    {
        return m_scratchpad;
    }

    /**
     * Get the error state that this thread was created with.
     */
    ErrorState getErrorState()
    {
        return m_errorState;
    }
}

View File

@ -0,0 +1,41 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.util.concurrent.ThreadFactory;
/**
 * This is a factory for creating {@link EncodingThread} objects. All threads
 * created by one factory share the factory's {@link ErrorState}.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class EncodingThreadFactory implements ThreadFactory
{
    private final ErrorState m_errorState;

    /**
     * @param es The error state handed to every created thread.
     */
    EncodingThreadFactory(ErrorState es)
    {
        m_errorState = es;
    }

    /**
     * Create a new {@link EncodingThread} for the runnable.
     * @param r The runnable that the thread will run.
     * @return The new thread. It is not started.
     */
    @Override
    public Thread newThread(Runnable r)
    {
        return new EncodingThread(r, m_errorState);
    }
}

View File

@ -0,0 +1,39 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
/**
 * A bzip2 block containing end of stream information, i.e. the CRC value
 * that was read.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class EosBlock implements Block
{
    // The CRC value given to the constructor.
    private final long m_readCrc;

    /**
     * @param readCrc The CRC value that was read.
     */
    EosBlock(long readCrc)
    {
        this.m_readCrc = readCrc;
    }

    /**
     * Get the CRC value that this block was created with.
     */
    long getReadCrc()
    {
        return this.m_readCrc;
    }
}

View File

@ -0,0 +1,52 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.IOException;
/**
* This is used to keep track of encoding errors so that they can be reported
* to the object that they concern.
* <p>
* Every error is registered with an owner token that is a unique identifier for
* the object that is affected by the error. The owner token object must have a
* good {@link Object#hashCode()} method since an implementation may use it as
* a key in a hash map.
* @author Karl Gustafsson
* @since 1.1
*/
interface ErrorState
{
/**
* Register an {@link Exception} or an {@link Error} so that it can be
* reported by a later call to {@link #checkAndClearErrors(Object)}.
* @param t The exception or error.
* @param ownerToken A unique identifier for the error owner, i.e. the
* object that the encoding thread is performing work for.
*/
void registerError(Throwable t, Object ownerToken);
/**
* Check for errors. If an error is registered, it is thrown from this
* method. As the method name says, the registered error is also cleared by
* the check.
* @param ownerToken The owner.
* @throws Error If there is a registered {@link Error} for this owner.
* @throws RuntimeException If there is a registered
* {@link RuntimeException} for this owner.
* @throws IOException If there is a registered {@link IOException} for this
* owner.
*/
void checkAndClearErrors(Object ownerToken) throws Error, RuntimeException, IOException;
}

View File

@ -0,0 +1,438 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.IOException;
import java.util.Arrays;
import org.at4j.support.io.BitInput;
import org.at4j.support.io.BitOutput;
/**
 * This object represents the type of Huffman tree that is used by bzip2. The
 * "high value branch" means that leaf nodes have the smallest possible values
 * and non-leaf nodes have the highest possible values at each tree depth.
 * <p>
 * A tree is built from a list of code lengths (one per symbol). Such a list
 * can be calculated from symbol frequencies with
 * {@link #createCodeLengths(int[], int, int, EncodingScratchpad)}.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class HighValueBranchHuffmanTree
{
    // Upper bound for the alphabet size. Only used by the sanity assertions in
    // createCodeLengths.
    private static final int MAX_NO_OF_SYMBOLS = 258;

    // The shortest code length for symbols in this tree.
    private final int m_minLength;

    // The longest code length for symbols in this tree.
    private final int m_maxLength;

    // m_maxLength - m_minLength + 1;
    // Declared package private for the unit tests.
    final int m_numberOfLengths;

    // The value limit at each data length, i.e. the maximum value for leaf
    // nodes at that data length. There is no entry for m_maxLength, so the
    // array has the length m_numberOfLengths - 1.
    // Declared package private for the unit tests.
    final int[] m_limitsPerLength;

    // The lowest value for a symbol at each length. The value for length
    // m_minLength is at index 0 in the array.
    // Declared package private for the unit tests.
    final int[] m_baseValuesPerLength;

    // The offset in the m_symbolSequenceNos array for the first symbol for each
    // Huffman code length. The array has the length m_maxLength - m_minLength +
    // 1. The value for m_minLength is at index 0 (and is 0).
    // Declared package private for the unit tests.
    final int[] m_symbolOffsetPerLength;

    // The index of the symbol table for Huffman code no n.
    // Declared package private for the unit tests.
    final int[] m_symbolSequenceNos;

    // This table contains the Huffman codes and the code bit lengths for each
    // symbol. It is created when using the constructor that calculates the
    // Huffman trees to speed up encoding. It is null for trees that are not
    // created for encoding.
    final int[][] m_huffmanCodesAndLengthsPerSymbol;

    /**
     * Get the Huffman code and its bit length for a symbol.
     * @param symbol The symbol. (Not used in the calculation; the code is
     * derived from {@code huffmanIndex} alone.)
     * @param huffmanIndex The symbol's index in the list of sorted symbols.
     * @param codeAndLength An int array of length 2 used to store the result
     * in.
     * @return The {@code codeAndLength} array, with the Huffman code at index 0
     * and the code's bit length at index 1.
     */
    private int[] getCodeAndLengthForSymbol(final int symbol, final int huffmanIndex, final int[] codeAndLength)
    {
        // Calculate the length of the symbol's Huffman code by finding the
        // last length group whose start offset is <= huffmanIndex.
        int deltaLen;
        for (deltaLen = 0; deltaLen < m_numberOfLengths - 1; deltaLen++)
        {
            if (huffmanIndex < m_symbolOffsetPerLength[deltaLen + 1])
            {
                break;
            }
        }
        codeAndLength[0] = m_baseValuesPerLength[deltaLen] + (huffmanIndex - m_symbolOffsetPerLength[deltaLen]);
        codeAndLength[1] = m_minLength + deltaLen;
        return codeAndLength;
    }

    /**
     * Create a canonical Huffman tree for the supplied symbols.
     * <p>
     * Symbol lengths for a canonical Huffman tree can be created by the
     * {@link #createCodeLengths(int[], int, int, EncodingScratchpad)} method.
     * @param symbolLengths The length of the Huffman code for each symbol.
     * @param minLength The shortest Huffman code length in the tree.
     * @param maxLength The longest Huffman code length in the tree.
     * @param forEncoding Should the tree be used for encoding? If so, a lookup
     * table that contains the Huffman code for each symbol is created to speed
     * up the encoding.
     * @throws IllegalArgumentException If {@code minLength} is negative or
     * {@code maxLength} is less than {@code minLength}.
     */
    HighValueBranchHuffmanTree(final int[] symbolLengths, final int minLength, final int maxLength, final boolean forEncoding) throws IllegalArgumentException
    {
        if ((minLength < 0) || (maxLength < minLength))
        {
            throw new IllegalArgumentException("Illegal min or max length, min: " + minLength + ", max: " + maxLength);
        }
        final int numberOfSymbols = symbolLengths.length;
        final int numberOfLengths = maxLength - minLength + 1;

        // Create an array of symbol sequence numbers sorted on their symbol
        // lengths
        m_symbolSequenceNos = new int[numberOfSymbols];
        // The number of symbols having each code length. (A new int array is
        // zero-filled, so the counters need no explicit reset.)
        final int[] numl = new int[numberOfLengths];
        int index = 0;
        for (int i = minLength; i <= maxLength; i++)
        {
            for (int j = 0; j < numberOfSymbols; j++)
            {
                if (symbolLengths[j] == i)
                {
                    m_symbolSequenceNos[index++] = j;
                    numl[i - minLength]++;
                }
            }
        }

        m_symbolOffsetPerLength = new int[numberOfLengths];
        m_symbolOffsetPerLength[0] = 0;
        for (int i = 0; i < numberOfLengths - 1; i++)
        {
            m_symbolOffsetPerLength[i + 1] = m_symbolOffsetPerLength[i] + numl[i];
        }

        // The value limit at each length
        m_limitsPerLength = new int[numberOfLengths - 1];
        m_baseValuesPerLength = new int[numberOfLengths];
        int prevLimit = 0;
        for (int i = minLength; i <= maxLength; i++)
        {
            index = i - minLength;
            // The base value for this length is the value of the smallest
            // allowed symbol for this length. The smallest allowed symbol is
            // the limit for the previous length with a zero at the end.
            m_baseValuesPerLength[index] = prevLimit << 1;
            if (i < maxLength)
            {
                // The limit for this length is the base value for this length
                // plus the number of symbols for this length.
                prevLimit = m_baseValuesPerLength[index] + numl[index];
                m_limitsPerLength[index] = prevLimit - 1;
            }
        }

        m_minLength = minLength;
        m_maxLength = maxLength;
        // Store the int value as is. Narrowing it to a byte would wrap around
        // to a negative number for 128 or more lengths.
        m_numberOfLengths = numberOfLengths;

        if (forEncoding)
        {
            // Create an inverse mapping into the list of sorted symbols
            final int[] huffmanIndexPerSymbol = new int[symbolLengths.length];
            Arrays.fill(huffmanIndexPerSymbol, -1);
            for (int i = 0; i < m_symbolSequenceNos.length; i++)
            {
                huffmanIndexPerSymbol[m_symbolSequenceNos[i]] = i;
            }
            // Create a table containing the Huffman code and its bit length for
            // each symbol. This is used to speed up writes.
            m_huffmanCodesAndLengthsPerSymbol = new int[symbolLengths.length][2];
            int[] codeAndLength = new int[2];
            for (int i = 0; i < symbolLengths.length; i++)
            {
                codeAndLength = getCodeAndLengthForSymbol(i, huffmanIndexPerSymbol[i], codeAndLength);
                m_huffmanCodesAndLengthsPerSymbol[i][0] = codeAndLength[0];
                m_huffmanCodesAndLengthsPerSymbol[i][1] = codeAndLength[1];
            }
        }
        else
        {
            // Don't create these variables. They are only used when writing data
            // and it is assumed that this constructor will only be used to create
            // trees for reading data.
            m_huffmanCodesAndLengthsPerSymbol = null;
        }
    }

    /**
     * Move the heap entry at position {@code nHeap} towards the root until its
     * parent no longer has a greater weight. Heap entries run from 1; the
     * entry at index 0 is a sentinel that stops the loop.
     * @param heap The heap of node numbers.
     * @param weight The weight for each node number.
     * @param nHeap The heap position of the entry to move.
     */
    private static void upHeap(final int[] heap, final int[] weight, int nHeap)
    {
        int tmp = heap[nHeap];
        while (weight[tmp] < weight[heap[nHeap >>> 1]])
        {
            heap[nHeap] = heap[nHeap >>> 1];
            nHeap >>>= 1;
        }
        heap[nHeap] = tmp;
    }

    /**
     * Move the heap entry at position {@code n} away from the root until none
     * of its children has a smaller weight.
     * @param heap The heap of node numbers.
     * @param weight The weight for each node number.
     * @param nHeap The position of the last entry in the heap.
     * @param n The heap position of the entry to move.
     */
    private static void downHeap(final int[] heap, final int[] weight, final int nHeap, int n)
    {
        int tmp = heap[n];
        while (true)
        {
            int yy = n << 1;
            if (yy > nHeap)
            {
                // No children.
                break;
            }
            if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]])
            {
                // Continue with the lighter one of the two children.
                yy++;
            }
            if (weight[tmp] < weight[heap[yy]])
            {
                break;
            }
            heap[n] = heap[yy];
            n = yy;
        }
        heap[n] = tmp;
    }

    /**
     * Combine the weights of two nodes into the weight of their parent node.
     * The upper 24 bits of the two weights are added. The lower eight bits of
     * the result is one more than the greatest of the lower eight bits of the
     * two weights.
     */
    private static int addWeights(final int w1, final int w2)
    {
        final int d1 = w1 & 0xFF;
        final int d2 = w2 & 0xFF;
        final int ww1 = w1 & 0xFFFFFF00;
        final int ww2 = w2 & 0xFFFFFF00;
        return (ww1 + ww2) | (1 + Math.max(d1, d2));
    }

    /**
     * @return The shortest code length for symbols in this tree.
     */
    int getMinLength()
    {
        return m_minLength;
    }

    /**
     * @return The longest code length for symbols in this tree.
     */
    int getMaxLength()
    {
        return m_maxLength;
    }

    /**
     * Get a sorted array with symbol sequence numbers and their Huffman code
     * lengths. The returned array is sorted with the most frequent occurring
     * symbol first (i.e. the symbol with the shortest Huffman code).
     * <p>
     * This method is used for testing.
     * @return Array a[n][0] = symbol, a[n][1] = Huffman code length
     */
    int[][] getSortedSymbolSequenceNosAndCodeLengths()
    {
        int[][] res = new int[m_symbolSequenceNos.length][2];
        int length = m_minLength;
        for (int i = 0; i < m_symbolSequenceNos.length; i++)
        {
            // Step to the next code length when passing the start offset of
            // its group of symbols.
            while ((length < m_maxLength) && (i >= m_symbolOffsetPerLength[length - m_minLength + 1]))
            {
                length++;
            }
            res[i][0] = m_symbolSequenceNos[i];
            res[i][1] = length;
        }
        return res;
    }

    /**
     * Read the next symbol.
     * @param in The input to read the symbol from.
     * @return The next symbol.
     * @throws IOException On I/O errors.
     */
    int readNext(final BitInput in) throws IOException
    {
        int code = in.readBits(m_minLength);
        // m_limitsPerLength.length == 0 means that all Huffman codes have the
        // same length.
        if (m_limitsPerLength.length == 0 || code <= m_limitsPerLength[0])
        {
            return m_symbolSequenceNos[code];
        }
        else
        {
            int codeLength = m_minLength;
            int index = 1;
            while (true)
            {
                code = (code << 1) | (in.readBit() ? 1 : 0);
                codeLength++;
                // There is no limit stored for the maximum length, so the
                // length test must come first.
                if ((codeLength == m_maxLength) || (code <= m_limitsPerLength[index]))
                {
                    return m_symbolSequenceNos[m_symbolOffsetPerLength[index] + (code - m_baseValuesPerLength[index])];
                }
                index++;
            }
        }
    }

    /**
     * Write a symbol. This uses the lookup table that is only created for
     * trees that are created for encoding.
     * @param out The output to write to.
     * @param symbol The symbol to write.
     * @throws IOException On I/O errors.
     */
    void write(final BitOutput out, final int symbol) throws IOException
    {
        out.writeBitsLittleEndian(m_huffmanCodesAndLengthsPerSymbol[symbol][0], m_huffmanCodesAndLengthsPerSymbol[symbol][1]);
    }

    /**
     * Get the number of bits used for encoding the symbol. This uses the
     * lookup table that is only created for trees that are created for
     * encoding.
     * @param symbol The symbol.
     * @return The bit length of the symbol's Huffman code.
     */
    int getBitLength(int symbol)
    {
        return m_huffmanCodesAndLengthsPerSymbol[symbol][1];
    }

    /**
     * Calculate the Huffman code lengths for the optimal, depth-limited Huffman
     * tree for the supplied symbol frequencies.
     * <p>
     * This method uses the (slightly magic) algorithm from bzip2 1.0.5.
     * @param frequencies The frequencies for each symbol in the data to be
     * encoded.
     * @param noSymbols The number of different symbols in the data to encode.
     * This should be the maximum symbol value (the EOB symbol's value) + 1.
     * @param maxLength The maximum code length which also will be the depth of
     * the Huffman tree. If this is too small, this method will get stuck in an
     * infinite loop.
     * @param scratchpad Supplies the preallocated heap, weight and parent
     * arrays that are used as working memory. Their previous contents are
     * overwritten.
     * @return The Huffman code lengths for each symbol.
     */
    static int[] createCodeLengths(final int[] frequencies, final int noSymbols, final int maxLength, final EncodingScratchpad scratchpad)
    {
        /*
         * Nodes and heap entries run from 1. Entry 0 for both the heap and
         * nodes is a sentinel.
         */
        final int[] heap = scratchpad.m_htHeap;
        final int[] weight = scratchpad.m_htWeight;
        final int[] parent = scratchpad.m_htParent;
        final int[] res = new int[noSymbols];

        // The frequency is stored in the upper 24 bits of the weight. A symbol
        // that does not occur is given the frequency 1.
        for (int i = 0; i < noSymbols; i++)
        {
            weight[i + 1] = (frequencies[i] == 0 ? 1 : frequencies[i]) << 8;
        }

        while (true)
        {
            int noNodes = noSymbols;
            int nHeap = 0;
            heap[0] = 0;
            weight[0] = 0;
            parent[0] = -2;

            // Add all symbols to the heap.
            for (int i = 1; i <= noSymbols; i++)
            {
                parent[i] = -1;
                nHeap++;
                heap[nHeap] = i;
                upHeap(heap, weight, nHeap);
            }
            assert nHeap < MAX_NO_OF_SYMBOLS + 2;

            // Repeatedly replace the two lightest nodes with a new parent
            // node until only the root node remains.
            while (nHeap > 1)
            {
                int n1 = heap[1];
                heap[1] = heap[nHeap];
                nHeap--;
                downHeap(heap, weight, nHeap, 1);
                int n2 = heap[1];
                heap[1] = heap[nHeap];
                nHeap--;
                downHeap(heap, weight, nHeap, 1);
                noNodes++;
                parent[n1] = parent[n2] = noNodes;
                weight[noNodes] = addWeights(weight[n1], weight[n2]);
                parent[noNodes] = -1;
                nHeap++;
                heap[nHeap] = noNodes;
                upHeap(heap, weight, nHeap);
            }
            assert noNodes < MAX_NO_OF_SYMBOLS * 2;

            // The code length for a symbol is the number of steps from its
            // node up to the root node.
            boolean tooLong = false;
            for (int i = 1; i <= noSymbols; i++)
            {
                int j = 0;
                int k = i;
                while (parent[k] >= 0)
                {
                    k = parent[k];
                    j++;
                }
                res[i - 1] = j;
                if (j > maxLength)
                {
                    tooLong = true;
                    break;
                }
            }
            if (!tooLong)
            {
                break;
            }

            // At least one code was too long. Halve the frequencies (rounded
            // so that they stay >= 1) and try again.
            for (int i = 1; i <= noSymbols; i++)
            {
                int j = weight[i] >> 8;
                j = 1 + (j / 2);
                weight[i] = j << 8;
            }
        }
        return res;
    }
}

View File

@ -0,0 +1,67 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
 * An {@link ErrorState} that stores at most one error per owner token, so
 * that several observers can use the same instance without seeing each
 * other's errors.
 * <p>
 * This is used when sharing the same
 * {@link java.util.concurrent.ExecutorService} between several
 * {@link BZip2OutputStream}:s.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class MultipleObserverErrorState implements ErrorState
{
    // The registered error for each owner token. Registering a new error for
    // an owner replaces the owner's previous error, if any.
    private Map<Object, Throwable> m_errorPerOwner = new ConcurrentHashMap<Object, Throwable>(4);

    /**
     * Store the error for the owner, replacing any error that is already
     * stored for that owner.
     */
    public void registerError(Throwable t, Object ownerToken)
    {
        m_errorPerOwner.put(ownerToken, t);
    }

    /**
     * Remove the error stored for the owner, if any, and throw it. An error
     * that is neither an {@link IOException}, a {@link RuntimeException} nor
     * an {@link Error} is thrown wrapped in a {@link RuntimeException}.
     */
    public void checkAndClearErrors(Object ownerToken) throws Error, RuntimeException, IOException
    {
        final Throwable pending = m_errorPerOwner.remove(ownerToken);
        if (pending == null)
        {
            // Nothing registered for this owner.
            return;
        }
        if (pending instanceof IOException)
        {
            throw (IOException) pending;
        }
        if (pending instanceof RuntimeException)
        {
            throw (RuntimeException) pending;
        }
        if (pending instanceof Error)
        {
            throw (Error) pending;
        }
        throw new RuntimeException(pending);
    }
}

View File

@ -0,0 +1,164 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.IOException;
import java.io.InputStream;
/**
 * This stream run length decodes the data that is read from a wrapped stream.
 * It is used by the {@link BZip2InputStream}.
 * <p>
 * When four equal bytes in a row have been read from the wrapped stream, the
 * next byte read from it is treated as the number of additional times to
 * repeat that byte. A checksum is calculated over all returned bytes and is
 * compared with the expected checksum when the wrapped stream ends.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class RLEDecodingInputStream extends InputStream
{
    private enum RLEState
    {
        READING, REPEATING, ABOUT_TO_READ_HOW_MANY_TO_REPEAT, EOF
    }

    // Block checksum calculated while reading the block contents.
    private final CRC m_blockChecksum = new CRC();
    private final InputStream m_wrapped;
    // The checksum that the calculated checksum is compared with at EOF.
    private final long m_readChecksum;
    private RLEState m_state;
    // The number of repeats of m_last that are left to return.
    private int m_noLeftToRepeat;
    // The last byte read from the wrapped stream in the READING state.
    private int m_last;
    // The length of the current run of bytes equal to m_last.
    private int m_numberOfSimilar;

    /**
     * @param wrapped The stream to read run length encoded data from.
     * @param readChecksum The expected checksum for the decoded data.
     */
    RLEDecodingInputStream(InputStream wrapped, long readChecksum)
    {
        m_wrapped = wrapped;
        m_readChecksum = readChecksum;
        m_last = -1;
        m_numberOfSimilar = 0;
        m_state = RLEState.READING;
    }

    /**
     * Verify the calculated checksum against the expected checksum.
     * @throws IOException If the checksums differ.
     */
    private void handleEof() throws IOException
    {
        if (m_blockChecksum.getValue() != m_readChecksum)
        {
            throw new IOException("Invalid block checksum. Was " + m_blockChecksum.getValue() + ", expected " + m_readChecksum);
        }
    }

    /**
     * Enter the EOF state and verify the checksum.
     * @return -1, the EOF marker.
     */
    private int endOfStream() throws IOException
    {
        m_state = RLEState.EOF;
        handleEof();
        return -1;
    }

    /**
     * Read one ordinary byte from the wrapped stream and keep track of the
     * length of the current run of equal bytes.
     */
    private int readLiteral() throws IOException
    {
        final int val = m_wrapped.read();
        if (val == -1)
        {
            return endOfStream();
        }
        if (val != m_last)
        {
            // A new run starts here.
            m_numberOfSimilar = 1;
            m_last = val;
        }
        else if (++m_numberOfSimilar == 4)
        {
            // Four in a row. The next value is a repeat number.
            m_state = RLEState.ABOUT_TO_READ_HOW_MANY_TO_REPEAT;
            m_numberOfSimilar = 0;
        }
        m_blockChecksum.update(val);
        return val;
    }

    /**
     * Read the repeat number that follows four equal bytes and return the
     * first byte that it results in.
     */
    private int readRepeatCount() throws IOException
    {
        m_noLeftToRepeat = m_wrapped.read();
        if (m_noLeftToRepeat == -1)
        {
            // A rather unexpected EOF
            return endOfStream();
        }
        if (m_noLeftToRepeat == 0)
        {
            // Nothing to repeat. Go on to read the next value.
            m_state = RLEState.READING;
            return readLiteral();
        }
        return emitRepeated();
    }

    /**
     * Return one repeat of the last byte. The state is set to READING when
     * this was the last repeat and to REPEATING otherwise.
     */
    private int emitRepeated()
    {
        m_noLeftToRepeat--;
        m_state = (m_noLeftToRepeat == 0) ? RLEState.READING : RLEState.REPEATING;
        m_blockChecksum.update(m_last);
        return m_last;
    }

    @Override
    public int read() throws IOException
    {
        switch (m_state)
        {
            case READING:
                return readLiteral();
            case ABOUT_TO_READ_HOW_MANY_TO_REPEAT:
                return readRepeatCount();
            case REPEATING:
                return emitRepeated();
            case EOF:
                return -1;
            default:
                throw new RuntimeException("Unknown state " + m_state + ". This is a bug");
        }
    }

    @Override
    public int read(byte[] barr, int off, int len) throws IOException
    {
        // The ranges are validated by BZip2InputStream
        int copied = 0;
        while (copied < len)
        {
            final int next = read();
            if (next < 0)
            {
                // EOF. Report the bytes read so far, if any.
                return copied > 0 ? copied : -1;
            }
            barr[off + copied] = (byte) (next & 0xFF);
            copied++;
        }
        return len;
    }

    /**
     * Close the wrapped stream and then this stream.
     */
    @Override
    public void close() throws IOException
    {
        m_wrapped.close();
        super.close();
    }
}

View File

@ -0,0 +1,63 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;
/**
 * An {@link ErrorState} with room for one single error. The owner tokens are
 * ignored.
 * <p>
 * This is used to propagate errors from encoding threads to the thread using
 * the {@link BZip2OutputStream} when there is only one object using the
 * encoder.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class SingleObserverErrorState implements ErrorState
{
    // The registered error, or null if there is none.
    private final AtomicReference<Throwable> m_pending = new AtomicReference<Throwable>();

    /**
     * Store the error, replacing any error that is already stored. The owner
     * token is not used.
     */
    public void registerError(Throwable t, Object ownerToken)
    {
        m_pending.set(t);
    }

    /**
     * Clear the stored error, if any, and throw it. An error that is neither
     * an {@link IOException}, a {@link RuntimeException} nor an {@link Error}
     * is thrown wrapped in a {@link RuntimeException}. The owner token is not
     * used.
     */
    public void checkAndClearErrors(Object ownerToken) throws Error, RuntimeException, IOException
    {
        final Throwable pending = m_pending.getAndSet(null);
        if (pending == null)
        {
            // No error has been registered since the last check.
            return;
        }
        if (pending instanceof IOException)
        {
            throw (IOException) pending;
        }
        if (pending instanceof RuntimeException)
        {
            throw (RuntimeException) pending;
        }
        if (pending instanceof Error)
        {
            throw (Error) pending;
        }
        throw new RuntimeException(pending);
    }
}

View File

@ -0,0 +1,992 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.comp.bzip2;
import java.util.Arrays;
/**
* This sort algorithm is used by the Burrows Wheeler encoder to sort the data
* to encode. It is an amalgamation of three different sort algorithms. Radix sort
* is used to divide the input into 65536 different buckets. The quicksort is
* used to sort each bucket. When the quicksort iterations produce short enough
* blocks, shell sort is used.
* <p>
* See <a href="http://www.ddj.com/architect/184410724">Dr. Dobb's Journal from
* November 01 1998</a>.
* @author Karl Gustafsson
* @since 1.1
*/
final class ThreeWayRadixQuicksort
{
// The amount of overshoot in the data. See below.
static final int DATA_OVERSHOOT = 20;
// The deepest sort that we do with quicksort. Deeper sorts use shell sort.
// This value should be less than the DATA_OVERSHOOT.
private static final int QUICKSORT_DEPTH_THRESHOLD = 18;
// The size of the sorting stack. This size is the same as for bzip2 1.0.5.
static final int SORT_STACK_SIZE = 100;
/**
* The increments for shell sort. Borrowed from bzip2.
* <p>
* Knuth's increments seem to work better than Incerpi-Sedgewick here.
* Possibly because the number of elems to sort is usually small, typically
* &lt;= 20.
*/
private static final int[] SHELL_SORT_INCREMENTS = { 1, 4, 13, 40, 121, 364, 1093, 3280, 9841, 29524, 88573, 265720, 797161, 2391484 };
// Describes one range (bucket) of positions in the pointer array that
// remains to be sorted.
// Declared package private for the unit tests
static class QuickSortRangeInfo
{
    // The position of the first element in the bucket.
    private final int m_bucketStartPos;
    // The length of the bucket measured in number of symbols.
    private final int m_bucketLen;
    // The offset from the start of each string to the symbol that the
    // strings in this bucket are compared on.
    private final int m_depth;

    QuickSortRangeInfo(int bucketStartPos, int bucketLen, int depth)
    {
        this.m_depth = depth;
        this.m_bucketLen = bucketLen;
        this.m_bucketStartPos = bucketStartPos;
    }
}
// The data array.
private final byte[] m_data;
// The length of the data in the array. Data occupies the positions 0 to
// m_length - 1 in the array.
private final int m_length;
// The shortest data block length that quicksort will be used for. For
// shorter blocks, shell sort is used.
private final int m_minLengthForQuicksort;
// Contains preallocated data structures. Used to reduce the number of
// temporary objects that are created and thus avoid time spent gc:ing.
private final EncodingScratchpad m_scratchpad;
// Cache with sort results that are used to speed up the sorting. This works
// because all strings to sort are rotations of a single string.
private final int[] m_sortCache;
// Use a stack of sort range information instead of calling the quicksort
// methods recursively.
private final QuickSortRangeInfo[] m_sortStack;
// A pointer to the current position in the sort stack.
private int m_sortStackPointer = -1;
// Array containing a pointer for each element in m_data to its location in
// the sorted data.
// This is declared package private for the unit tests.
final int[] m_ptr;
/**
 * Create a new sorting object.
 * @param data The data to sort. This array should contain an overshoot of
 * {@code DATA_OVERSHOOT} bytes. I.e: the data array should have a length of
 * at least {@code length + DATA_OVERSHOOT} bytes, and the last {@code
 * DATA_OVERSHOOT} bytes should be equal to the first {@code DATA_OVERSHOOT}
 * bytes. This makes a few sorting optimizations possible.
 * <p>
 * If the length of the data is less than {@code DATA_OVERSHOOT} bytes, the
 * overshoot should contain the data repeated.
 * @param length The length of the data in the array, not counting the
 * overshoot. The data occupies the positions 0 to {@code length - 1}.
 * @param minLengthForQuicksort Segments that are shorter than this length
 * are sorted with shell sort instead of quicksort.
 * @param sp The scratchpad that supplies the preallocated sort stack, sort
 * cache and pointer arrays. The sort cache is cleared by this constructor.
 * @throws IllegalArgumentException If {@code length} is greater than the
 * length of the data array or if {@code minLengthForQuicksort} is less than
 * 3.
 */
ThreeWayRadixQuicksort(final byte[] data, final int length, final int minLengthForQuicksort, final EncodingScratchpad sp) throws IllegalArgumentException
{
    // Validate the arguments before asserting on the overshoot. Otherwise
    // the assertion would fire first when assertions are enabled, and the
    // documented IllegalArgumentException for a too great length would never
    // be thrown.
    if (length > data.length)
    {
        throw new IllegalArgumentException("Invalid data length " + length + ". It must be <= the length of the data array (" + data.length + ")");
    }
    if (minLengthForQuicksort < 3)
    {
        throw new IllegalArgumentException("Invalid minimum length for Quicksort " + minLengthForQuicksort + ". It must be >= 3");
    }
    assert data.length >= length + DATA_OVERSHOOT;

    m_data = data;
    m_length = length;
    m_minLengthForQuicksort = minLengthForQuicksort;
    m_scratchpad = sp;
    m_sortStack = m_scratchpad.m_sortStack;
    // Clear the sortCache array
    m_sortCache = m_scratchpad.m_sortCache;
    Arrays.fill(m_sortCache, 0);
    m_ptr = m_scratchpad.m_ptrs;
}
/**
 * Get the byte at the specified position in the data array as an unsigned
 * value (0 to 255). No range check is made here; the caller must supply a
 * position that is within the data array.
 * <p>
 * This method is kept small so that it will likely be inlined.
 */
private int getDataAt(final int pos)
{
    final byte raw = m_data[pos];
    return 0xFF & raw;
}
/**
 * Make the initial radix sort of the data into 65536 buckets, one for each
 * possible combination of a byte and the byte that follows it. The byte after
 * the last byte of the data is the first byte of the data. As a side effect,
 * this method populates the {@code m_ptr} array with the results of the sort.
 * <p>
 * This method is declared package-private for the unit tests.
 * @return The start positions for each bucket (in the {@code m_ptr} array).
 * This is the scratchpad's two byte frequencies array.
 */
int[] radixSort()
{
    // Counts the occurrences of each two byte combination at first.
    final int[] buckets = m_scratchpad.m_twoByteFrequencies;
    Arrays.fill(buckets, 0);

    // Walk backwards over the data. The bucket key has the byte at the
    // current position in its high byte and the byte at the following
    // position in its low byte. The walk starts at the last byte, which is
    // followed by the first byte.
    int key = getDataAt(0) << 8;
    int i = m_length - 1;
    while (i >= 0)
    {
        key = (key >>> 8) | (getDataAt(i) << 8);
        buckets[key]++;
        i--;
    }

    // Accumulate the counts so that each entry holds the position after the
    // last element of its bucket.
    for (int b = 1; b < 65536; b++)
    {
        buckets[b] += buckets[b - 1];
    }

    // Walk over the data again and store each data position in the last free
    // slot of its bucket. When this is done, each entry in the buckets array
    // has been counted down to the start position of its bucket.
    key = getDataAt(0) << 8;
    i = m_length - 1;
    while (i >= 0)
    {
        key = (key >>> 8) | (getDataAt(i) << 8);
        final int slot = --buckets[key];
        m_ptr[slot] = i;
        i--;
    }
    return buckets;
}
/**
 * Get the position, out of the three supplied positions, that contains the
 * median of the three values. The value for a position is the data byte at
 * {@code depth} bytes after the data location that the position points to.
 * <p>
 * If the first two values are equal, {@code pos1} is returned without
 * reading the third value. If the third value is equal to any of the other
 * two, {@code pos3} is returned.
 */
private int med3(final int pos1, final int pos2, final int pos3, final int depth)
{
    final int first = getDataAt(m_ptr[pos1] + depth);
    final int second = getDataAt(m_ptr[pos2] + depth);
    if (first == second)
    {
        return pos1;
    }
    final int third = getDataAt(m_ptr[pos3] + depth);
    if ((third == first) || (third == second))
    {
        return pos3;
    }
    // All three values are different from here on.
    if (first < second)
    {
        if (second < third)
        {
            return pos2;
        }
        return first < third ? pos3 : pos1;
    }
    if (second > third)
    {
        return pos2;
    }
    return first < third ? pos1 : pos3;
}
/**
 * Select the pivot value for the quicksort of a bucket. The pivot is the
 * median of the values at the first, the middle and the last positions of the
 * bucket. For buckets longer than 500 elements, each of these three
 * candidates is first replaced by the median of three positions around it.
 * @return The position of the pivot value.
 */
private int selectPivot(final QuickSortRangeInfo qsri)
{
    final int depth = qsri.m_depth;
    final int bucketLen = qsri.m_bucketLen;
    int low = qsri.m_bucketStartPos;
    int high = low + bucketLen - 1;
    int mid = (low + high) / 2;
    if (bucketLen > 500)
    {
        // A large bucket. Use a median of three median values.
        final int step = bucketLen / 8;
        low = med3(low, low + step, low + 2 * step, depth);
        mid = med3(mid - step, mid, mid + step, depth);
        high = med3(high - 2 * step, high - step, high, depth);
    }
    return med3(low, mid, high, depth);
}
/**
 * Exchange the pointers at the two positions in the {@code m_ptr} array.
 */
private void swap(final int pos1, final int pos2)
{
    final int[] ptr = m_ptr;
    final int first = ptr[pos1];
    final int second = ptr[pos2];
    ptr[pos1] = second;
    ptr[pos2] = first;
}
/**
* Shell sort the data in the range. This is used for data ranges that are
* too short to be quicksorted.
* <p>
* This method is declared package private for the unit tests.
*/
void shellSortRange(final QuickSortRangeInfo qsri)
{
// If the implementation of this method looks strange it is because it
// is heavily optimized.
final int len = qsri.m_bucketLen;
final int depth = qsri.m_depth;
final int startPos = qsri.m_bucketStartPos;
final int endPos = startPos + len;
int incMax = 1;
while (SHELL_SORT_INCREMENTS[incMax] < len)
{
incMax++;
}
for (int incrementPtr = incMax - 1; incrementPtr >= 0; incrementPtr--)
{
final int increment = SHELL_SORT_INCREMENTS[incrementPtr];
final int startIter = startPos + increment;
for (int i = startIter; i < endPos; i++)
{
INCLOOP: for (int j = i; j >= startIter; j -= increment)
{
int curDepth = depth;
int curPos1 = m_ptr[j - increment] + depth - 1;
int curPos2 = m_ptr[j] + depth - 1;
// Tests with sort cache lookups.
// Inner loop.
while (true)
{
while (curPos1 >= m_length)
{
curPos1 -= m_length;
}
while (curPos2 >= m_length)
{
curPos2 -= m_length;
}
// Eight tests with sort cache lookups. The data
// overshoot helps us to avoid range checks when
// the pointers are incremented.
if (getDataAt(++curPos1) == getDataAt(++curPos2))
{
if (m_sortCache[curPos1] == m_sortCache[curPos2])
{
// 2
if (getDataAt(++curPos1) == getDataAt(++curPos2))
{
if (m_sortCache[curPos1] == m_sortCache[curPos2])
{
// 3
if (getDataAt(++curPos1) == getDataAt(++curPos2))
{
if (m_sortCache[curPos1] == m_sortCache[curPos2])
{
// 4
if (getDataAt(++curPos1) == getDataAt(++curPos2))
{
if (m_sortCache[curPos1] == m_sortCache[curPos2])
{
// 5
if (getDataAt(++curPos1) == getDataAt(++curPos2))
{
if (m_sortCache[curPos1] == m_sortCache[curPos2])
{
// 6
if (getDataAt(++curPos1) == getDataAt(++curPos2))
{
if (m_sortCache[curPos1] == m_sortCache[curPos2])
{
// 7
if (getDataAt(++curPos1) == getDataAt(++curPos2))
{
if (m_sortCache[curPos1] == m_sortCache[curPos2])
{
// 8
if (getDataAt(++curPos1) == getDataAt(++curPos2))
{
if (m_sortCache[curPos1] == m_sortCache[curPos2])
{
curDepth += 8;
if (curDepth >= m_length)
{
// The strings are exactly equal. This can happen for bzip2 when
// we have input such as AAA (only) that does not get run length
// encoded.
break INCLOOP;
}
// The eight symbols were equals and no cache hits. Continue the inner loop
}
else
{
if (m_sortCache[curPos1] < m_sortCache[curPos2])
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (getDataAt(curPos1) < getDataAt(curPos2))
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (m_sortCache[curPos1] < m_sortCache[curPos2])
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (getDataAt(curPos1) < getDataAt(curPos2))
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (m_sortCache[curPos1] < m_sortCache[curPos2])
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (getDataAt(curPos1) < getDataAt(curPos2))
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (m_sortCache[curPos1] < m_sortCache[curPos2])
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (getDataAt(curPos1) < getDataAt(curPos2))
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (m_sortCache[curPos1] < m_sortCache[curPos2])
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (getDataAt(curPos1) < getDataAt(curPos2))
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (m_sortCache[curPos1] < m_sortCache[curPos2])
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (getDataAt(curPos1) < getDataAt(curPos2))
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (m_sortCache[curPos1] < m_sortCache[curPos2])
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (getDataAt(curPos1) < getDataAt(curPos2))
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (m_sortCache[curPos1] < m_sortCache[curPos2])
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
else
{
if (getDataAt(curPos1) < getDataAt(curPos2))
{
break INCLOOP;
}
else
{
swap(j - increment, j);
continue INCLOOP;
}
}
}
}
}
}
}
/**
 * Scan a range of string pointers for the first string whose value at the
 * given depth differs from the value that the first string of the range has
 * at that depth.
 * @param bucketStartPos The index in the pointer array where the range
 * starts.
 * @param bucketLen The number of pointers in the range.
 * @param depth The offset into each string at which values are compared.
 * @return The pointer array index of the first string that differs, or
 * {@code -1} if all strings in the range have the same value at this depth.
 */
private int getPositionOfFirstDifferingValue(final int bucketStartPos, final int bucketLen, final int depth)
{
    assert depth <= DATA_OVERSHOOT;
    // The value every other string in the range is compared against.
    final int reference = getDataAt(m_ptr[bucketStartPos] + depth);
    final int rangeEnd = bucketStartPos + bucketLen;
    int candidate = bucketStartPos + 1;
    // Advance for as long as the strings agree with the first one.
    while (candidate < rangeEnd && getDataAt(m_ptr[candidate] + depth) == reference)
    {
        candidate++;
    }
    // Running off the end of the range means that nothing differed.
    return (candidate < rangeEnd) ? candidate : -1;
}
/**
 * Exchange two equally long, non-overlapping runs of entries in the pointer
 * array. The scratchpad's temp area is used as the intermediate buffer and
 * is replaced by a larger array if it cannot hold {@code len} entries.
 * @param r1Start The index of the first entry of the first run.
 * @param r2Start The index of the first entry of the second run. The first
 * run must end at or before this index.
 * @param len The number of pointer array entries in each run.
 */
private void swapRanges(final int r1Start, final int r2Start, final int len)
{
    assert r1Start + len <= r2Start;
    int[] buffer = m_scratchpad.m_tempArea;
    if (buffer.length < len)
    {
        // Too small. Grow it with some slack and keep the new array in the
        // scratchpad so that later calls can reuse it.
        buffer = new int[len + 100];
        m_scratchpad.m_tempArea = buffer;
    }
    // first run -> buffer, second run -> first run, buffer -> second run
    System.arraycopy(m_ptr, r1Start, buffer, 0, len);
    System.arraycopy(m_ptr, r2Start, m_ptr, r1Start, len);
    System.arraycopy(buffer, 0, m_ptr, r2Start, len);
}
/**
 * Push a range onto the stack of ranges that remain to be sorted. Ranges
 * with fewer than two elements are already sorted and are not pushed.
 * @param bucketStartPos The start position of the range.
 * @param bucketLen The length of the range.
 * @param depth The depth that the range should be sorted at.
 */
private void addRangeToStack(final int bucketStartPos, final int bucketLen, final int depth)
{
    if (bucketLen >= 2)
    {
        m_sortStackPointer++;
        m_sortStack[m_sortStackPointer] = new QuickSortRangeInfo(bucketStartPos, bucketLen, depth);
    }
}
/**
 * Run one three-way quicksort pass over the range.
 * <p>
 * A pivot is selected and moved to the start of the range. If all strings in
 * the range have the same value at the current depth, the depth is increased
 * until a differing value is found. The method returns early if the depth
 * reaches {@code m_length} (all strings are equal), and hands the range over
 * to {@link #shellSortRange} if the depth reaches
 * {@code QUICKSORT_DEPTH_THRESHOLD}.
 * <p>
 * Otherwise the range is partitioned into values less than, equal to and
 * greater than the pivot value at the sort depth, and the resulting
 * sub-ranges are pushed onto the sort stack with {@code addRangeToStack}.
 * The range holding the values equal to the pivot is pushed with the depth
 * increased by one.
 * <p>
 * This method is declared package-private for the unit tests.
 * @param qsri The range to sort: its start position, its length and the
 * depth to start comparing strings at.
 */
void quickSortRange(final QuickSortRangeInfo qsri)
{
// Select the pivot element.
final int pivot = selectPivot(qsri);
// Move the pivot into the first position
swap(qsri.m_bucketStartPos, pivot);
// First check if all characters are equal at the given depth, in which
// case we increase the depth and try again
int sortDepth = qsri.m_depth;
// The sort depth threshold should be less than the overshoot. If it
// were not, we would have to think of the boundaries of the m_data
// array and such.
assert sortDepth < DATA_OVERSHOOT;
int posAtFirstDifferingValue = getPositionOfFirstDifferingValue(qsri.m_bucketStartPos, qsri.m_bucketLen, sortDepth);
while (posAtFirstDifferingValue == -1)
{
// All characters at the current depth are equal. Sort using an
// increased depth.
if (sortDepth == m_length)
{
// The depth has reached m_length without any difference being
// found. All strings in the range are equal, so there is nothing
// to sort.
return;
}
else
{
if (++sortDepth < QUICKSORT_DEPTH_THRESHOLD)
{
posAtFirstDifferingValue = getPositionOfFirstDifferingValue(qsri.m_bucketStartPos, qsri.m_bucketLen, sortDepth);
}
else
{
// The depth threshold has been reached. Use shell sort instead
shellSortRange(qsri);
return;
}
}
}
// Sort using the calculated depth.
// Iterate through the data to sort using two pointers advancing
// from each end of the data range to sort.
// Create one area at the start of the range and one at the end of
// the range where we move values that are equal to the pivot value.
// All strings before posAtFirstDifferingValue have the same value as
// the first string (the pivot) at this depth, so they already form the
// lower pivot range and the scan can start after them.
int lowPtr = posAtFirstDifferingValue;
// Pointer pointing to the element after the lower pivot range
int lowPivotRangePtr = posAtFirstDifferingValue;
int hiPtr = qsri.m_bucketStartPos + qsri.m_bucketLen - 1;
// Pointer pointing to the element before the upper pivot range.
int hiPivotRangePtr = hiPtr;
int pivotVal = getDataAt(m_ptr[qsri.m_bucketStartPos] + sortDepth);
while (true)
{
int curData;
// Move the lower pointer forward
while (lowPtr <= hiPtr && (curData = getDataAt(m_ptr[lowPtr] + sortDepth)) <= pivotVal)
{
if (curData == pivotVal)
{
// Move the data into the lower pivot range and increase
// the pivot range pointer.
swap(lowPtr, lowPivotRangePtr++);
}
lowPtr++;
}
// Move the upper pointer backwards
while (lowPtr <= hiPtr && (curData = getDataAt(m_ptr[hiPtr] + sortDepth)) >= pivotVal)
{
if (curData == pivotVal)
{
// Move the data into the upper pivot range and decrease
// the pivot range pointer.
swap(hiPtr, hiPivotRangePtr--);
}
hiPtr--;
}
if (lowPtr > hiPtr)
{
// The pointers have crossed. We're done
break;
}
// Now the value at lowPtr is larger than the pivot
// value and the value at hiPtr is smaller. Swap the two
// values and continue moving the pointers.
swap(lowPtr++, hiPtr--);
}
// Merge and move the two pivot ranges to the center of the array
// and sort the three resulting segments.
// Swap the smallest possible ranges: only min(pivot range length,
// neighbouring range length) elements have to change places.
final int lowRangeLen = lowPtr - lowPivotRangePtr;
int rlen = Math.min(lowPivotRangePtr - qsri.m_bucketStartPos, lowRangeLen);
if (rlen > 0)
{
swapRanges(qsri.m_bucketStartPos, lowPtr - rlen, rlen);
}
final int hiRangeLen = hiPivotRangePtr - hiPtr;
rlen = Math.min(qsri.m_bucketStartPos + qsri.m_bucketLen - hiPivotRangePtr - 1, hiRangeLen);
if (rlen > 0)
{
swapRanges(lowPtr, qsri.m_bucketStartPos + qsri.m_bucketLen - rlen, rlen);
}
// Whatever is neither in the lower nor in the higher range is equal to
// the pivot value.
final int pivotRangeLen = qsri.m_bucketLen - lowRangeLen - hiRangeLen;
// Sort the lower range
addRangeToStack(qsri.m_bucketStartPos, lowRangeLen, sortDepth);
// Sort the pivot range at an increased depth
addRangeToStack(qsri.m_bucketStartPos + lowRangeLen, pivotRangeLen, sortDepth + 1);
// Sort the higher range
addRangeToStack(qsri.m_bucketStartPos + lowRangeLen + pivotRangeLen, hiRangeLen, sortDepth);
}
/**
 * Sort all strings in a bucket.
 * <p>
 * The work is driven by an explicit stack of pending ranges rather than by
 * recursion, since the number of pending ranges may become very large.
 * <p>
 * This method is declared package private for the unit tests.
 * @param bucketStartPos The start position of the bucket.
 * @param bucketLen The length of the bucket.
 * @param depth The depth to start comparing strings at. (The strings are
 * all equal at lower depths.)
 */
void sortBucket(final int bucketStartPos, final int bucketLen, final int depth)
{
    if (bucketLen < 2)
    {
        // Nothing to sort
        return;
    }
    assert m_sortStackPointer == -1;
    // Seed the stack with the whole bucket.
    m_sortStackPointer++;
    m_sortStack[m_sortStackPointer] = new QuickSortRangeInfo(bucketStartPos, bucketLen, depth);
    while (m_sortStackPointer >= 0)
    {
        // Pop the topmost pending range.
        final int top = m_sortStackPointer;
        m_sortStackPointer = top - 1;
        final QuickSortRangeInfo pending = m_sortStack[top];
        // Quicksort is used only for ranges that are long enough and that
        // have not passed the depth threshold.
        final boolean useQuickSort = (pending.m_bucketLen >= m_minLengthForQuicksort) && (pending.m_depth <= QUICKSORT_DEPTH_THRESHOLD);
        if (useQuickSort)
        {
            // May push up to three new ranges (less than, equal to and
            // greater than the pivot value) onto the stack.
            quickSortRange(pending);
        }
        else
        {
            shellSortRange(pending);
        }
    }
}
/**
 * Calculate the sort order for all big buckets. (256 of them in all, each
 * containing 256 small buckets.)
 * <p>
 * Smaller buckets are sorted before larger. This is a more efficient way of
 * filling the sort cache.
 * <p>
 * The size of big bucket {@code b} is the distance between the start of its
 * first small bucket and the start of the first small bucket of big bucket
 * {@code b + 1}, so that all 256 small buckets are counted.
 * @param bucketStartPositions The start positions for all small buckets,
 * grouped by large bucket. Index {@code (b + 1) * 256} is read for every
 * large bucket {@code b}, so the array must have the overshoot element at
 * index 65536 set to the end position of the last bucket.
 * @return An array containing the indices of the large buckets in the order
 * that they should be sorted. This is the scratchpad's sort order array.
 */
private int[] establishSortOrder(final int[] bucketStartPositions)
{
    final int[] sortOrder = m_scratchpad.m_sortOrder;
    for (int i = 0; i < 256; i++)
    {
        sortOrder[i] = i;
    }
    // Shell sort the sort orders
    // incPtr == 4 gives an increment of 121
    for (int incPtr = 4; incPtr >= 0; incPtr--)
    {
        final int increment = SHELL_SORT_INCREMENTS[incPtr];
        for (int i = increment; i < sortOrder.length; i++)
        {
            for (int j = i; j >= increment; j -= increment)
            {
                final int so1 = sortOrder[j - increment];
                final int so2 = sortOrder[j];
                // Measure each big bucket up to the start of the next big
                // bucket. Stopping at the start of small bucket 255 would
                // leave that small bucket out of the size.
                final int size1 = bucketStartPositions[(so1 + 1) * 256] - bucketStartPositions[so1 * 256];
                final int size2 = bucketStartPositions[(so2 + 1) * 256] - bucketStartPositions[so2 * 256];
                if (size1 <= size2)
                {
                    // This sort order element is in its right position.
                    break;
                }
                sortOrder[j] = so1;
                sortOrder[j - increment] = so2;
            }
        }
    }
    return sortOrder;
}
/**
 * Sort the data. This method borrows optimizations from bzip2 1.0.5.
 * <p>
 * The data is first radix sorted into 256 * 256 small buckets. The large
 * buckets are then processed in the order given by
 * {@code establishSortOrder}. For each large bucket, the small buckets that
 * are not yet sorted are sorted with {@code sortBucket}, the order of the
 * matching small bucket in every other large bucket is derived by scanning
 * the sorted pointers, and the sort cache is updated for the strings in the
 * large bucket.
 * @return The pointer array {@code m_ptr}, or an empty array if there is no
 * data. NOTE(review): within this method the entries of {@code m_ptr} are
 * used as positions in the data (they are decremented and passed to
 * {@code getDataAt}), so the array appears to map a position in the sorted
 * order to a position in the data. The previous wording of this comment
 * stated the opposite direction. Confirm against the callers.
 */
int[] sort()
{
if (m_length == 0)
{
return new int[0];
}
// Run a least significant digit radix sort on all two-byte permutations
// of the incoming data. This gives 256^2 buckets with similar data
// which can then be sorted individually.
// This method call also creates and populates the m_ptr array.
// The bucketStartPositions has an overshoot of one position, which
// gives it the length 65537. The overshoot element should be equal to
// the length of the data.
final int[] bucketStartPositions = radixSort();
// Fix the overshoot
bucketStartPositions[65536] = m_length;
// The scratchpad arrays are reused, so the sorted flags have to be reset.
final boolean[] sortedLargeBuckets = m_scratchpad.m_sortedLargeBuckets;
Arrays.fill(sortedLargeBuckets, false);
final boolean[] sortedSmallBuckets = m_scratchpad.m_sortedSmallBuckets;
Arrays.fill(sortedSmallBuckets, false);
final int[] copyStart = m_scratchpad.m_copyStart;
final int[] copyEnd = m_scratchpad.m_copyEnd;
// Establish a sort order for all big buckets (256 of them in all) with
// the shortest buckets coming first. This will make the sort result
// caching optimization most efficient
final int[] sortOrder = establishSortOrder(bucketStartPositions);
// Quick sort the elements in each non-empty bucket.
for (int largeBucketIndex = 0; largeBucketIndex < 256; largeBucketIndex++)
{
final int largeBucketNo = sortOrder[largeBucketIndex];
for (int smallBucketNo = 0; smallBucketNo < 256; smallBucketNo++)
{
// Don't sort when smallBucketNo == largeBucketNo. This small
// bucket will be dealt with by the scanning step below.
if (smallBucketNo != largeBucketNo)
{
final int bucketIndex = largeBucketNo * 256 + smallBucketNo;
if (!sortedSmallBuckets[bucketIndex])
{
final int bucketStartPos = bucketStartPositions[bucketIndex];
final int bucketLen = bucketStartPositions[bucketIndex + 1] - bucketStartPos;
if (bucketLen > 1)
{
// More than one data element in this bucket. Sort it.
// The radix sort has already made the first two values equal,
// so the comparison starts at depth 2.
sortBucket(bucketStartPos, bucketLen, 2);
}
sortedSmallBuckets[bucketIndex] = true;
}
}
}
// Now that we have sorted all small buckets in the large bucket n,
// we can infer the sorted order for the small bucket n in all
// large buckets m, including (magically) the small bucket n in the
// large bucket n that we did not sort above.
// copyStart[m] and copyEnd[m] are the first and the last position of
// the small bucket n in the large bucket m.
for (int m = 0; m < 256; m++)
{
copyStart[m] = bucketStartPositions[m * 256 + largeBucketNo];
copyEnd[m] = bucketStartPositions[m * 256 + largeBucketNo + 1] - 1;
}
// Scan forward through the part of the large bucket that comes before
// its own small bucket n, filling the target buckets from their start.
for (int i = bucketStartPositions[largeBucketNo * 256]; i < copyStart[largeBucketNo]; i++)
{
// k is the data position just before the current string, wrapping
// around the start of the data.
int k = m_ptr[i] - 1;
if (k < 0)
{
k += m_length;
}
final int m = getDataAt(k);
if (!sortedLargeBuckets[m])
{
int index = copyStart[m]++;
if (index >= m_length)
{
index -= m_length;
}
m_ptr[index] = k;
}
}
// Scan backward through the part of the large bucket that comes after
// its own small bucket n, filling the target buckets from their end.
for (int i = bucketStartPositions[(largeBucketNo + 1) * 256] - 1; i > copyEnd[largeBucketNo]; i--)
{
int k = m_ptr[i] - 1;
if (k < 0)
{
k += m_length;
}
final int m = getDataAt(k);
if (!sortedLargeBuckets[m])
{
int index = copyEnd[m]--;
if (index < 0)
{
index += m_length;
}
m_ptr[index] = k;
}
}
// Mark all buckets that we got for free as sorted
for (int m = 0; m < 256; m++)
{
sortedSmallBuckets[m * 256 + largeBucketNo] = true;
}
sortedLargeBuckets[largeBucketNo] = true;
// Fix the sort cache for the large bucket.
// Don't do it for the last sorted bucket.
if (largeBucketIndex != 255)
{
final int largeBucketStart = bucketStartPositions[largeBucketNo * 256];
final int largeBucketEnd;
if (largeBucketNo < 255)
{
largeBucketEnd = bucketStartPositions[(largeBucketNo + 1) * 256];
}
else
{
largeBucketEnd = m_length;
}
final int largeBucketSize = largeBucketEnd - largeBucketStart;
assert largeBucketSize >= 0;
// Find the number of right shifts needed to bring the bucket size down
// to at most 65534. Positions within the bucket are shifted by the same
// amount before they are stored in the sort cache.
int shifts = 0;
while (largeBucketSize >>> shifts > 65534)
{
shifts++;
}
for (int i = largeBucketSize - 1; i >= 0; i--)
{
final int sptr = m_ptr[largeBucketStart + i];
final int qval = i >>> shifts;
m_sortCache[sptr] = qval;
if (sptr < DATA_OVERSHOOT)
{
// Update cache in overshoot too
m_sortCache[m_length + sptr] = qval;
}
}
}
}
return m_ptr;
}
}

View File

@ -0,0 +1,29 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* This package contains the {@link org.at4j.comp.bzip2.BZip2InputStream} and
* {@link org.at4j.comp.bzip2.BZip2OutputStream} stream implementations for
* decompressing and compressing data. The
* {@link org.at4j.comp.bzip2.BZip2ReadableFile} and
* {@link org.at4j.comp.bzip2.BZip2WritableFile} can be used to transparently
* decompress and compress data in files.
* @since 1.0
* @author Karl Gustafsson
*/
package org.at4j.comp.bzip2;

View File

@ -0,0 +1,24 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* The {@link org.at4j.comp.CompressionLevel} enum.
* @since 1.0
* @author Karl Gustafsson
*/
package org.at4j.comp;

View File

@ -0,0 +1,173 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.comp;
/**
 * A move-to-front (MTF) encoder and decoder for bytes. For more information on
 * MTF encoding, see <a href="http://en.wikipedia.org/wiki/Move_to_front">the
 * Wikipedia article on move-to-front transforms</a>.
 * <p>
 * Encoded values are positions in the MTF alphabet. When they are stored in
 * byte arrays they are treated as unsigned bytes, so positions 128 to 255
 * appear as negative {@code byte} values.
 * <p>
 * This object is not thread safe. Clients must provide external synchronization
 * if they are to use it from several concurrent threads.
 * @author Karl Gustafsson
 * @since 1.1
 * @see IntMoveToFront
 */
public class ByteMoveToFront
{
    private final byte[] m_alphabet;

    /**
     * Create the alphabet {@code minVal}, {@code minVal + 1}, &hellip;,
     * {@code maxVal}.
     */
    private static byte[] createByteAlphabetFromRange(int minVal, int maxVal) throws IndexOutOfBoundsException
    {
        if ((minVal < 0) || (maxVal > 255) || (minVal >= maxVal))
        {
            throw new IndexOutOfBoundsException("Invalid min and/or max value: min " + minVal + ", max " + maxVal);
        }
        int alphLen = maxVal - minVal + 1;
        byte[] alphabet = new byte[alphLen];
        for (int i = 0; i < alphLen; i++)
        {
            alphabet[i] = (byte) ((i + minVal) & 0xFF);
        }
        return alphabet;
    }

    /**
     * Create a byte MTF encoder/decoder that transforms bytes in the range
     * between {@code minValue} and {@code maxValue}.
     * <p>
     * The initial alphabet of the transformer will be {@code minValue}
     * &hellip; {@code maxValue}.
     * @param minValue The start value of the range. This should be an unsigned
     * byte in the range 0 to 254.
     * @param maxValue The end value of the range. This should be an unsigned
     * byte in the range 1 to 255.
     * @throws IndexOutOfBoundsException If the min and/or the max values are
     * not unsigned bytes or if the min value is equal to or greater than the
     * max value.
     */
    public ByteMoveToFront(int minValue, int maxValue) throws IndexOutOfBoundsException
    {
        this(createByteAlphabetFromRange(minValue, maxValue));
    }

    /**
     * Create a byte MTF encoder/decoder that transforms bytes using the
     * supplied initial alphabet.
     * @param alphabet The initial alphabet. This byte array is <i>not</i>
     * copied by this method and it will be modified by encoding or decoding
     * operations.
     * @throws NullPointerException If {@code alphabet} is {@code null}.
     */
    public ByteMoveToFront(byte[] alphabet)
    {
        if (alphabet == null)
        {
            throw new NullPointerException("alphabet");
        }
        m_alphabet = alphabet;
    }

    /**
     * Find the current position of a value in the alphabet without
     * modifying the alphabet.
     * @throws ArrayIndexOutOfBoundsException If the value is not in the
     * alphabet.
     */
    private int indexOf(byte val) throws ArrayIndexOutOfBoundsException
    {
        for (int j = 0; j < m_alphabet.length; j++)
        {
            if (m_alphabet[j] == val)
            {
                return j;
            }
        }
        throw new ArrayIndexOutOfBoundsException("The value " + val + " is not in the MTF alphabet");
    }

    /**
     * Shift the {@code index} first values of the alphabet one step back and
     * put {@code val} (the value previously at {@code index}) first.
     */
    private void moveToFront(int index, byte val)
    {
        // arraycopy handles the overlapping source and destination ranges.
        System.arraycopy(m_alphabet, 0, m_alphabet, 1, index);
        m_alphabet[0] = val;
    }

    /**
     * Encode the bytes in {@code in} and store them in the array {@code out}.
     * The MTF alphabet is also updated by this method.
     * <p>
     * If a byte that is not in the alphabet is encountered, the bytes before
     * it have been encoded and the alphabet reflects exactly those bytes; the
     * failing byte does not modify the alphabet.
     * @param in The bytes to encode.
     * @param out The array to store the encoded bytes in. This array must be at
     * least as long as {@code in}. Each encoded byte is the unsigned position
     * of the input byte in the alphabet.
     * @return {@code out}
     * @throws ArrayIndexOutOfBoundsException If any of the bytes in {@code in}
     * are not in the MTF alphabet.
     * @throws IllegalArgumentException If the {@code out} array is too short.
     */
    public byte[] encode(byte[] in, byte[] out) throws ArrayIndexOutOfBoundsException, IllegalArgumentException
    {
        if (out.length < in.length)
        {
            throw new IllegalArgumentException("The output array must be at least of the same length as the input array. Was in: " + in.length + ", out: " + out.length);
        }
        for (int i = 0; i < in.length; i++)
        {
            final byte val = in[i];
            // Search before modifying anything so that an unknown value
            // leaves the alphabet intact.
            final int pos = indexOf(val);
            if (pos > 0)
            {
                moveToFront(pos, val);
            }
            out[i] = (byte) pos;
        }
        return out;
    }

    /**
     * Decode a single byte and update the MTF alphabet.
     * @param index The index in the MTF alphabet for the byte.
     * @return The byte.
     * @throws ArrayIndexOutOfBoundsException If {@code index} is not a
     * position in the MTF alphabet.
     */
    public byte decode(int index)
    {
        final byte val = m_alphabet[index];
        moveToFront(index, val);
        return val;
    }

    /**
     * Decode an array of bytes and update the MTF alphabet. The decoded bytes
     * are stored in {@code out}.
     * @param in The bytes to decode. Each byte is interpreted as an unsigned
     * position (0 to 255) in the MTF alphabet.
     * @param out The array to store the decoded bytes in. This array must be at
     * least as long as {@code in}.
     * @return {@code out}
     * @throws ArrayIndexOutOfBoundsException If any of the bytes in {@code in}
     * are not positions in the MTF alphabet.
     * @throws IllegalArgumentException If {@code out} is too short.
     */
    public byte[] decode(byte[] in, byte[] out) throws ArrayIndexOutOfBoundsException, IllegalArgumentException
    {
        if (out.length < in.length)
        {
            throw new IllegalArgumentException("The output array must be at least of the same length as the input array. Was in: " + in.length + ", out: " + out.length);
        }
        for (int i = 0; i < in.length; i++)
        {
            // Mask to undo the sign extension of the byte; positions 128 and
            // above would otherwise become negative indices.
            out[i] = decode(in[i] & 0xFF);
        }
        return out;
    }
}

View File

@ -0,0 +1,177 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.comp;
/**
 * A move-to-front (MTF) encoder and decoder for integers. For more information
 * on MTF encoding, see <a href="http://en.wikipedia.org/wiki/Move_to_front">the
 * Wikipedia article on move-to-front transforms</a>.
 * <p>
 * This object is not thread safe. Clients must provide external synchronization
 * if they are to use it from several concurrent threads.
 * @author Karl Gustafsson
 * @since 1.1
 * @see ByteMoveToFront
 */
public class IntMoveToFront
{
    private final int[] m_alphabet;

    /**
     * Create the alphabet {@code minVal}, {@code minVal + 1}, &hellip;,
     * {@code maxVal}.
     */
    private static int[] createIntAlphabetFromRange(int minVal, int maxVal) throws IndexOutOfBoundsException
    {
        if (minVal >= maxVal)
        {
            throw new IndexOutOfBoundsException("Invalid min and max values. Min=" + minVal + ", max=" + maxVal);
        }
        // Calculate the length as a long; the difference between two ints
        // does not always fit in an int.
        final long span = (long) maxVal - (long) minVal + 1L;
        if (span > Integer.MAX_VALUE)
        {
            throw new IndexOutOfBoundsException("The range is too large. Min=" + minVal + ", max=" + maxVal);
        }
        final int alphLen = (int) span;
        int[] alphabet = new int[alphLen];
        for (int i = 0; i < alphLen; i++)
        {
            alphabet[i] = i + minVal;
        }
        return alphabet;
    }

    /**
     * Create an integer MTF encoder/decoder that transforms integers in the
     * range between {@code minValue} and {@code maxValue}.
     * <p>
     * The initial alphabet of the transformer will be {@code minValue}
     * &hellip; {@code maxValue}.
     * @param minValue The start value of the range.
     * @param maxValue The end value of the range.
     * @throws IndexOutOfBoundsException If the min value is equal to or greater
     * than the max value, or if the range contains more values than an array
     * can hold.
     */
    public IntMoveToFront(int minValue, int maxValue) throws IndexOutOfBoundsException
    {
        this(createIntAlphabetFromRange(minValue, maxValue));
    }

    /**
     * Create an integer MTF encoder/decoder that transforms integers using
     * the supplied initial alphabet.
     * @param alphabet The initial alphabet. This integer array is <i>not</i>
     * copied by this method and it will be modified by encoding or decoding
     * operations.
     * @throws NullPointerException If {@code alphabet} is {@code null}.
     */
    public IntMoveToFront(int[] alphabet)
    {
        if (alphabet == null)
        {
            throw new NullPointerException("alphabet");
        }
        m_alphabet = alphabet;
    }

    /**
     * Find the current position of a value in the alphabet without
     * modifying the alphabet.
     * @throws ArrayIndexOutOfBoundsException If the value is not in the
     * alphabet.
     */
    private int indexOf(int val) throws ArrayIndexOutOfBoundsException
    {
        for (int j = 0; j < m_alphabet.length; j++)
        {
            if (m_alphabet[j] == val)
            {
                return j;
            }
        }
        throw new ArrayIndexOutOfBoundsException("The value " + val + " is not in the MTF alphabet");
    }

    /**
     * Shift the {@code index} first values of the alphabet one step back and
     * put {@code val} (the value previously at {@code index}) first.
     */
    private void moveToFront(int index, int val)
    {
        // arraycopy handles the overlapping source and destination ranges.
        System.arraycopy(m_alphabet, 0, m_alphabet, 1, index);
        m_alphabet[0] = val;
    }

    /**
     * Encode the integers in {@code in} and store them in the array
     * {@code out}. The MTF alphabet is also updated by this method.
     * <p>
     * If an integer that is not in the alphabet is encountered, the integers
     * before it have been encoded and the alphabet reflects exactly those
     * integers; the failing integer does not modify the alphabet.
     * @param in The integers to encode.
     * @param out The array to store the encoded integers in. This array must be
     * at least as long as {@code in}. Each encoded integer is the position of
     * the input integer in the alphabet.
     * @return {@code out}
     * @throws ArrayIndexOutOfBoundsException If any of the integers in
     * {@code in} are not in the MTF alphabet.
     * @throws IllegalArgumentException If the {@code out} array is too short.
     */
    public int[] encode(int[] in, int[] out) throws ArrayIndexOutOfBoundsException, IllegalArgumentException
    {
        if (out.length < in.length)
        {
            throw new IllegalArgumentException("The output array must be at least of the same length as the input array. Was in: " + in.length + ", out: " + out.length);
        }
        for (int i = 0; i < in.length; i++)
        {
            final int val = in[i];
            // Search before modifying anything so that an unknown value
            // leaves the alphabet intact.
            final int pos = indexOf(val);
            if (pos > 0)
            {
                moveToFront(pos, val);
            }
            // Store the full position. It must not be narrowed to a byte
            // since the alphabet may have more than 128 symbols.
            out[i] = pos;
        }
        return out;
    }

    /**
     * Decode a single integer and update the MTF alphabet.
     * @param index The index in the MTF alphabet for the integer.
     * @return The integer.
     * @throws ArrayIndexOutOfBoundsException If {@code index} is not a
     * position in the MTF alphabet.
     */
    public int decode(int index)
    {
        final int val = m_alphabet[index];
        moveToFront(index, val);
        return val;
    }

    /**
     * Decode an array of integers and update the MTF alphabet. The decoded
     * integers are stored in {@code out}.
     * @param in The integers to decode. Each integer is a position in the MTF
     * alphabet.
     * @param out The array to store the decoded integers in. This array must be
     * at least as long as {@code in}.
     * @return {@code out}
     * @throws ArrayIndexOutOfBoundsException If any of the integers in
     * {@code in} are not positions in the MTF alphabet.
     * @throws IllegalArgumentException If {@code out} is too short.
     */
    public int[] decode(int[] in, int[] out) throws ArrayIndexOutOfBoundsException, IllegalArgumentException
    {
        if (out.length < in.length)
        {
            throw new IllegalArgumentException("The output array must be at least of the same length as the input array. Was in: " + in.length + ", out: " + out.length);
        }
        for (int i = 0; i < in.length; i++)
        {
            out[i] = decode(in[i]);
        }
        return out;
    }
}

View File

@ -0,0 +1,24 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Support classes for compression and decompression.
* @since 1.1
* @author Karl Gustafsson
*/
package org.at4j.support.comp;

View File

@ -0,0 +1,180 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.io;
import java.io.Closeable;
import java.io.IOException;
/**
* This interface identifies a source for bits.
* <p>
* The source is assumed to have a position which may or may not be at a byte
* boundary (every eight bits).
* <p>
* If an implementing class also extends {@link java.io.InputStream} it can be
* used as an input stream. This interface redefines {@link java.io.InputStream}
* 's read methods with the extra condition that they may only be used if the
* current position of the source is at a byte boundary. The
* {@link #readBytes(byte[], int, int)} method does not have that limitation.
* @author Karl Gustafsson
* @since 1.1
* @see java.io.InputStream
* @see BitOutput
*/
public interface BitInput extends Closeable
{
/**
* Has the input come to its end? If so, nothing more can be read from it.
* @return {@code true} if no more can be read from this input.
*/
boolean isAtEof();
/**
* Move the position to the next byte boundary. If the current position is
* already at a byte boundary, this method does nothing.
* @throws IOException On I/O errors or if this input is already at the end
* of the available data.
*/
void skipToByteBoundary() throws IOException;
/**
* Read the value of the next bit in the stream.
* @return {@code true} if the value is 1, {@code false} if it is 0.
* @throws IOException On I/O errors or if this input is already at the end
* of the available data.
*/
boolean readBit() throws IOException;
/**
* Read up to eight bits from the input.
* @param no The number of bits to read.
* @return The bits as the least significant bits of the returned integer.
* For instance, if {@code 1011} is read, the returned integer will have the
* value {@code 1 * 8 + 0 * 4 + 1 * 2 + 1 * 1 == 11}.
* @throws IndexOutOfBoundsException If {@code no} is less than 0 or greater
* than 8.
* @throws IOException On I/O errors or if this input is already at the end
* of the available data.
* @see #readBitsLittleEndian(int)
*/
int readBits(int no) throws IndexOutOfBoundsException, IOException;
/**
* Read up to 32 bits from the input. The first eight bits that is read will
* be the most significant byte of the returned integer.
* @param no The number of bits to read.
* @return The bits read as the least significant bits of the returned
* integer. (Just like for {@link #readBits(int)}.
* @throws IndexOutOfBoundsException If {@code no} is less than 0 or greater
* than 32.
* @throws IOException On I/O errors or if this input is already at the end
* of the available data.
* @see #readBits(int)
*/
int readBitsLittleEndian(int no) throws IndexOutOfBoundsException, IOException;
/**
* Read bytes from the input. Unlike {@link #read(byte[], int, int)}, this
* method does not require that the current position is at a byte boundary.
* <p>
* Another difference to {@link #read(byte[], int, int)} is that this method
* throws an {@link IOException} if it cannot read all requested bytes.
* @param barr The byte array to read bytes into.
* @param off The offset in the array to start writing read bytes at.
* @param len The number of bytes to read.
* @return {@code barr}.
* @throws IndexOutOfBoundsException If the length or the offset is negative
* or if the sum of the length and the offset is greater than the length of
* the supplied byte array.
* @throws IOException On I/O errors or if there was not enough bytes to
* read from the input.
* @see #read(byte[], int, int)
*/
public byte[] readBytes(byte[] barr, int off, int len) throws IndexOutOfBoundsException, IOException;
/**
* Read a single byte from the input. See {@link java.io.InputStream#read()}
* .
* <p>
* This method requires that the current position in the input is at a byte
* boundary.
* @return The read byte or {@code -1} if the current position is at the end
* of the input.
* @throws IOException On I/O errors or if the current position is not at a
* byte boundary.
* @see java.io.InputStream#read()
*/
int read() throws IOException;
/**
* Read bytes into the supplied array. See
* {@link java.io.InputStream#read(byte[])}.
* <p>
* This method requires that the current position in the input is at a byte
* boundary.
* @param barr The byte array to read bytes into.
* @return The number of bytes read.
* @throws IOException On I/O errors or if the current position is not at a
* byte boundary.
* @see java.io.InputStream#read(byte[])
*/
int read(byte[] barr) throws IOException;
/**
* Read bytes into the supplied array. See
* {@link java.io.InputStream#read(byte[], int, int)}.
* <p>
* This method requires that the current position in the input is at a byte
* boundary.
* @param barr The byte array to read bytes into.
* @param offset The offset position in the array at which to start writing
* the bytes read.
* @param len The number of bytes to read.
* @return The number of bytes actually read.
* @throws IndexOutOfBoundsException If the offset or the length is negative
* or if the sum of the offset and the length is greater than the length of
* the supplied byte array.
* @throws IOException On I/O errors or if the current position is not at a
* byte boundary.
*/
int read(byte[] barr, int offset, int len) throws IndexOutOfBoundsException, IOException;
/**
* Skip bytes in the input. See {@link java.io.InputStream#skip(long)}.
* <p>
* This method requires that the current position in the input is at a byte
* boundary.
* @param n The number of bytes to skip.
* @return The number of bytes skipped.
* @throws IOException On I/O errors or if the current position is not at a
* byte boundary.
*/
long skip(long n) throws IOException;
/**
* Get the number of bytes available in the input. See
* {@link java.io.InputStream#available()}.
* <p>
* This method requires that the current position in the input is at a byte
* boundary.
* @throws IOException On I/O errors or if the current position is not at a
* byte boundary.
*/
int available() throws IOException;
}

View File

@ -0,0 +1,162 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.io;
import java.io.Closeable;
import java.io.IOException;
/**
* This interface identifies a sink for bits.
* <p>
* The sink is assumed to have a position which may or may not be at a byte
* boundary (every eight bits).
* <p>
* If an implementing class also extends {@link java.io.OutputStream} it can be
* used as an output stream. This interface redeclares
* {@link java.io.OutputStream}'s write methods with the extra condition that
* they may only be used if the current position of the sink is at a byte
* boundary. The {@link #writeBytes(byte[], int, int)} method does not have that
* limitation.
* @author Karl Gustafsson
* @since 1.1
* @see java.io.OutputStream
* @see BitInput
*/
public interface BitOutput extends Closeable
{
/**
* Pad the output with zeroes to the next byte boundary. If the current
* position is already at a byte boundary, this method does nothing.
* @throws IOException On I/O errors.
*/
void padToByteBoundary() throws IOException;
/**
* Get the value of the unfinished byte, i.e. the bits written since the
* last byte boundary. The value is shifted so that the least significant
* bit positions are used; {@link #getNumberOfBitsInUnfinishedByte()}
* returns how many bit positions are used.
* <p>
* If the current position is at a byte boundary, 0 is returned.
* @return The value of the unfinished byte.
*/
int getUnfinishedByte();
/**
* Get the number of bits that have been written to the last (unfinished)
* byte.
* <p>
* If the current position is at a byte boundary, 0 is returned.
* @return The number of bits that have been written to the last byte. This
* is a number between 0 and 7 (inclusive).
*/
int getNumberOfBitsInUnfinishedByte();
/**
* Write a single bit.
* @param val The bit ({@code true == 1}, {@code false == 0}).
* @throws IOException On I/O errors.
*/
void writeBit(boolean val) throws IOException;
/**
* Write up to eight bits.
* @param val The value to write. The bits written are the {@code no}
* rightmost bits of {@code val}. It is not verified that {@code val} fits
* within its {@code no} rightmost bits. If it does not, the written value
* is simply truncated.
* @param no The number of bits to write. This must be between 0 and 8
* (inclusive).
* @throws IndexOutOfBoundsException If {@code no} is less than 0 or greater
* than 8.
* @throws IOException On I/O errors.
* @see #writeBitsLittleEndian(int, int)
*/
void writeBits(int val, int no) throws IndexOutOfBoundsException, IOException;
/**
* Write up to 32 bits. The bits are written in what this library calls
* little-endian order, with the most significant bit first.
* @param val The value to write. The bits written are the {@code no}
* rightmost bits of {@code val}. It is not verified that {@code val} fits
* within its {@code no} rightmost bits. If it does not, the written value
* is simply truncated.
* @param no The number of bits to write. This must be between 0 and 32
* (inclusive).
* @throws IndexOutOfBoundsException If {@code no} is less than 0 or more
* than 32.
* @throws IOException On I/O errors.
* @see #writeBits(int, int)
*/
void writeBitsLittleEndian(int val, int no) throws IndexOutOfBoundsException, IOException;
/**
* Write an array of bytes to the output. Unlike
* {@link #write(byte[], int, int)}, this method does not require that the
* current position is at a byte boundary.
* @param barr The bytes to write.
* @param off The offset in the byte array of the first byte to write.
* @param len The number of bytes to write.
* @throws IndexOutOfBoundsException If the offset or the length is negative
* or if the offset + length is larger than the byte array.
* @throws IOException On I/O errors.
* @see #write(byte[], int, int)
*/
void writeBytes(byte[] barr, int off, int len) throws IndexOutOfBoundsException, IOException;
/**
* Write a single byte. See {@link java.io.OutputStream#write(int)}.
* <p>
* This method requires that the current position of the output is at a byte
* boundary.
* @param b The byte to write (0 - 255).
* @throws IOException On I/O errors or if the current position is not at a
* byte boundary.
* @see java.io.OutputStream#write(int)
*/
void write(int b) throws IOException;
/**
* Write a whole byte array. See {@link java.io.OutputStream#write(byte[])}.
* <p>
* This method requires that the current position of the output is at a byte
* boundary.
* @param barr The bytes to write.
* @throws IOException On I/O errors or if the current position is not at a
* byte boundary.
* @see java.io.OutputStream#write(byte[])
*/
void write(byte[] barr) throws IOException;
/**
* Write a part of a byte array. See
* {@link java.io.OutputStream#write(byte[], int, int)}.
* <p>
* This method requires that the current position of the output is at a byte
* boundary.
* @param barr The bytes to write.
* @param off The offset in the byte array of the first byte to write.
* @param len The number of bytes to write.
* @throws IndexOutOfBoundsException If the offset or the length is negative
* or if the offset + length is larger than the byte array.
* @throws IOException On I/O errors or if the current position is not at a
* byte boundary.
* @see java.io.OutputStream#write(byte[], int, int)
* @see #writeBytes(byte[], int, int)
*/
void write(byte[] barr, int off, int len) throws IndexOutOfBoundsException, IOException;
}

View File

@ -0,0 +1,380 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.io;
import java.io.IOException;
import java.io.InputStream;
/**
 * This is an input stream that a client can use to read single or several bits
 * from an underlying {@link InputStream}. Within each byte the bits are
 * consumed starting at the most significant bit (mask {@code 0x80}); this is
 * what this library calls little-endian bit order.
 * <p>
 * The stream always keeps one byte of read-ahead in memory. The byte-oriented
 * {@link InputStream} methods may only be used when the position is at a byte
 * boundary; the bit-oriented methods work at any position.
 * @author Karl Gustafsson
 * @since 1.1
 */
public class LittleEndianBitInputStream extends InputStream implements BitInput
{
    // Pointer value when the next bit to read is the first bit of a byte.
    private static final int POINTER_START_OF_BYTE = 0;
    // Pointer value when the next bit to read is the last bit of a byte.
    private static final int POINTER_END_OF_BYTE = 7;

    private final InputStream m_in;
    // The current (read-ahead) byte, or -1 when the input is exhausted.
    private int m_curByte;
    // The pointer to the current bit location in the current byte (0 - 7).
    private int m_pointerInByte = POINTER_START_OF_BYTE;
    // Bytes fetched from the wrapped stream, not counting the read-ahead byte
    // fetched by the constructor.
    private long m_numberOfBytesRead = 0;

    /**
     * Create a bit input stream on top of another stream. One byte is read
     * from the wrapped stream immediately.
     * @param in The stream to read from.
     * @throws NullPointerException If {@code in} is {@code null}.
     * @throws IOException If reading the first byte fails.
     */
    public LittleEndianBitInputStream(InputStream in) throws IOException
    {
        if (in == null)
        {
            throw new NullPointerException("in");
        }
        m_in = in;
        m_curByte = in.read();
        // Don't increment the number of read bytes counter. It is always one
        // byte behind.
    }

    // Read one byte from the wrapped stream and count it unless it was EOF.
    private int readByte() throws IOException
    {
        int res = m_in.read();
        if (res != -1)
        {
            m_numberOfBytesRead++;
        }
        return res;
    }

    // Advance the bit pointer by one, fetching the next byte when the current
    // one is used up.
    private void incrementPointerPosition() throws IOException
    {
        if (m_pointerInByte == POINTER_END_OF_BYTE)
        {
            m_curByte = readByte();
            m_pointerInByte = POINTER_START_OF_BYTE;
        }
        else if (!isAtEof())
        {
            // Increment the pointer only if we're not at EOF
            m_pointerInByte++;
        }
    }

    /**
     * Has the end of the input been reached?
     * @return {@code true} if there is no current byte left to read from.
     */
    public boolean isAtEof()
    {
        return m_curByte == -1;
    }

    /**
     * Get the number of whole bytes read this far.
     * @return The number of bytes read this far.
     */
    public long getNumberOfBytesRead()
    {
        return m_numberOfBytesRead;
    }

    private void assertNotAtEOF() throws IOException
    {
        if (isAtEof())
        {
            throwIOException("At EOF");
        }
    }

    private boolean isAtByteBoundary()
    {
        return m_pointerInByte == POINTER_START_OF_BYTE;
    }

    private void assertAtByteBoundary() throws IOException
    {
        if (!isAtByteBoundary())
        {
            throwIOException("Not at byte boundary. Position: pos=" + m_pointerInByte);
        }
    }

    private void throwIOException(String msg, long pos) throws IOException
    {
        throw new IOException(msg + ". Position in stream: " + pos);
    }

    private void throwIOException(String msg) throws IOException
    {
        throw new IOException(msg + ". Position in stream: " + m_numberOfBytesRead);
    }

    /**
     * Discard the remaining bits of the current byte so that the position is
     * at a byte boundary. Does nothing if it already is.
     * @throws IOException On I/O errors or if the input is at EOF.
     */
    public void skipToByteBoundary() throws IOException
    {
        assertNotAtEOF();
        if (m_pointerInByte != POINTER_START_OF_BYTE)
        {
            m_pointerInByte = POINTER_START_OF_BYTE;
            m_curByte = readByte();
        }
    }

    /**
     * Read a single bit.
     * @return {@code true} if the bit is set.
     * @throws IOException On I/O errors or if the input is at EOF.
     */
    public boolean readBit() throws IOException
    {
        assertNotAtEOF();
        boolean res = (m_curByte & (1 << (7 - m_pointerInByte))) != 0;
        incrementPointerPosition();
        return res;
    }

    /**
     * Read up to eight bits. The bits are returned as the {@code no} least
     * significant bits of the result.
     * @param no The number of bits to read (0 - 8).
     * @return The bits read.
     * @throws IndexOutOfBoundsException If {@code no} is less than 0 or
     * greater than 8.
     * @throws IOException On I/O errors or if the input is at EOF (this is
     * checked even when {@code no} is 0).
     */
    public int readBits(int no) throws IOException, IndexOutOfBoundsException
    {
        if (no < 0 || no > 8)
        {
            throw new IndexOutOfBoundsException("Invalid number of bits: " + no + ". Must be between 0 and 8 (inclusive)");
        }
        assertNotAtEOF();
        if (no == 0)
        {
            return 0;
        }
        if (no + m_pointerInByte <= 8)
        {
            // All bits to read fit in the current byte
            int res = (m_curByte >> (8 - no - m_pointerInByte)) & ((1 << no) - 1);
            m_pointerInByte += no;
            if (m_pointerInByte > POINTER_END_OF_BYTE)
            {
                m_curByte = readByte();
                m_pointerInByte = POINTER_START_OF_BYTE;
            }
            return res;
        }
        else
        {
            // Read the remaining bits of this byte + the first bits of the
            // next byte
            int noToReadInByte2 = no - (8 - m_pointerInByte);
            int res = (m_curByte & ((1 << (8 - m_pointerInByte)) - 1)) << noToReadInByte2;
            m_curByte = readByte();
            assertNotAtEOF();
            m_pointerInByte = noToReadInByte2;
            res += m_curByte >> (8 - noToReadInByte2);
            return res;
        }
    }

    /**
     * Read up to 32 bits. The bits that are read first end up in the most
     * significant positions of the result.
     * @param no The number of bits to read (0 - 32).
     * @return The bits read, as the {@code no} least significant bits of the
     * returned integer.
     * @throws IndexOutOfBoundsException If {@code no} is less than 0 or
     * greater than 32.
     * @throws IOException On I/O errors or if the input runs out of data.
     */
    public int readBitsLittleEndian(int no) throws IOException, IndexOutOfBoundsException
    {
        if (no < 0 || no > 32)
        {
            throw new IndexOutOfBoundsException("Invalid number of bits: " + no + ". Must be between 0 and 32 (inclusive)");
        }
        if (no == 0)
        {
            return 0;
        }
        int noReads = no / 8;
        int mod = no % 8;
        int res = 0;
        if (mod != 0)
        {
            // The odd bits come first and are the most significant ones.
            res = readBits(mod) << (noReads * 8);
        }
        for (int i = 0; i < noReads; i++)
        {
            res += readBits(8) << ((noReads - i - 1) * 8);
        }
        return res;
    }

    /**
     * Read exactly {@code len} bytes, regardless of whether the current
     * position is at a byte boundary or not.
     * @param barr The array to read into.
     * @param off The offset in the array of the first byte to store.
     * @param len The number of bytes to read.
     * @return {@code barr}.
     * @throws IndexOutOfBoundsException If the offset or the length is
     * negative or if offset + length is greater than the array length.
     * @throws IOException On I/O errors, if the input is at EOF or if fewer
     * than {@code len} bytes remain.
     */
    public byte[] readBytes(byte[] barr, int off, int len) throws IOException, IndexOutOfBoundsException
    {
        if (off < 0)
        {
            throw new IndexOutOfBoundsException("Invalid offset " + off + ". It must be >= 0");
        }
        if (len < 0)
        {
            throw new IndexOutOfBoundsException("Invalid length " + len + ". It must be >= 0");
        }
        // Written as a subtraction so that off + len cannot overflow.
        if (len > barr.length - off)
        {
            throw new IndexOutOfBoundsException("Invalid offset + length (" + off + " + " + len + "). It must be <= the length of the supplied array (" + barr.length + ")");
        }
        assertNotAtEOF();
        if (len == 0)
        {
            return barr;
        }
        if (isAtByteBoundary())
        {
            // Special case: we are at the byte boundary. The read method
            // takes care of updating all internal state. It may deliver fewer
            // bytes than requested without being at EOF, so keep reading until
            // done or until it reports EOF.
            int noRead = 0;
            while (noRead < len)
            {
                int n = read(barr, off + noRead, len - noRead);
                if (n < 0)
                {
                    break;
                }
                noRead += n;
            }
            if (noRead != len)
            {
                throwIOException("Unexpected EOF. Wanted to read " + len + " bytes. Got " + noRead, m_numberOfBytesRead - noRead);
            }
        }
        else
        {
            // A single read call may return fewer bytes than requested
            // without being at EOF; loop until all bytes have arrived.
            int noRead = 0;
            while (noRead < len)
            {
                int n = m_in.read(barr, off + noRead, len - noRead);
                if (n < 0)
                {
                    break;
                }
                noRead += n;
            }
            m_numberOfBytesRead += noRead;
            if (noRead != len)
            {
                m_curByte = -1;
                m_pointerInByte = POINTER_START_OF_BYTE;
                throwIOException("Unexpected EOF. Wanted to read " + len + " bytes. Got " + noRead, m_numberOfBytesRead - noRead);
            }
            // Shift bytes in the result array: every output byte consists of
            // the unread bits of the preceding raw byte followed by the first
            // bits of the raw byte just read.
            int lastByte = m_curByte;
            // The last raw byte becomes the new current byte; the bit pointer
            // is unchanged.
            m_curByte = barr[off + len - 1] & 0xFF;
            // The distance to shift the second byte to the right.
            int rightShiftDistance = 8 - m_pointerInByte;
            for (int i = off; i < off + len; i++)
            {
                int newLastByte = barr[i];
                barr[i] = (byte) (((lastByte << m_pointerInByte) | ((barr[i] & 0xFF) >>> rightShiftDistance)) & 0xFF);
                lastByte = newLastByte;
            }
        }
        return barr;
    }

    /**
     * Read a single byte.
     * @return The byte, or {@code -1} at EOF.
     * @throws IOException On I/O errors or if the current position is not at
     * a byte boundary.
     */
    @Override
    public int read() throws IOException
    {
        assertAtByteBoundary();
        int res = m_curByte;
        if (m_curByte != -1)
        {
            m_curByte = readByte();
        }
        return res;
    }

    @Override
    public int read(byte[] barr) throws IOException
    {
        return read(barr, 0, barr.length);
    }

    /**
     * Read up to {@code len} bytes into the array.
     * @param barr The array to read into.
     * @param offset The offset in the array of the first byte to store.
     * @param len The maximum number of bytes to read.
     * @return The number of bytes read, 0 if {@code len} is 0, or {@code -1}
     * at EOF.
     * @throws IndexOutOfBoundsException If the offset or the length is
     * negative or if offset + length is greater than the array length.
     * @throws IOException On I/O errors or if the current position is not at
     * a byte boundary.
     */
    @Override
    public int read(byte[] barr, int offset, int len) throws IndexOutOfBoundsException, IOException
    {
        if (offset < 0)
        {
            throw new IndexOutOfBoundsException("Illegal offset: " + offset);
        }
        else if (len < 0)
        {
            throw new IndexOutOfBoundsException("Illegal length: " + len);
        }
        // Written as a subtraction so that offset + len cannot overflow.
        else if (len > barr.length - offset)
        {
            throw new IndexOutOfBoundsException("Illegal offset + length: " + offset + " + " + len + ". Longer than the byte array: " + barr.length);
        }
        assertAtByteBoundary();
        if (len == 0)
        {
            // Per the InputStream contract nothing is read. In particular the
            // read-ahead byte must not be consumed or stored in the array.
            return 0;
        }
        if (isAtEof())
        {
            return -1;
        }
        // The first byte delivered is the read-ahead byte.
        barr[offset] = (byte) m_curByte;
        int res = 1;
        if (len > 1)
        {
            int noRead = m_in.read(barr, offset + 1, len - 1);
            if (noRead > 0)
            {
                res += noRead;
                m_numberOfBytesRead += noRead;
            }
        }
        m_curByte = readByte();
        return res;
    }

    /**
     * Skip bytes in the input.
     * @param n The number of bytes to skip.
     * @return The number of bytes actually skipped.
     * @throws IOException On I/O errors or if the current position is not at
     * a byte boundary.
     */
    @Override
    public long skip(long n) throws IOException
    {
        assertAtByteBoundary();
        if (n <= 0L || isAtEof())
        {
            return 0L;
        }
        // The read-ahead byte is the first byte skipped.
        long res = 1L;
        if (n > 1L)
        {
            long noSkipped = m_in.skip(n - 1L);
            if (noSkipped > 0L)
            {
                m_numberOfBytesRead += noSkipped;
                res += noSkipped;
            }
        }
        // A short skip does not necessarily mean EOF, so always try to fetch
        // the next byte; readByte returns -1 if the input really is exhausted.
        m_curByte = readByte();
        return res;
    }

    /**
     * Get the number of bytes that can be read without blocking, including
     * the read-ahead byte held by this stream.
     * @throws IOException On I/O errors or if the current position is not at
     * a byte boundary.
     */
    @Override
    public int available() throws IOException
    {
        assertAtByteBoundary();
        // The parentheses are essential: without them the sum is compared
        // with -1 and the method only ever returns 0 or 1.
        return m_in.available() + (m_curByte != -1 ? 1 : 0);
    }

    @Override
    public void close() throws IOException
    {
        m_in.close();
    }
}

View File

@ -0,0 +1,265 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.io;
import java.io.IOException;
import java.io.OutputStream;
/**
 * This is an {@link OutputStream} that implements {@link BitOutput} and hence
 * can be used to write individual bits to the output. Within each byte the
 * bits are stored starting at the most significant bit (mask {@code 0x80});
 * this is what this library calls little-endian order.
 * <p>
 * Bits are collected in an unfinished byte that is written to the wrapped
 * stream once all eight bits have been set. The byte-oriented
 * {@link OutputStream} methods may only be used at a byte boundary.
 * @author Karl Gustafsson
 * @since 1.1
 */
public class LittleEndianBitOutputStream extends OutputStream implements BitOutput
{
    // Pointer value when the next bit to write is the first bit of a byte.
    private static final int POINTER_START_OF_BYTE = 0;
    // Pointer value when the next bit to write is the last bit of a byte.
    private static final int POINTER_END_OF_BYTE = 7;

    private final OutputStream m_out;
    // The current, unfinished byte. Written bits occupy the high positions.
    private int m_curByte = 0;
    // The pointer to the current bit location in the current byte (0 - 7).
    private int m_pointerInByte = POINTER_START_OF_BYTE;
    private long m_numberOfBytesWritten = 0;

    /**
     * Create a bit output stream on top of another stream.
     * @param wrapped The stream to write to.
     * @throws NullPointerException If {@code wrapped} is {@code null}.
     */
    public LittleEndianBitOutputStream(OutputStream wrapped)
    {
        if (wrapped == null)
        {
            throw new NullPointerException("wrapped");
        }
        m_out = wrapped;
    }

    private boolean isAtByteBoundary()
    {
        return m_pointerInByte == POINTER_START_OF_BYTE;
    }

    private void assertAtByteBoundary() throws IOException
    {
        if (!isAtByteBoundary())
        {
            throwIOException("Not at byte boundary. Position: pos=" + m_pointerInByte);
        }
    }

    private void throwIOException(String msg) throws IOException
    {
        throw new IOException(msg + ". Position in stream: " + m_numberOfBytesWritten);
    }

    // Emit the current byte to the wrapped stream and start a new, empty one.
    private void writeCurByte() throws IOException
    {
        m_out.write(m_curByte);
        m_numberOfBytesWritten++;
        m_pointerInByte = POINTER_START_OF_BYTE;
        m_curByte = 0;
    }

    /**
     * Get the total number of whole <i>bytes</i> written by this stream so far.
     * @return The number of whole bytes written.
     */
    public long getNumberOfBytesWritten()
    {
        return m_numberOfBytesWritten;
    }

    public int getUnfinishedByte()
    {
        // Move the written (high) bits down to the least significant
        // positions.
        return m_pointerInByte > 0 ? m_curByte >>> (8 - m_pointerInByte) : 0;
    }

    public int getNumberOfBitsInUnfinishedByte()
    {
        return m_pointerInByte;
    }

    public void padToByteBoundary() throws IOException
    {
        if (m_pointerInByte > POINTER_START_OF_BYTE)
        {
            // The unused low bits of the current byte are already zero.
            writeCurByte();
        }
    }

    public void writeBit(boolean val) throws IOException
    {
        if (val)
        {
            m_curByte |= 1 << (7 - m_pointerInByte);
        }
        m_pointerInByte++;
        if (m_pointerInByte > POINTER_END_OF_BYTE)
        {
            // Write the current byte and start a new one
            writeCurByte();
        }
    }

    /**
     * Write the {@code no} rightmost bits of {@code val}. Any bits of
     * {@code val} above those are ignored.
     * @param val The value to write.
     * @param no The number of bits to write (0 - 8).
     * @throws IndexOutOfBoundsException If {@code no} is less than 0 or
     * greater than 8.
     * @throws IOException On I/O errors.
     */
    public void writeBits(int val, int no) throws IOException, IndexOutOfBoundsException
    {
        if (no < 0 || no > 8)
        {
            throw new IndexOutOfBoundsException("Invalid number of bits " + no + ". Must be between 0 and 8 (inclusive)");
        }
        if (no == 0)
        {
            return;
        }
        // Truncate up front. Without this, surplus high bits of val would be
        // OR-ed on top of bits already stored in the current byte when the
        // value straddles a byte boundary.
        int bits = val & ((1 << no) - 1);
        if (m_pointerInByte + no <= 8)
        {
            // All bits to write fit in the current byte
            m_curByte |= bits << (8 - m_pointerInByte - no);
            m_pointerInByte += no;
            if (m_pointerInByte > POINTER_END_OF_BYTE)
            {
                writeCurByte();
            }
        }
        else
        {
            // Bits will have to be written in the next byte too
            int bitsToWriteInCurByte = 8 - m_pointerInByte;
            int bitsToWriteInNextByte = no - bitsToWriteInCurByte;
            m_curByte |= bits >>> bitsToWriteInNextByte;
            writeCurByte();
            m_curByte = (bits & ((1 << bitsToWriteInNextByte) - 1)) << (8 - bitsToWriteInNextByte);
            m_pointerInByte = bitsToWriteInNextByte;
        }
    }

    /**
     * Write the {@code no} rightmost bits of {@code val}, most significant
     * bits first.
     * @param val The value to write.
     * @param no The number of bits to write (0 - 32).
     * @throws IndexOutOfBoundsException If {@code no} is less than 0 or
     * greater than 32.
     * @throws IOException On I/O errors.
     */
    public void writeBitsLittleEndian(int val, int no) throws IndexOutOfBoundsException, IOException
    {
        if (no < 0 || no > 32)
        {
            throw new IndexOutOfBoundsException("Invalid number of bits to write " + no + ". It must be between 0 and 32 (inclusive)");
        }
        if (no == 0)
        {
            return;
        }
        int noWrites = no / 8;
        int mod = no % 8;
        if (mod != 0)
        {
            // The odd, most significant bits go first.
            writeBits(val >>> (noWrites * 8), mod);
        }
        for (int i = 0; i < noWrites; i++)
        {
            // writeBits only uses the eight rightmost bits of its argument.
            writeBits(val >>> ((noWrites - i - 1) * 8), 8);
        }
    }

    /**
     * Write bytes regardless of whether the current position is at a byte
     * boundary or not. The supplied array is not modified.
     * @param barr The bytes to write.
     * @param off The offset in the array of the first byte to write.
     * @param len The number of bytes to write.
     * @throws IndexOutOfBoundsException If the offset or the length is
     * negative or if offset + length is greater than the array length.
     * @throws IOException On I/O errors.
     */
    public void writeBytes(byte[] barr, int off, int len) throws IndexOutOfBoundsException, IOException
    {
        if (off < 0)
        {
            throw new IndexOutOfBoundsException("Invalid offset " + off + ". It must be >= 0");
        }
        if (len < 0)
        {
            throw new IndexOutOfBoundsException("Invalid length " + len + ". It must be >= 0");
        }
        // Written as a subtraction so that off + len cannot overflow.
        if (len > barr.length - off)
        {
            throw new IndexOutOfBoundsException("Invalid offset + length (" + off + " + " + len + "). It must be <= the length of the supplied array (" + barr.length + ")");
        }
        if (len == 0)
        {
            return;
        }
        if (isAtByteBoundary())
        {
            // Special case: the bytes can be passed on unchanged.
            m_out.write(barr, off, len);
            m_numberOfBytesWritten += len;
        }
        else
        {
            // Copy the bytes to write to a new array. We cannot modify barr,
            // even if it is tempting.
            byte[] toWrite = new byte[len];
            System.arraycopy(barr, off, toWrite, 0, len);
            int prevByte = m_curByte;
            int leftShiftDistance = 8 - m_pointerInByte;
            for (int i = 0; i < len; i++)
            {
                // Shift in bits from the previous byte and shift out bits
                // from this byte
                int nextPrevByte = (toWrite[i] & 0xFF) << leftShiftDistance;
                toWrite[i] = (byte) ((prevByte | ((toWrite[i] & 0xFF) >>> m_pointerInByte)) & 0xFF);
                prevByte = nextPrevByte;
            }
            // The bits shifted out of the last byte form the new unfinished
            // byte; the bit pointer is unchanged.
            m_curByte = prevByte & 0xFF;
            m_out.write(toWrite);
            m_numberOfBytesWritten += len;
        }
    }

    @Override
    public void write(int b) throws IOException
    {
        assertAtByteBoundary();
        m_out.write(b);
        m_numberOfBytesWritten++;
    }

    @Override
    public void write(byte[] barr) throws IOException
    {
        write(barr, 0, barr.length);
    }

    @Override
    public void write(byte[] barr, int off, int len) throws IOException
    {
        assertAtByteBoundary();
        m_out.write(barr, off, len);
        m_numberOfBytesWritten += len;
    }

    /**
     * Flush the wrapped stream. Bits in the unfinished byte are not written;
     * see {@link #padToByteBoundary()}.
     */
    @Override
    public void flush() throws IOException
    {
        m_out.flush();
    }

    /**
     * Close the output stream.
     * <p>
     * This method does not automatically pad the last written bits to a full
     * byte. If there are bits written to it the stream must be padded before
     * closing it. See {@link #padToByteBoundary()}.
     */
    @Override
    public void close() throws IOException
    {
        m_out.close();
        super.close();
    }
}

View File

@ -0,0 +1,25 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Support classes that probably would have been in {@code java.io} if they
* had been a part of Java.
* @since 1.0
* @author Karl Gustafsson
*/
package org.at4j.support.io;

View File

@ -0,0 +1,58 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.lang;
/**
 * Common base class for the exceptions thrown by this project. Since it
 * extends {@link RuntimeException} it is unchecked.
 * @author Karl Gustafsson
 * @since 1.0
 */
public class At4JException extends RuntimeException
{
    private static final long serialVersionUID = 1L;

    /**
     * Create an exception with both a message and a cause.
     * @param msg The message.
     * @param t The exception that caused this one.
     */
    public At4JException(final String msg, final Throwable t)
    {
        super(msg, t);
    }

    /**
     * Create an exception carrying only a message.
     * @param msg The message.
     */
    public At4JException(final String msg)
    {
        super(msg);
    }

    /**
     * Create an exception that wraps a cause.
     * @param t The exception that caused this one.
     */
    public At4JException(final Throwable t)
    {
        super(t);
    }
}

View File

@ -0,0 +1,197 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.lang;
/**
 * An immutable wrapper around a plain, signed {@code int} value that adds
 * conversions to and from four byte long arrays.
 * <p>
 * Instances are obtained through the static creation methods; the constructor
 * is private.
 * <p>
 * Note on naming: the methods called "big-endian" here place the <i>least</i>
 * significant byte at index 0, and the "little-endian" ones place the
 * <i>most</i> significant byte at index 0.
 * @author Karl Gustafsson
 * @since 1.1.1
 * @see UnsignedInteger
 * @see SignedLong
 */
public class SignedInteger implements Comparable<SignedInteger>
{
    /**
     * The value {@code 0}.
     */
    public static final SignedInteger ZERO = new SignedInteger(0);

    /**
     * The value {@code 1}.
     */
    public static final SignedInteger ONE = new SignedInteger(1);

    // Number of bytes in the array representations.
    private static final int NO_OF_BYTES = 4;

    private final int m_value;

    /**
     * Wrap an integer value.
     * @param value The value.
     */
    private SignedInteger(int value)
    {
        m_value = value;
    }

    /**
     * Get a signed integer object for a value. The shared {@link #ZERO} and
     * {@link #ONE} instances are returned for 0 and 1.
     * @param value The integer value.
     * @return The signed integer object.
     */
    public static SignedInteger valueOf(int value)
    {
        switch (value)
        {
            case 0:
                return ZERO;
            case 1:
                return ONE;
            default:
                return new SignedInteger(value);
        }
    }

    /**
     * Get the wrapped value.
     * @return The signed integer value, widened to a {@code long}.
     */
    public long intValue()
    {
        return m_value;
    }

    /**
     * Get the value as a four bytes long array with the least significant byte
     * at index 0.
     * @return The byte array.
     * @see #fromBigEndianByteArray(byte[])
     * @see #getLittleEndianByteArray()
     */
    public byte[] getBigEndianByteArray()
    {
        byte[] bytes = new byte[NO_OF_BYTES];
        for (int i = 0; i < NO_OF_BYTES; i++)
        {
            bytes[i] = (byte) (m_value >> (8 * i));
        }
        return bytes;
    }

    /**
     * Create a signed integer from a four bytes long array that has the least
     * significant byte at index 0.
     * @param barr The byte array. It must be four bytes long.
     * @return The signed integer value.
     * @throws IllegalArgumentException If the byte array is not four bytes
     * long.
     * @see #getBigEndianByteArray()
     * @see #fromLittleEndianByteArray(byte[])
     */
    public static SignedInteger fromBigEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        assertFourBytes(barr);
        int value = 0;
        for (int i = 0; i < NO_OF_BYTES; i++)
        {
            // The byte lanes do not overlap, so OR-ing equals adding.
            value |= (barr[i] & 0xFF) << (8 * i);
        }
        return valueOf(value);
    }

    /**
     * Get the value as a four bytes long array with the most significant byte
     * at index 0.
     * @return The byte array.
     * @see #getBigEndianByteArray()
     * @see #fromBigEndianByteArray(byte[])
     */
    public byte[] getLittleEndianByteArray()
    {
        byte[] bytes = new byte[NO_OF_BYTES];
        for (int i = 0; i < NO_OF_BYTES; i++)
        {
            bytes[i] = (byte) (m_value >> (8 * (NO_OF_BYTES - 1 - i)));
        }
        return bytes;
    }

    /**
     * Create a signed integer from a four bytes long array that has the most
     * significant byte at index 0.
     * @param barr The byte array. It must be four bytes long.
     * @return The signed integer value.
     * @throws IllegalArgumentException If the byte array is not four bytes
     * long.
     * @see #getLittleEndianByteArray()
     * @see #fromBigEndianByteArray(byte[])
     */
    public static SignedInteger fromLittleEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        assertFourBytes(barr);
        int value = 0;
        for (int i = 0; i < NO_OF_BYTES; i++)
        {
            value |= (barr[i] & 0xFF) << (8 * (NO_OF_BYTES - 1 - i));
        }
        return valueOf(value);
    }

    // Reject arrays that are not exactly four bytes long.
    private static void assertFourBytes(byte[] barr) throws IllegalArgumentException
    {
        if (barr.length != NO_OF_BYTES)
        {
            throw new IllegalArgumentException("Illegal size of supplied byte array: " + barr.length + ". It must be four bytes long");
        }
    }

    @Override
    public boolean equals(Object o)
    {
        // instanceof is false for null, so no separate null check is needed.
        return (o instanceof SignedInteger) && ((SignedInteger) o).m_value == m_value;
    }

    @Override
    public int hashCode()
    {
        return m_value;
    }

    public int compareTo(SignedInteger l2)
    {
        int other = l2.m_value;
        if (m_value < other)
        {
            return -1;
        }
        return m_value == other ? 0 : 1;
    }

    @Override
    public String toString()
    {
        return Integer.toString(m_value);
    }
}

View File

@ -0,0 +1,213 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.lang;
/**
* This class represents a signed long value (i.e. a plain {@code long} value).
* If the {@link java.lang.Long} class had not been declared {@code final}, this
* class would probably have extended it.
* <p>
* Signed long objects are created by calling any of the static creation methods
* on this class.
* <p>
* Instances of this class are immutable.
* @author Karl Gustafsson
* @since 1.0
* @see UnsignedLong
* @see SignedInteger
*/
public class SignedLong implements Comparable<SignedLong>
{
    /**
     * Shared instance for the value {@code 0}.
     */
    public static final SignedLong ZERO = new SignedLong(0);

    /**
     * Shared instance for the value {@code 1}.
     */
    public static final SignedLong ONE = new SignedLong(1);

    private final long m_value;

    /**
     * Wrap a primitive {@code long}. Private so that all instances are handed
     * out by {@link #valueOf(long)}.
     * @param value The value to wrap.
     */
    private SignedLong(long value)
    {
        m_value = value;
    }

    /**
     * Get a signed long object for the supplied value. The shared
     * {@link #ZERO} and {@link #ONE} instances are returned for the values
     * {@code 0} and {@code 1}; every other value gets a new object.
     * @param value The long value.
     * @return The signed long value.
     */
    public static SignedLong valueOf(long value)
    {
        if (value == 0)
        {
            return ZERO;
        }
        if (value == 1)
        {
            return ONE;
        }
        return new SignedLong(value);
    }

    /**
     * Get the wrapped value as a primitive.
     * @return The signed long value.
     */
    public long longValue()
    {
        return m_value;
    }

    /**
     * Get the value as an eight bytes long array in the byte order that this
     * class calls big-endian: index 0 holds the least significant byte and
     * index 7 holds the most significant byte.
     * @return The eight bytes of the value, least significant byte first.
     * @see #fromBigEndianByteArray(byte[])
     * @see #getLittleEndianByteArray()
     */
    public byte[] getBigEndianByteArray()
    {
        byte[] bytes = new byte[8];
        for (int i = 0; i < bytes.length; i++)
        {
            bytes[i] = (byte) (m_value >> (8 * i));
        }
        return bytes;
    }

    /**
     * Create a signed long value from an eight bytes long array in the byte
     * order that this class calls big-endian: index 0 is read as the least
     * significant byte and index 7 as the most significant byte.
     * @param barr The byte array. It must be eight bytes long.
     * @return The signed long value.
     * @throws IllegalArgumentException If the byte array is not eight bytes
     * long.
     * @see #getBigEndianByteArray()
     * @see #fromLittleEndianByteArray(byte[])
     */
    public static SignedLong fromBigEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        if (barr.length != 8)
        {
            throw new IllegalArgumentException("Illegal size of supplied byte array: " + barr.length + ". It must be eight bytes long");
        }
        long assembled = 0L;
        for (int i = 0; i < 8; i++)
        {
            // Mask with a long so that the shift is never done in int arithmetic.
            assembled |= (barr[i] & 0xFFL) << (8 * i);
        }
        return valueOf(assembled);
    }

    /**
     * Get the value as an eight bytes long array in the byte order that this
     * class calls little-endian: index 0 holds the most significant byte and
     * index 7 holds the least significant byte.
     * @return The eight bytes of the value, most significant byte first.
     * @see #getBigEndianByteArray()
     * @see #fromBigEndianByteArray(byte[])
     */
    public byte[] getLittleEndianByteArray()
    {
        byte[] bytes = new byte[8];
        for (int i = 0; i < bytes.length; i++)
        {
            bytes[i] = (byte) (m_value >> (56 - 8 * i));
        }
        return bytes;
    }

    /**
     * Create a signed long value from an eight bytes long array in the byte
     * order that this class calls little-endian: index 0 is read as the most
     * significant byte and index 7 as the least significant byte.
     * @param barr The byte array. It must be eight bytes long.
     * @return The signed long value.
     * @throws IllegalArgumentException If the byte array is not eight bytes
     * long.
     * @see #getLittleEndianByteArray()
     * @see #fromBigEndianByteArray(byte[])
     */
    public static SignedLong fromLittleEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        if (barr.length != 8)
        {
            throw new IllegalArgumentException("Illegal size of supplied byte array: " + barr.length + ". It must be eight bytes long");
        }
        long assembled = 0L;
        for (int i = 0; i < 8; i++)
        {
            assembled |= (barr[i] & 0xFFL) << (56 - 8 * i);
        }
        return valueOf(assembled);
    }

    /**
     * Two signed longs are equal when they wrap the same primitive value.
     */
    @Override
    public boolean equals(Object o)
    {
        // instanceof is false for null, so no separate null check is needed.
        if (!(o instanceof SignedLong))
        {
            return false;
        }
        return ((SignedLong) o).m_value == m_value;
    }

    /**
     * The hash code is the upper and lower halves of the value XOR:ed together.
     */
    @Override
    public int hashCode()
    {
        long folded = m_value ^ (m_value >>> 32);
        return (int) folded;
    }

    /**
     * Order signed longs by their numeric value.
     */
    @Override
    public int compareTo(SignedLong l2)
    {
        return Long.compare(m_value, l2.m_value);
    }

    /**
     * The decimal representation of the value.
     */
    @Override
    public String toString()
    {
        return Long.toString(m_value);
    }
}

View File

@ -0,0 +1,197 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.lang;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
/**
* This object represents an unsigned byte (eight bits) with a value between
* {@code 0} and {@code 255} (inclusive). It is immutable.
* <p>
* Unsigned byte instances are created by calling any of the static {@code
* valueOf} methods on this class.
* @author Karl Gustafsson
* @since 1.0
* @see UnsignedShort
* @see UnsignedInteger
* @see UnsignedLong
*/
public final class UnsignedByte implements Serializable, Comparable<UnsignedByte>
{
    private static final long serialVersionUID = 1L;

    /**
     * The largest value that an unsigned byte can hold (255).
     */
    public static final short MAX_VALUE = (1 << 8) - 1;

    /**
     * The smallest value that an unsigned byte can hold (0).
     */
    public static final short MIN_VALUE = 0;

    /**
     * Shared instance for the value 0.
     */
    public static final UnsignedByte ZERO = new UnsignedByte((byte) 0);

    /**
     * Shared instance for the value 1.
     */
    public static final UnsignedByte ONE = new UnsignedByte((byte) 1);

    // The eight bits of the value, stored in a (signed) byte.
    private final byte m_value;

    private UnsignedByte(byte value)
    {
        m_value = value;
    }

    /**
     * Get an unsigned byte holding the eight bits of the supplied byte. The
     * bits are interpreted as unsigned, so a negative argument gives an
     * unsigned byte value between 128 and 255.
     * @param value The value.
     * @return The unsigned byte value.
     * @see #valueOf(short)
     * @see #valueOf(int)
     */
    public static UnsignedByte valueOf(byte value)
    {
        if (value == 0)
        {
            return ZERO;
        }
        if (value == 1)
        {
            return ONE;
        }
        return new UnsignedByte(value);
    }

    // Keep only the lowest eight bits of the value; no range check is made.
    private static UnsignedByte valueOfSafe(int value)
    {
        return valueOf((byte) (value & 0xFF));
    }

    // Throw if the value is outside of MIN_VALUE..MAX_VALUE.
    private static void requireInRange(int value) throws IllegalArgumentException
    {
        boolean inRange = (value >= MIN_VALUE) && (value <= MAX_VALUE);
        if (!inRange)
        {
            throw new IllegalArgumentException("Illegal unsigned byte value " + value + ". It must be between " + MIN_VALUE + " and " + MAX_VALUE + " (inclusive)");
        }
    }

    /**
     * Get an unsigned byte for a {@code short} in the range {@code 0} to
     * {@code 255} (inclusive).
     * @param value The value.
     * @return An unsigned byte value.
     * @throws IllegalArgumentException If the supplied value is not in the
     * permitted range.
     */
    public static UnsignedByte valueOf(short value) throws IllegalArgumentException
    {
        requireInRange(value);
        return valueOf((byte) value);
    }

    /**
     * Get an unsigned byte for an {@code int} in the range {@code 0} to
     * {@code 255} (inclusive).
     * @param value The value.
     * @return An unsigned byte value.
     * @throws IllegalArgumentException If the supplied value is not in the
     * permitted range.
     */
    public static UnsignedByte valueOf(int value) throws IllegalArgumentException
    {
        requireInRange(value);
        return valueOf((byte) value);
    }

    /**
     * Get the value as an {@code int} between {@code 0} and {@code 255}.
     * @return The value.
     */
    public int intValue()
    {
        return m_value & 0xFF;
    }

    /**
     * Get the value as a {@code short} between {@code 0} and {@code 255}.
     * @return The value.
     */
    public short shortValue()
    {
        return (short) intValue();
    }

    /**
     * Get the raw bits of the value as a signed byte between {@code -128} and
     * {@code 127} (inclusive).
     * @return The value.
     */
    public byte byteValue()
    {
        return m_value;
    }

    /**
     * Test one bit of the value.
     * @param no The index number of the bit. Bit 0 is the bit representing the
     * value 1, bit 7 is the bit representing the value 128.
     * @return {@code true} if the specified bit is set.
     * @throws IllegalArgumentException If {@code no} is not in the range
     * {@code 0 <= no <= 7} (inclusive).
     */
    public boolean isBitSet(int no) throws IllegalArgumentException
    {
        if (no < 0 || no > 7)
        {
            throw new IllegalArgumentException("Invalid bit number " + no + ". It must be between 0 and 7 (inclusive)");
        }
        int mask = 1 << no;
        return (intValue() & mask) != 0;
    }

    /**
     * Two unsigned bytes are equal when they hold the same eight bits.
     */
    @Override
    public boolean equals(Object o)
    {
        if (!(o instanceof UnsignedByte))
        {
            return false;
        }
        return m_value == ((UnsignedByte) o).m_value;
    }

    /**
     * The hash code is the value read as a signed byte.
     */
    @Override
    public int hashCode()
    {
        return m_value;
    }

    /**
     * Order unsigned bytes by their unsigned value. The result is the
     * difference between the two values, which cannot overflow since both are
     * between 0 and 255.
     */
    @Override
    public int compareTo(UnsignedByte b2)
    {
        int mine = intValue();
        int theirs = b2.intValue();
        return mine - theirs;
    }

    /**
     * The decimal representation of the unsigned value.
     */
    @Override
    public String toString()
    {
        return Integer.toString(intValue());
    }
}

View File

@ -0,0 +1,243 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.lang;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
/**
* This object represents an unsigned integer (four bytes or 32 bits) with a
* value between {@code 0} and {@code 4294967295}. It is immutable.
* <p>
* Unsigned integers are created by calling any of the static creation methods
* of this class.
* @author Karl Gustafsson
* @since 1.0
* @see SignedInteger
* @see UnsignedByte
* @see UnsignedShort
* @see UnsignedLong
*/
public final class UnsignedInteger implements Serializable, Comparable<UnsignedInteger>
{
    private static final long serialVersionUID = 1L;

    /**
     * Each unsigned integer is four bytes long.
     */
    public static final int SIZE = 4;

    /**
     * The maximum value of an unsigned integer (4294967295).
     */
    public static final long MAX_VALUE = (1L << 32) - 1;

    /**
     * The minimum value of an unsigned integer (0).
     */
    public static final int MIN_VALUE = 0;

    /**
     * The value 0.
     */
    public static final UnsignedInteger ZERO = new UnsignedInteger(0);

    /**
     * The value 1.
     */
    public static final UnsignedInteger ONE = new UnsignedInteger(1);

    // The 32 bits of the value, stored in a (signed) int.
    private final int m_value;

    private UnsignedInteger(int value)
    {
        m_value = value;
    }

    /**
     * Create a new unsigned integer. The supplied integer is treated as an
     * unsigned value, which means that negative argument values will result in
     * unsigned integer values between {@code 2147483648} and {@code 4294967295}
     * (inclusive).
     * <p>
     * The shared {@link #ZERO} and {@link #ONE} instances are returned for the
     * values {@code 0} and {@code 1}.
     * @param value The signed integer value.
     * @return An unsigned integer value.
     */
    public static UnsignedInteger valueOf(int value)
    {
        switch (value)
        {
            case 0:
                return ZERO;
            case 1:
                return ONE;
            default:
                return new UnsignedInteger(value);
        }
    }

    /**
     * Create an unsigned integer from the supplied long value which must be
     * between {@code 0} and {@code 4294967295} (inclusive).
     * @param value The value.
     * @return The unsigned integer value.
     * @throws IllegalArgumentException If the supplied value is not in the
     * permitted range.
     */
    public static UnsignedInteger valueOf(long value) throws IllegalArgumentException
    {
        if ((value < MIN_VALUE) || (value > MAX_VALUE))
        {
            throw new IllegalArgumentException("Illegal unsigned integer value " + value + ". It must be between " + MIN_VALUE + " and " + MAX_VALUE + " (inclusive)");
        }
        // The narrowing cast keeps exactly the lowest 32 bits, which is the
        // whole value after the range check above.
        return valueOf((int) value);
    }

    /**
     * Get the unsigned integer value represented as a {@code long}.
     * @return The value, between {@code 0} and {@code 4294967295}.
     */
    public long longValue()
    {
        return m_value & 0xFFFFFFFFL;
    }

    /**
     * Get the unsigned integer value converted to a signed integer. Values
     * above {@link Integer#MAX_VALUE} are returned as negative integers.
     * @return The unsigned integer value converted to a signed integer.
     */
    public int intValue()
    {
        return m_value;
    }

    /**
     * Get the unsigned integer value as a four bytes long byte array in the
     * byte order that this class calls big-endian: index 0 holds the least
     * significant byte and index 3 holds the most significant byte.
     * @return The four bytes of the value, least significant byte first.
     */
    public byte[] getBigEndianByteArray()
    {
        byte[] res = new byte[SIZE];
        for (int i = 0; i < SIZE; i++)
        {
            res[i] = (byte) (m_value >>> (8 * i));
        }
        return res;
    }

    /**
     * Create an unsigned integer value from a four bytes long byte array in
     * the byte order that this class calls big-endian: index 0 is read as the
     * least significant byte and index 3 as the most significant byte.
     * @param barr The byte array. It must be four bytes long.
     * @return The unsigned integer.
     * @throws IllegalArgumentException If the supplied byte array is not four
     * bytes long.
     * @see #fromBigEndianByteArray(byte[], int)
     * @see #fromBigEndianByteArrayToLong(byte[], int)
     */
    public static UnsignedInteger fromBigEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        if (barr.length != 4)
        {
            throw new IllegalArgumentException("The supplied byte array must be four bytes long");
        }
        return fromBigEndianByteArray(barr, 0);
    }

    /**
     * Create an unsigned integer value from four bytes read from the given
     * offset position in the supplied byte array. The most significant byte is
     * the last byte read.
     * @param barr The byte array to read from.
     * @param offset The offset in the byte array where the least significant
     * (first) byte is.
     * @return An unsigned integer.
     * @throws ArrayIndexOutOfBoundsException If the supplied array is too short
     * or if the offset is negative.
     * @see #fromBigEndianByteArray(byte[])
     * @see #fromBigEndianByteArrayToLong(byte[], int)
     */
    public static UnsignedInteger fromBigEndianByteArray(byte[] barr, int offset) throws ArrayIndexOutOfBoundsException
    {
        // int arithmetic is enough here: the 32 bits are stored as they are and
        // only interpreted as unsigned when they are read back.
        return valueOf((barr[offset] & 0xFF) + ((barr[offset + 1] & 0xFF) << 8) + ((barr[offset + 2] & 0xFF) << 16) + ((barr[offset + 3] & 0xFF) << 24));
    }

    /**
     * Create a long value representing the unsigned integer value in the byte
     * array at the specified offset. The most significant byte is the last byte
     * read.
     * <p>
     * The result is always between {@code 0} and {@code 4294967295}, also when
     * the highest bit of the most significant byte is set.
     * @param barr The byte array to read from.
     * @param offset The offset in the byte array where the least significant
     * (first) byte is.
     * @return A {@code long} representing the unsigned integer.
     * @throws ArrayIndexOutOfBoundsException If the supplied array is too short
     * or if the offset is negative.
     * @see #fromBigEndianByteArray(byte[])
     * @see #fromBigEndianByteArray(byte[], int)
     * @see #fromLittleEndianByteArrayToLong(byte[], int)
     * @since 1.1
     */
    public static long fromBigEndianByteArrayToLong(byte[] barr, int offset) throws ArrayIndexOutOfBoundsException
    {
        // The masks are long constants so that the sum is computed in long
        // arithmetic. With int arithmetic a set top bit would make the sum a
        // negative int, and widening it would give a negative long.
        return (barr[offset] & 0xFFL) + ((barr[offset + 1] & 0xFFL) << 8) + ((barr[offset + 2] & 0xFFL) << 16) + ((barr[offset + 3] & 0xFFL) << 24);
    }

    /**
     * Create a long value representing the unsigned integer value in the byte
     * array at the specified offset. The most significant byte is the first
     * byte read.
     * <p>
     * The result is always between {@code 0} and {@code 4294967295}, also when
     * the highest bit of the most significant byte is set.
     * @param barr The byte array to read from.
     * @param offset The offset in the byte array where the most significant
     * (first) byte is.
     * @return A {@code long} representing the unsigned integer.
     * @throws ArrayIndexOutOfBoundsException If the supplied array is too short
     * or if the offset is negative.
     * @see #fromBigEndianByteArrayToLong(byte[], int)
     * @since 1.1
     */
    public static long fromLittleEndianByteArrayToLong(byte[] barr, int offset) throws ArrayIndexOutOfBoundsException
    {
        // Long masks for the same reason as in fromBigEndianByteArrayToLong.
        return (barr[offset + 3] & 0xFFL) + ((barr[offset + 2] & 0xFFL) << 8) + ((barr[offset + 1] & 0xFFL) << 16) + ((barr[offset] & 0xFFL) << 24);
    }

    /**
     * Two unsigned integers are equal when they hold the same 32 bits.
     */
    @Override
    public boolean equals(Object o)
    {
        return (o instanceof UnsignedInteger) && (((UnsignedInteger) o).m_value == m_value);
    }

    /**
     * The hash code is the value read as a signed integer.
     */
    @Override
    public int hashCode()
    {
        return m_value;
    }

    /**
     * Order unsigned integers by their unsigned value.
     */
    @Override
    public int compareTo(UnsignedInteger i2)
    {
        // Compare the widened values as primitives; no boxing is needed.
        return Long.compare(longValue(), i2.longValue());
    }

    /**
     * The decimal representation of the unsigned value.
     */
    @Override
    public String toString()
    {
        return Long.toString(longValue());
    }
}

View File

@ -0,0 +1,224 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.lang;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.math.BigInteger;
/**
* This object represents an unsigned long (eight bytes or 64 bits) with a value
* between {@code 0} and {@code 18446744073709551615}. It is immutable.
* <p>
* Unsigned longs are created by calling any of the static creation methods of
* this class.
* @author Karl Gustafsson
* @since 1.0
* @see SignedLong
* @see UnsignedByte
* @see UnsignedShort
* @see UnsignedInteger
*/
public final class UnsignedLong implements Serializable, Comparable<UnsignedLong>
{
    private static final long serialVersionUID = 1L;

    /**
     * The minimum allowed value (0).
     */
    public static final BigInteger MIN_VALUE = BigInteger.valueOf(0L);

    /**
     * The maximum allowed value (18446744073709551615).
     */
    public static final BigInteger MAX_VALUE;

    /**
     * The value zero.
     */
    public static final UnsignedLong ZERO = new UnsignedLong(0L);

    /**
     * The value one.
     */
    public static final UnsignedLong ONE = new UnsignedLong(1L);

    // The value of the highest (64th) bit, 2^63. Added back in bigIntValue()
    // for values that are stored as negative longs.
    private static final BigInteger HIGHEST_BIT_VALUE;

    static
    {
        BigInteger mv = BigInteger.valueOf(2L);
        MAX_VALUE = mv.pow(64).subtract(BigInteger.ONE);
        HIGHEST_BIT_VALUE = mv.pow(63);
    }

    // The 64 bits of the value, stored in a (signed) long.
    private final long m_value;

    private UnsignedLong(long value)
    {
        m_value = value;
    }

    /**
     * Create an unsigned long. The supplied value is treated as an unsigned
     * long, which means that negative argument values will result in unsigned
     * long values between {@code 9223372036854775808} and {@code
     * 18446744073709551615} (inclusive).
     * <p>
     * The shared {@link #ZERO} and {@link #ONE} instances are returned for the
     * values {@code 0} and {@code 1}.
     * @param value The value.
     * @return An unsigned long value.
     */
    public static UnsignedLong valueOf(long value)
    {
        if (value == 0L)
        {
            return ZERO;
        }
        else if (value == 1L)
        {
            return ONE;
        }
        else
        {
            return new UnsignedLong(value);
        }
    }

    /**
     * Create an unsigned long value from the supplied {@link BigInteger} value
     * which must be in the range {@code 0} to {@code 18446744073709551615}
     * (inclusive).
     * @param value The value.
     * @return An unsigned long value.
     * @throws IllegalArgumentException If the supplied value is negative or if
     * it is greater than {@link #MAX_VALUE}.
     */
    public static UnsignedLong valueOf(BigInteger value) throws IllegalArgumentException
    {
        if ((value.compareTo(MIN_VALUE) < 0) || (value.compareTo(MAX_VALUE) > 0))
        {
            throw new IllegalArgumentException("Illegal unsigned long value " + value + ". It must be between 0 and " + MAX_VALUE + " (inclusive)");
        }
        // BigInteger.longValue() returns the lowest 64 bits, which is the whole
        // value after the range check above.
        return valueOf(value.longValue());
    }

    /**
     * Get the unsigned long value as a {@link BigInteger}.
     * @return The unsigned long value as a {@link BigInteger}.
     */
    public BigInteger bigIntValue()
    {
        // Convert the lowest 63 bits, then add the value of the highest bit if
        // it is set (that is, if the stored long is negative).
        BigInteger res = BigInteger.valueOf(m_value & 0x7FFFFFFFFFFFFFFFL);
        return m_value < 0 ? res.add(HIGHEST_BIT_VALUE) : res;
    }

    /**
     * Return the value as a signed long. If the value is less than or equal to
     * {@link Long#MAX_VALUE}, it is returned as a non-negative long. If not, it
     * is returned as a negative long.
     * @return The value as a signed long value.
     */
    public long longValue()
    {
        return m_value;
    }

    /**
     * Get the unsigned long value as an eight bytes long byte array in the
     * byte order that this class calls big-endian: index 0 holds the least
     * significant byte and index 7 holds the most significant byte.
     * @return The eight bytes of the value, least significant byte first.
     */
    public byte[] getBigEndianByteArray()
    {
        byte[] res = new byte[8];
        for (int i = 0; i < res.length; i++)
        {
            res[i] = (byte) (m_value >>> (8 * i));
        }
        return res;
    }

    /**
     * Create an unsigned long value from an eight bytes long byte array in the
     * byte order that this class calls big-endian: index 0 is read as the
     * least significant byte and index 7 as the most significant byte.
     * @param barr The byte array. It must be eight bytes long.
     * @return The unsigned long.
     * @throws IllegalArgumentException If the supplied byte array is not eight
     * bytes long.
     * @see #fromBigEndianByteArray(byte[], int)
     */
    public static UnsignedLong fromBigEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        if (barr.length != 8)
        {
            throw new IllegalArgumentException("The supplied byte array must be eight bytes long");
        }
        return fromBigEndianByteArray(barr, 0);
    }

    /**
     * Create an unsigned long value from eight bytes read from the given offset
     * position in the supplied byte array. The most significant byte is the
     * last byte read.
     * @param barr The byte array to read from.
     * @param offset The offset in the byte array where the least significant
     * (first) byte is.
     * @return An unsigned long.
     * @throws ArrayIndexOutOfBoundsException If the supplied array is too short
     * or if the offset is negative.
     * @see #fromBigEndianByteArray(byte[])
     */
    public static UnsignedLong fromBigEndianByteArray(byte[] barr, int offset) throws ArrayIndexOutOfBoundsException
    {
        long value = 0L;
        // Read from the lowest index upwards so that a bad offset fails on the
        // first byte that is out of bounds.
        for (int i = 0; i < 8; i++)
        {
            value |= (barr[offset + i] & 0xFFL) << (8 * i);
        }
        return valueOf(value);
    }

    /**
     * Two unsigned longs are equal when they hold the same 64 bits.
     */
    @Override
    public boolean equals(Object o)
    {
        // instanceof is false for null, so no separate null check is needed.
        return (o instanceof UnsignedLong) && (((UnsignedLong) o).m_value == m_value);
    }

    /**
     * The hash code is the upper and lower halves of the value XOR:ed together.
     */
    @Override
    public int hashCode()
    {
        return (int) (m_value ^ (m_value >>> 32));
    }

    /**
     * Order unsigned longs by their unsigned value.
     */
    @Override
    public int compareTo(UnsignedLong l2)
    {
        // Flipping the sign bit of both operands maps the unsigned order onto
        // the signed order, so the comparison can be made on primitives
        // without creating any BigInteger objects.
        return Long.compare(m_value ^ Long.MIN_VALUE, l2.m_value ^ Long.MIN_VALUE);
    }

    /**
     * The decimal representation of the unsigned value.
     */
    @Override
    public String toString()
    {
        return bigIntValue().toString();
    }
}

View File

@ -0,0 +1,197 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.at4j.support.lang;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
/**
* This object represents an unsigned short value (two bytes or 16 bits) with a
* value between {@code 0} and {@code 65535}. It is immutable.
* <p>
* Unsigned shorts are created by calling any of the static creation methods of
* this class.
* @author Karl Gustafsson
* @since 1.0
* @see UnsignedByte
* @see UnsignedInteger
* @see UnsignedLong
*/
public final class UnsignedShort implements Serializable, Comparable<UnsignedShort>
{
    private static final long serialVersionUID = 1L;

    /**
     * The number of bytes in an unsigned short (two).
     */
    public static final int SIZE = 2;

    /**
     * The largest value that an unsigned short can hold (65535).
     */
    public static final int MAX_VALUE = (1 << 16) - 1;

    /**
     * The smallest value that an unsigned short can hold (0).
     */
    public static final int MIN_VALUE = 0;

    /**
     * Shared instance for the value 0.
     */
    public static final UnsignedShort ZERO = new UnsignedShort((short) 0);

    /**
     * Shared instance for the value 1.
     */
    public static final UnsignedShort ONE = new UnsignedShort((short) 1);

    /**
     * Shared instance for the value 1000.
     */
    public static final UnsignedShort ONE_THOUSAND = new UnsignedShort((short) 1000);

    // The 16 bits of the value, stored in a (signed) short.
    private final short m_value;

    private UnsignedShort(short value)
    {
        m_value = value;
    }

    /**
     * Get an unsigned short holding the 16 bits of the supplied short. The
     * bits are interpreted as unsigned, so a negative argument gives an
     * unsigned short value between {@code 32768} and {@code 65535}
     * (inclusive). Shared instances are returned for the values 0, 1 and 1000.
     * @param value The signed short value.
     * @return An unsigned short value.
     */
    public static UnsignedShort valueOf(short value)
    {
        if (value == 0)
        {
            return ZERO;
        }
        if (value == 1)
        {
            return ONE;
        }
        if (value == 1000)
        {
            return ONE_THOUSAND;
        }
        return new UnsignedShort(value);
    }

    /**
     * Get an unsigned short for an {@code int} in the range {@code 0} to
     * {@code 65535} (inclusive).
     * @param value The value.
     * @return The unsigned short value.
     * @throws IllegalArgumentException If the supplied value is not in the
     * permitted range.
     */
    public static UnsignedShort valueOf(int value) throws IllegalArgumentException
    {
        boolean inRange = (value >= MIN_VALUE) && (value <= MAX_VALUE);
        if (!inRange)
        {
            throw new IllegalArgumentException("Illegal unsigned short value " + value + ". It must be between " + MIN_VALUE + " and " + MAX_VALUE + " (inclusive)");
        }
        // The narrowing cast keeps exactly the lowest 16 bits.
        return valueOf((short) value);
    }

    /**
     * Get the value as an {@code int} between {@code 0} and {@code 65535}.
     * @return The value.
     */
    public int intValue()
    {
        return m_value & 0xFFFF;
    }

    /**
     * Get the value as a two bytes long byte array in the byte order that
     * this class calls big-endian: index 0 holds the least significant byte
     * and index 1 holds the most significant byte.
     * @return The two bytes of the value, least significant byte first.
     */
    public byte[] getBigEndianByteArray()
    {
        byte low = (byte) m_value;
        byte high = (byte) (m_value >> 8);
        return new byte[] {low, high};
    }

    /**
     * Create an unsigned short value from a two bytes long byte array in the
     * byte order that this class calls big-endian: index 0 is read as the
     * least significant byte and index 1 as the most significant byte.
     * @param barr The byte array. It must be two bytes long.
     * @return The unsigned short.
     * @throws IllegalArgumentException If the supplied byte array is not two
     * bytes long.
     * @see #fromBigEndianByteArray(byte[], int)
     */
    public static UnsignedShort fromBigEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        if (barr.length == 2)
        {
            return fromBigEndianByteArray(barr, 0);
        }
        throw new IllegalArgumentException("The supplied byte array must be two bytes long");
    }

    /**
     * Create an unsigned short value from two bytes read from the given offset
     * position in the supplied byte array. The byte at {@code offset} is the
     * least significant byte and the byte after it is the most significant
     * byte.
     * @param barr The byte array to read from.
     * @param offset The offset in the byte array where the least significant
     * (first) byte is.
     * @return An unsigned short.
     * @throws ArrayIndexOutOfBoundsException If the supplied array is too short
     * or if the offset is negative.
     * @see #fromBigEndianByteArray(byte[])
     */
    public static UnsignedShort fromBigEndianByteArray(byte[] barr, int offset) throws ArrayIndexOutOfBoundsException
    {
        int low = barr[offset] & 0xFF;
        int high = barr[offset + 1] & 0xFF;
        return valueOf((short) ((high << 8) | low));
    }

    /**
     * Two unsigned shorts are equal when they hold the same 16 bits.
     */
    @Override
    public boolean equals(Object o)
    {
        if (!(o instanceof UnsignedShort))
        {
            return false;
        }
        return m_value == ((UnsignedShort) o).m_value;
    }

    /**
     * The hash code is the value read as a signed short.
     */
    @Override
    public int hashCode()
    {
        return m_value;
    }

    /**
     * Order unsigned shorts by their unsigned value. The result is the
     * difference between the two values, which cannot overflow since both are
     * between 0 and 65535.
     */
    @Override
    public int compareTo(UnsignedShort s2)
    {
        int mine = intValue();
        int theirs = s2.intValue();
        return mine - theirs;
    }

    /**
     * The decimal representation of the unsigned value.
     */
    @Override
    public String toString()
    {
        return Integer.toString(intValue());
    }
}

View File

@ -0,0 +1,25 @@
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
* Copyright (C) 2009 Karl Gustafsson
*
* This file is a part of AT4J
*
* AT4J is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Support classes that probably would have been in {@code java.lang} if they
* had been a part of Java.
* @since 1.0
* @author Karl Gustafsson
*/
package org.at4j.support.lang;

View File

@ -29,6 +29,6 @@
"depends": {
"fabricloader": ">=0.7.2",
"fabric": "*",
"minecraft": "1.16.*"
"minecraft": "1.15.*"
}
}