Merge pull request #9 from Szum123321/multiple_compression_format_dev
Multiple compression format dev
This commit is contained in:
commit
434a16011a
10
Copyright_Notice.txt
Normal file
10
Copyright_Notice.txt
Normal file
@ -0,0 +1,10 @@
|
||||
This project uses third-party libraries as dependencies and bundles them in its jar. They are:
|
||||
Apache Commons Compress licensed under Apache License Version 2.0 which can be found at http://www.apache.org/licenses/
|
||||
Cotton config, Cotton logging, and Jankson-Fabric all by Cotton team licensed under MIT license which can be found at https://github.com/CottonMC/Cotton
|
||||
XZ for Java by Tukaani released as public domain. https://tukaani.org/xz/java.html
|
||||
parallelgzip by shevek under Apache 2.0 http://www.apache.org/licenses/
|
||||
Parallel BZip2 compression by Karl Gustafsson at http://at4j.sourceforge.net/ under LGPL v3
|
||||
|
||||
Some code was partially or fully inspired by:
|
||||
Parallel zip compression: https://stackoverflow.com/questions/54624695/how-to-implement-parallel-zip-creation-with-scatterzipoutputstream-with-zip64-su
|
||||
answer by: https://stackoverflow.com/users/2987755/dkb
|
@ -15,14 +15,14 @@ Available operations are:
|
||||
* whitelist - here you can add, remove and list players that are allowed to run any operation within this mod despite not having a high enough permission level*
|
||||
* blacklist - here you can add, remove and list players that are not allowed to run any operation within this mod despite having a high enough permission level*
|
||||
|
||||
All of above can only be done by server admins(permission level 4 - configurable*) / player on single player with cheats on.
|
||||
All of the above can only be done by server admins (permission level 4 - configurable*) or by the player in a single-player world.
|
||||
|
||||
Feel free to use this mod in your modpack or on server!
|
||||
Feel free to use this mod in your modpack or on a server!
|
||||
|
||||
### Important
|
||||
|
||||
* The default time format used by this mod is dd.MM.yyyy_HH-mm-ss, although it is configurable*.
|
||||
* _This mod contains **Cotton Config** and its dependencies as jars in jar, which are property of **CottonMC**_.
|
||||
* _This mod contains **Cotton Config** and its dependencies as jars in a jar, which are the property of **CottonMC**_.
|
||||
|
||||
\* - feature available since 1.1.0
|
||||
|
||||
|
12
build.gradle
12
build.gradle
@ -15,6 +15,7 @@ minecraft {
|
||||
|
||||
repositories{
|
||||
maven { url 'http://server.bbkr.space:8081/artifactory/libs-release' }
|
||||
maven { url 'https://jitpack.io' }
|
||||
}
|
||||
|
||||
|
||||
@ -33,11 +34,14 @@ dependencies {
|
||||
include "io.github.cottonmc.cotton:cotton-logging:1.0.0-rc.4"
|
||||
include "io.github.cottonmc.cotton:cotton-config:1.0.0-rc.7"
|
||||
|
||||
include "org.apache.commons:commons-compress:1.9"
|
||||
include "org.apache.commons:commons-io:1.3.2"
|
||||
modCompile "org.apache.commons:commons-compress:1.13"
|
||||
include "org.apache.commons:commons-compress:1.13"
|
||||
|
||||
// PSA: Some older mods, compiled on Loom 0.2.1, might have outdated Maven POMs.
|
||||
// You may need to force-disable transitiveness on them.
|
||||
modCompile "org.tukaani:xz:1.8"
|
||||
include "org.tukaani:xz:1.8"
|
||||
|
||||
modCompile 'com.github.shevek:parallelgzip:master-SNAPSHOT'
|
||||
include 'com.github.shevek:parallelgzip:master-SNAPSHOT'
|
||||
}
|
||||
|
||||
processResources {
|
||||
|
@ -1,14 +1,14 @@
|
||||
# Done to increase the memory available to gradle.
|
||||
org.gradle.jvmargs=-Xmx1G
|
||||
|
||||
minecraft_version=20w14a
|
||||
yarn_mappings=20w14a+build.1
|
||||
loader_version=0.7.9+build.190
|
||||
minecraft_version=1.15.2
|
||||
yarn_mappings=1.15.2+build.15
|
||||
loader_version=0.8.2+build.194
|
||||
|
||||
#Fabric api
|
||||
fabric_version=0.5.7+build.314-1.16
|
||||
fabric_version=0.5.1+build.294-1.15
|
||||
|
||||
# Mod Properties
|
||||
mod_version = 1.1.1-1.15
|
||||
maven_group = net.szum123321
|
||||
archives_base_name = textile_backup
|
||||
mod_version = 1.2.0-1.15
|
||||
maven_group = net.szum123321
|
||||
archives_base_name = textile_backup
|
@ -54,8 +54,15 @@ public class ConfigHandler {
|
||||
@Comment("\nMaximum size of backup folder in kilo bytes. \n")
|
||||
public int maxSize = 0;
|
||||
|
||||
@Comment("\nCompression level \n0 - 9\n")
|
||||
public int compression = 1;
|
||||
@Comment("\nCompression level \n0 - 9\n Only available for zip compression.\n")
|
||||
public int compression = 6;
|
||||
|
||||
@Comment(value = "\nAvailable formats are:\n" +
|
||||
"ZIP - normal zip archive using standard deflate compression\n" +
|
||||
"GIZP - tar.gz using gzip compression\n" +
|
||||
"BZIP2 - tar.bz2 archive using bzip2 compression\n" +
|
||||
"LZMA - tar.xz using lzma compression\n")
|
||||
public ArchiveFormat format = ArchiveFormat.ZIP;
|
||||
|
||||
@Comment("\nPrint info to game out\n")
|
||||
public boolean log = true;
|
||||
@ -74,4 +81,21 @@ public class ConfigHandler {
|
||||
|
||||
@Comment("\nFormat of date&time used to name backup files.\n")
|
||||
public String dateTimeFormat = "dd.MM.yyyy_HH-mm-ss";
|
||||
|
||||
/**
 * Archive formats a backup can be written in, each paired with the file name
 * suffix that is appended to archives of that format.
 */
public enum ArchiveFormat {
    ZIP(".zip"),
    GZIP(".tar.gz"),
    BZIP2(".tar.bz2"),
    LZMA(".tar.xz");

    /** File name suffix, including the leading dot. */
    private final String extension;

    // Enum constructors are implicitly private.
    ArchiveFormat(String suffix) {
        extension = suffix;
    }

    /**
     * @return the file name suffix for this format, including the leading dot
     */
    public String getExtension() {
        return this.extension;
    }
}
|
||||
}
|
||||
|
@ -21,7 +21,6 @@ package net.szum123321.textile_backup.commands;
|
||||
import com.mojang.brigadier.builder.LiteralArgumentBuilder;
|
||||
import net.minecraft.server.command.CommandManager;
|
||||
import net.minecraft.server.command.ServerCommandSource;
|
||||
import net.minecraft.text.LiteralText;
|
||||
import net.minecraft.world.dimension.DimensionType;
|
||||
import net.szum123321.textile_backup.core.BackupHelper;
|
||||
|
||||
@ -33,7 +32,6 @@ public class CleanupCommand {
|
||||
|
||||
private static int execute(ServerCommandSource source){
|
||||
BackupHelper.executeFileLimit(source, source.getMinecraftServer().getWorld(DimensionType.OVERWORLD).getLevelProperties().getLevelName());
|
||||
source.sendFeedback(new LiteralText("Done"), false);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -21,124 +21,152 @@ package net.szum123321.textile_backup.core;
|
||||
import net.fabricmc.loader.api.FabricLoader;
|
||||
import net.minecraft.server.MinecraftServer;
|
||||
import net.minecraft.server.command.ServerCommandSource;
|
||||
import net.szum123321.textile_backup.ConfigHandler;
|
||||
import net.szum123321.textile_backup.TextileBackup;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.attribute.FileTime;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
public class BackupHelper {
|
||||
public static void create(MinecraftServer server, ServerCommandSource ctx, boolean save, String comment) {
|
||||
LocalDateTime now = LocalDateTime.now();
|
||||
public static Thread create(MinecraftServer server, ServerCommandSource ctx, boolean save, String comment) {
|
||||
LocalDateTime now = LocalDateTime.now();
|
||||
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("Backup started by: ");
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("Backup started by: ");
|
||||
|
||||
if( ctx != null )
|
||||
builder.append(ctx.getName());
|
||||
else
|
||||
builder.append("SERVER");
|
||||
if (ctx != null)
|
||||
builder.append(ctx.getName());
|
||||
else
|
||||
builder.append("SERVER");
|
||||
|
||||
builder.append(" on: ");
|
||||
builder.append(Utilities.getDateTimeFormatter().format(now));
|
||||
builder.append(" on: ");
|
||||
builder.append(Utilities.getDateTimeFormatter().format(now));
|
||||
|
||||
Utilities.log(builder.toString(), null);
|
||||
Utilities.log(builder.toString(), null);
|
||||
|
||||
Utilities.log("Saving server...", ctx);
|
||||
Utilities.log("Saving server...", ctx);
|
||||
|
||||
if(save)
|
||||
server.save(true, false, false);
|
||||
if (save)
|
||||
server.save(true, true, false);
|
||||
|
||||
Thread thread = new Thread(new MakeBackupThread(server, ctx, comment));
|
||||
Thread thread = new Thread(new MakeBackupThread(server, ctx, comment));
|
||||
|
||||
thread.start();
|
||||
}
|
||||
thread.start();
|
||||
|
||||
public static void executeFileLimit(ServerCommandSource ctx, String worldName){
|
||||
File root = getBackupRootPath(worldName);
|
||||
return thread;
|
||||
}
|
||||
|
||||
FileFilter filter = f -> f.getName().endsWith("zip");
|
||||
public static void executeFileLimit(ServerCommandSource ctx, String worldName) {
|
||||
File root = getBackupRootPath(worldName);
|
||||
|
||||
if(root.isDirectory() && root.exists()){
|
||||
if(TextileBackup.config.maxAge > 0){
|
||||
LocalDateTime now = LocalDateTime.now();
|
||||
if (root.isDirectory() && root.exists()) {
|
||||
if (TextileBackup.config.maxAge > 0) {
|
||||
LocalDateTime now = LocalDateTime.now();
|
||||
|
||||
Arrays.stream(root.listFiles()).forEach(f ->{
|
||||
if(f.exists() && f.isFile()){
|
||||
LocalDateTime creationTime;
|
||||
Arrays.stream(root.listFiles()).filter(f -> f.exists() && f.isFile()).forEach(f -> {
|
||||
LocalDateTime creationTime;
|
||||
|
||||
try {
|
||||
creationTime = LocalDateTime.from(
|
||||
Utilities.getDateTimeFormatter().parse(
|
||||
f.getName().split(".zip")[0].split("#")[0]
|
||||
)
|
||||
);
|
||||
}catch(Exception e){
|
||||
creationTime = LocalDateTime.from(
|
||||
Utilities.getBackupDateTimeFormatter().parse(
|
||||
f.getName().split(".zip")[0].split("#")[0]
|
||||
)
|
||||
);
|
||||
try {
|
||||
try {
|
||||
FileTime fileTime = (FileTime) Files.getAttribute(f.toPath(), "creationTime");
|
||||
|
||||
}
|
||||
creationTime = LocalDateTime.ofInstant(fileTime.toInstant(), ZoneOffset.UTC);
|
||||
} catch (IOException ignored) {
|
||||
try {
|
||||
creationTime = LocalDateTime.from(
|
||||
Utilities.getDateTimeFormatter().parse(
|
||||
f.getName().split(Objects.requireNonNull(getFileExtension(f)))[0].split("#")[0]
|
||||
)
|
||||
);
|
||||
} catch (Exception ignored2) {
|
||||
creationTime = LocalDateTime.from(
|
||||
Utilities.getBackupDateTimeFormatter().parse(
|
||||
f.getName().split(Objects.requireNonNull(getFileExtension(f)))[0].split("#")[0]
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if(now.toEpochSecond(ZoneOffset.UTC) - creationTime.toEpochSecond(ZoneOffset.UTC) > TextileBackup.config.maxAge) {
|
||||
Utilities.log("Deleting: " + f.getName(), ctx);
|
||||
f.delete();
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
if (now.toEpochSecond(ZoneOffset.UTC) - creationTime.toEpochSecond(ZoneOffset.UTC) > TextileBackup.config.maxAge) {
|
||||
Utilities.log("Deleting: " + f.getName(), ctx);
|
||||
f.delete();
|
||||
}
|
||||
} catch (NullPointerException ignored3) {}
|
||||
});
|
||||
}
|
||||
|
||||
if(TextileBackup.config.backupsToKeep > 0 && root.listFiles().length > TextileBackup.config.backupsToKeep){
|
||||
int var1 = root.listFiles().length - TextileBackup.config.backupsToKeep;
|
||||
if (TextileBackup.config.backupsToKeep > 0 && root.listFiles().length > TextileBackup.config.backupsToKeep) {
|
||||
int var1 = root.listFiles().length - TextileBackup.config.backupsToKeep;
|
||||
|
||||
File[] files = root.listFiles(filter);
|
||||
assert files != null;
|
||||
File[] files = root.listFiles();
|
||||
assert files != null;
|
||||
|
||||
Arrays.sort(files);
|
||||
Arrays.sort(files);
|
||||
|
||||
for(int i = 0; i < var1; i++) {
|
||||
Utilities.log("Deleting: " + files[i].getName(), ctx);
|
||||
files[i].delete();
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < var1; i++) {
|
||||
Utilities.log("Deleting: " + files[i].getName(), ctx);
|
||||
files[i].delete();
|
||||
}
|
||||
}
|
||||
|
||||
if(TextileBackup.config.maxSize > 0 && FileUtils.sizeOfDirectory(root) / 1024 > TextileBackup.config.maxSize){
|
||||
Arrays.stream(root.listFiles()).sorted().forEach(e -> {
|
||||
if(FileUtils.sizeOfDirectory(root) / 1024 > TextileBackup.config.maxSize){
|
||||
Utilities.log("Deleting: " + e.getName(), ctx);
|
||||
e.delete();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
if (TextileBackup.config.maxSize > 0 && FileUtils.sizeOfDirectory(root) / 1024 > TextileBackup.config.maxSize) {
|
||||
Arrays.stream(root.listFiles()).filter(File::isFile).sorted().forEach(e -> {
|
||||
if (FileUtils.sizeOfDirectory(root) / 1024 > TextileBackup.config.maxSize) {
|
||||
Utilities.log("Deleting: " + e.getName(), ctx);
|
||||
e.delete();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static File getBackupRootPath(String worldName){
|
||||
File path = new File(TextileBackup.config.path);
|
||||
private static String getFileExtension(File f) {
|
||||
String[] parts = f.getName().split("\\.");
|
||||
|
||||
if(TextileBackup.config.perWorldBackup)
|
||||
path = path.toPath().resolve(worldName).toFile();
|
||||
switch (parts[parts.length - 1]) {
|
||||
case "zip":
|
||||
return ConfigHandler.ArchiveFormat.ZIP.getExtension();
|
||||
case "bz2":
|
||||
return ConfigHandler.ArchiveFormat.BZIP2.getExtension();
|
||||
case "gz":
|
||||
return ConfigHandler.ArchiveFormat.GZIP.getExtension();
|
||||
case "xz":
|
||||
return ConfigHandler.ArchiveFormat.LZMA.getExtension();
|
||||
|
||||
if(!path.exists()){
|
||||
try{
|
||||
path.mkdirs();
|
||||
}catch(Exception e){
|
||||
TextileBackup.logger.error(e.getMessage());
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
return FabricLoader
|
||||
.getInstance()
|
||||
.getGameDirectory()
|
||||
.toPath()
|
||||
.resolve(TextileBackup.config.path)
|
||||
.toFile();
|
||||
}
|
||||
}
|
||||
|
||||
return path;
|
||||
}
|
||||
}
|
||||
public static File getBackupRootPath(String worldName) {
|
||||
File path = new File(TextileBackup.config.path).getAbsoluteFile();
|
||||
|
||||
if (TextileBackup.config.perWorldBackup)
|
||||
path = path.toPath().resolve(worldName).toFile();
|
||||
|
||||
if (!path.exists()) {
|
||||
try {
|
||||
path.mkdirs();
|
||||
} catch (Exception e) {
|
||||
TextileBackup.logger.error(e.getMessage());
|
||||
|
||||
return FabricLoader
|
||||
.getInstance()
|
||||
.getGameDirectory()
|
||||
.toPath()
|
||||
.resolve(TextileBackup.config.path)
|
||||
.toFile();
|
||||
}
|
||||
}
|
||||
|
||||
return path;
|
||||
}
|
||||
}
|
@ -1,69 +0,0 @@
|
||||
/*
|
||||
A simple backup mod for Fabric
|
||||
Copyright (C) 2020 Szum123321
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.szum123321.textile_backup.core;
|
||||
|
||||
import net.minecraft.server.command.ServerCommandSource;
|
||||
import net.szum123321.textile_backup.TextileBackup;
|
||||
import org.apache.commons.compress.utils.IOUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
public class Compressor {
|
||||
public static void createArchive(File in, File out, ServerCommandSource ctx){
|
||||
Utilities.log("Starting compression...", ctx);
|
||||
|
||||
try {
|
||||
File input = in.getCanonicalFile();
|
||||
|
||||
ZipOutputStream arc = new ZipOutputStream(new FileOutputStream(out));
|
||||
|
||||
arc.setLevel(TextileBackup.config.compression);
|
||||
arc.setComment("Created on: " + Utilities.getDateTimeFormatter().format(LocalDateTime.now()));
|
||||
|
||||
int rootPathLength = input.toString().length() + 1;
|
||||
|
||||
Files.walk(input.toPath()).filter(path -> !path.equals(input.toPath()) && path.toFile().isFile() && !TextileBackup.config.fileBlacklist.contains(path.toString().substring(rootPathLength))).forEach(path -> {
|
||||
try{
|
||||
File file = path.toAbsolutePath().toFile();
|
||||
|
||||
ZipEntry entry = new ZipEntry(file.getAbsolutePath().substring(rootPathLength));
|
||||
arc.putNextEntry(entry);
|
||||
entry.setSize(file.length());
|
||||
IOUtils.copy(new FileInputStream(file), arc);
|
||||
arc.closeEntry();
|
||||
}catch (IOException e){
|
||||
TextileBackup.logger.error(e.getMessage());
|
||||
}
|
||||
});
|
||||
|
||||
arc.close();
|
||||
} catch (IOException e) {
|
||||
TextileBackup.logger.error(e.getMessage());
|
||||
}
|
||||
|
||||
Utilities.log("Compression finished", ctx);
|
||||
}
|
||||
}
|
@ -21,15 +21,22 @@ package net.szum123321.textile_backup.core;
|
||||
import net.minecraft.server.MinecraftServer;
|
||||
import net.minecraft.server.command.ServerCommandSource;
|
||||
import net.minecraft.world.dimension.DimensionType;
|
||||
import net.szum123321.textile_backup.TextileBackup;
|
||||
import net.szum123321.textile_backup.core.compressors.GenericTarCompressor;
|
||||
import net.szum123321.textile_backup.core.compressors.ParallelBZip2Compressor;
|
||||
import net.szum123321.textile_backup.core.compressors.ParallelZipCompressor;
|
||||
import org.anarres.parallelgzip.ParallelGZIPOutputStream;
|
||||
import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream;
|
||||
import org.at4j.comp.bzip2.BZip2OutputStream;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
public class MakeBackupThread implements Runnable {
|
||||
private MinecraftServer server;
|
||||
private ServerCommandSource ctx;
|
||||
private String comment;
|
||||
private final MinecraftServer server;
|
||||
private final ServerCommandSource ctx;
|
||||
private final String comment;
|
||||
|
||||
public MakeBackupThread(MinecraftServer server, ServerCommandSource ctx, String comment){
|
||||
this.server = server;
|
||||
@ -59,7 +66,28 @@ public class MakeBackupThread implements Runnable {
|
||||
return;
|
||||
}
|
||||
|
||||
ZipCompressor.createArchive(world, outFile, ctx);
|
||||
switch (TextileBackup.config.format) {
|
||||
case ZIP:
|
||||
ParallelZipCompressor.createArchive(world, outFile, ctx);
|
||||
break;
|
||||
|
||||
case BZIP2:
|
||||
ParallelBZip2Compressor.createArchive(world, outFile, ctx);
|
||||
break;
|
||||
|
||||
case GZIP:
|
||||
GenericTarCompressor.createArchive(world, outFile, ParallelGZIPOutputStream.class, ctx);
|
||||
break;
|
||||
|
||||
case LZMA:
|
||||
GenericTarCompressor.createArchive(world, outFile, XZCompressorOutputStream.class, ctx);
|
||||
break;
|
||||
|
||||
default:
|
||||
Utilities.log("Error! No correct compression format specified! using default compressor!", ctx);
|
||||
ParallelZipCompressor.createArchive(world, outFile, ctx);
|
||||
break;
|
||||
}
|
||||
|
||||
BackupHelper.executeFileLimit(ctx, server.getWorld(DimensionType.OVERWORLD).getLevelProperties().getLevelName());
|
||||
|
||||
@ -69,6 +97,6 @@ public class MakeBackupThread implements Runnable {
|
||||
private String getFileName(){
|
||||
LocalDateTime now = LocalDateTime.now();
|
||||
|
||||
return Utilities.getDateTimeFormatter().format(now) + (comment != null ? "#" + comment.replace("#", ""): "") + ".zip";
|
||||
return Utilities.getDateTimeFormatter().format(now) + (comment != null ? "#" + comment.replace("#", "") : "") + TextileBackup.config.format.getExtension();
|
||||
}
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ import net.minecraft.server.command.ServerCommandSource;
|
||||
import net.minecraft.text.LiteralText;
|
||||
import net.szum123321.textile_backup.TextileBackup;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
||||
public class Utilities {
|
||||
@ -12,6 +13,15 @@ public class Utilities {
|
||||
return os.toLowerCase().startsWith("win");
|
||||
}
|
||||
|
||||
public static boolean isBlacklisted(Path path) {
|
||||
for(String i : TextileBackup.config.fileBlacklist) {
|
||||
if(path.startsWith(i))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public static DateTimeFormatter getDateTimeFormatter(){
|
||||
if(!TextileBackup.config.dateTimeFormat.equals(""))
|
||||
return DateTimeFormatter.ofPattern(TextileBackup.config.dateTimeFormat);
|
||||
|
@ -1,68 +0,0 @@
|
||||
/*
|
||||
A simple backup mod for Fabric
|
||||
Copyright (C) 2020 Szum123321
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.szum123321.textile_backup.core;
|
||||
|
||||
import net.minecraft.server.command.ServerCommandSource;
|
||||
import net.szum123321.textile_backup.TextileBackup;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
|
||||
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
|
||||
import org.apache.commons.compress.utils.IOUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
public class ZipCompressor {
|
||||
public static void createArchive(File in, File out, ServerCommandSource ctx){
|
||||
Utilities.log("Starting compression...", ctx);
|
||||
|
||||
try (ZipOutputStream arc = new ZipOutputStream(new FileOutputStream(out))){
|
||||
arc.setLevel(TextileBackup.config.compression);
|
||||
arc.setComment("Created on: " + Utilities.getDateTimeFormatter().format(LocalDateTime.now()));
|
||||
|
||||
File input = in.getCanonicalFile();
|
||||
int rootPathLength = input.toString().length() + 1;
|
||||
|
||||
Files.walk(input.toPath()).filter(path -> !path.equals(input.toPath()) && path.toFile().isFile() && !TextileBackup.config.fileBlacklist.contains(path.toString().substring(rootPathLength))).forEach(path -> {
|
||||
try{
|
||||
File file = path.toAbsolutePath().toFile();
|
||||
|
||||
ZipEntry entry = new ZipEntry(file.getAbsolutePath().substring(rootPathLength));
|
||||
arc.putNextEntry(entry);
|
||||
entry.setSize(file.length());
|
||||
IOUtils.copy(new FileInputStream(file), arc);
|
||||
arc.closeEntry();
|
||||
}catch (IOException e){
|
||||
TextileBackup.logger.error(e.getMessage());
|
||||
}
|
||||
});
|
||||
|
||||
} catch (IOException e) {
|
||||
TextileBackup.logger.error(e.getMessage());
|
||||
}
|
||||
|
||||
Utilities.log("Compression finished", ctx);
|
||||
}
|
||||
}
|
@ -0,0 +1,60 @@
|
||||
package net.szum123321.textile_backup.core.compressors;
|
||||
|
||||
import net.minecraft.server.command.ServerCommandSource;
|
||||
import net.szum123321.textile_backup.TextileBackup;
|
||||
import net.szum123321.textile_backup.core.Utilities;
|
||||
import org.apache.commons.compress.archivers.ArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
|
||||
import org.apache.commons.compress.utils.IOUtils;
|
||||
|
||||
|
||||
import java.io.*;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.nio.file.Files;
|
||||
|
||||
public class GenericTarCompressor {
|
||||
public static void createArchive(File in, File out, Class<? extends OutputStream> CompressorStreamClass, ServerCommandSource ctx) {
|
||||
Utilities.log("Starting compression...", ctx);
|
||||
|
||||
long start = System.nanoTime();
|
||||
|
||||
try (FileOutputStream outStream = new FileOutputStream(out);
|
||||
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(outStream);
|
||||
OutputStream compressorStream = CompressorStreamClass.getDeclaredConstructor(OutputStream.class).newInstance(bufferedOutputStream);// CompressorStreamClass.getConstructor().newInstance(bufferedOutputStream);
|
||||
TarArchiveOutputStream arc = new TarArchiveOutputStream(compressorStream)) {
|
||||
|
||||
arc.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
|
||||
arc.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
|
||||
|
||||
File input = in.getCanonicalFile();
|
||||
|
||||
Files.walk(input.toPath()
|
||||
).filter(path -> !path.equals(input.toPath()) &&
|
||||
path.toFile().isFile() &&
|
||||
!Utilities.isBlacklisted(input.toPath().relativize(path))
|
||||
).forEach(path -> {
|
||||
File file = path.toAbsolutePath().toFile();
|
||||
|
||||
try (FileInputStream fin = new FileInputStream(file);
|
||||
BufferedInputStream bfin = new BufferedInputStream(fin)) {
|
||||
ArchiveEntry entry = arc.createArchiveEntry(file, input.toPath().relativize(path).toString());
|
||||
|
||||
arc.putArchiveEntry(entry);
|
||||
IOUtils.copy(bfin, arc);
|
||||
|
||||
arc.closeArchiveEntry();
|
||||
} catch (IOException e) {
|
||||
TextileBackup.logger.error(e.getMessage());
|
||||
}
|
||||
});
|
||||
|
||||
arc.finish();
|
||||
} catch (IOException | IllegalAccessException | NoSuchMethodException | InstantiationException | InvocationTargetException e) {
|
||||
TextileBackup.logger.error(e.toString());
|
||||
}
|
||||
|
||||
long end = System.nanoTime();
|
||||
|
||||
Utilities.log("Compression took: " + ((end - start) / 1000000000.0) + "s", ctx);
|
||||
}
|
||||
}
|
@ -0,0 +1,62 @@
|
||||
package net.szum123321.textile_backup.core.compressors;
|
||||
|
||||
import net.minecraft.server.command.ServerCommandSource;
|
||||
import net.szum123321.textile_backup.TextileBackup;
|
||||
import net.szum123321.textile_backup.core.Utilities;
|
||||
import org.apache.commons.compress.archivers.ArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
|
||||
import org.apache.commons.compress.utils.IOUtils;
|
||||
import org.at4j.comp.bzip2.BZip2OutputStream;
|
||||
import org.at4j.comp.bzip2.BZip2OutputStreamSettings;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
|
||||
public class ParallelBZip2Compressor {
|
||||
public static void createArchive(File in, File out, ServerCommandSource ctx) {
|
||||
Utilities.log("Starting compression...", ctx);
|
||||
|
||||
BZip2OutputStreamSettings settings = new BZip2OutputStreamSettings().setNumberOfEncoderThreads(Runtime.getRuntime().availableProcessors());
|
||||
|
||||
long start = System.nanoTime();
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(out);
|
||||
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream);
|
||||
BZip2OutputStream bZip2OutputStream = new BZip2OutputStream(bufferedOutputStream, settings);
|
||||
TarArchiveOutputStream arc = new TarArchiveOutputStream(bZip2OutputStream)) {
|
||||
|
||||
arc.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
|
||||
arc.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
|
||||
|
||||
File input = in.getCanonicalFile();
|
||||
|
||||
Files.walk(input.toPath()
|
||||
).filter(path -> !path.equals(input.toPath()) &&
|
||||
path.toFile().isFile() &&
|
||||
!Utilities.isBlacklisted(input.toPath().relativize(path))
|
||||
).forEach(path -> {
|
||||
File file = path.toAbsolutePath().toFile();
|
||||
|
||||
try (FileInputStream fin = new FileInputStream(file);
|
||||
BufferedInputStream bfin = new BufferedInputStream(fin)) {
|
||||
ArchiveEntry entry = arc.createArchiveEntry(file, input.toPath().relativize(path).toString());
|
||||
|
||||
arc.putArchiveEntry(entry);
|
||||
IOUtils.copy(bfin, arc);
|
||||
|
||||
arc.closeArchiveEntry();
|
||||
} catch (IOException e) {
|
||||
TextileBackup.logger.error(e.getMessage());
|
||||
}
|
||||
});
|
||||
|
||||
arc.finish();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
long end = System.nanoTime();
|
||||
|
||||
Utilities.log("Compression took: " + ((end - start) / 1000000000.0) + "s", ctx);
|
||||
}
|
||||
}
|
@ -0,0 +1,82 @@
|
||||
package net.szum123321.textile_backup.core.compressors;
|
||||
|
||||
import net.minecraft.server.command.ServerCommandSource;
|
||||
import net.szum123321.textile_backup.TextileBackup;
|
||||
import net.szum123321.textile_backup.core.Utilities;
|
||||
import org.apache.commons.compress.archivers.zip.*;
|
||||
import org.apache.commons.compress.parallel.InputStreamSupplier;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.concurrent.*;
|
||||
import java.util.zip.ZipEntry;
|
||||
|
||||
/*
|
||||
This part of code is based on:
|
||||
https://stackoverflow.com/questions/54624695/how-to-implement-parallel-zip-creation-with-scatterzipoutputstream-with-zip64-su
|
||||
answer by:
|
||||
https://stackoverflow.com/users/2987755/dkb
|
||||
*/
|
||||
|
||||
public class ParallelZipCompressor {
|
||||
public static void createArchive(File in, File out, ServerCommandSource ctx) {
|
||||
Utilities.log("Starting compression...", ctx);
|
||||
|
||||
long start = System.nanoTime();
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(out);
|
||||
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream);
|
||||
ZipArchiveOutputStream arc = new ZipArchiveOutputStream(bufferedOutputStream)) {
|
||||
|
||||
ParallelScatterZipCreator scatterZipCreator = new ParallelScatterZipCreator();
|
||||
|
||||
arc.setMethod(ZipArchiveOutputStream.DEFLATED);
|
||||
arc.setUseZip64(Zip64Mode.AsNeeded);
|
||||
arc.setLevel(TextileBackup.config.compression);
|
||||
arc.setComment("Created on: " + Utilities.getDateTimeFormatter().format(LocalDateTime.now()));
|
||||
|
||||
File input = in.getCanonicalFile();
|
||||
|
||||
Files.walk(input.toPath()
|
||||
).filter(path -> !path.equals(input.toPath()) &&
|
||||
path.toFile().isFile() &&
|
||||
!Utilities.isBlacklisted(input.toPath().relativize(path))
|
||||
).forEach(p -> {
|
||||
ZipArchiveEntry entry = new ZipArchiveEntry(input.toPath().relativize(p).toString());
|
||||
entry.setMethod(ZipEntry.DEFLATED);
|
||||
FileInputStreamSupplier supplier = new FileInputStreamSupplier(p);
|
||||
scatterZipCreator.addArchiveEntry(entry, supplier);
|
||||
});
|
||||
|
||||
scatterZipCreator.writeTo(arc);
|
||||
|
||||
arc.finish();
|
||||
} catch (IOException | InterruptedException | ExecutionException e) {
|
||||
TextileBackup.logger.error(e.getMessage());
|
||||
}
|
||||
|
||||
long end = System.nanoTime();
|
||||
|
||||
Utilities.log("Compression took: " + ((end - start) / 1000000000.0) + "s", ctx);
|
||||
}
|
||||
|
||||
static class FileInputStreamSupplier implements InputStreamSupplier {
|
||||
private final Path sourceFile;
|
||||
private InputStream stream;
|
||||
|
||||
FileInputStreamSupplier(Path sourceFile) {
|
||||
this.sourceFile = sourceFile;
|
||||
}
|
||||
|
||||
public InputStream get() {
|
||||
try {
|
||||
stream = Files.newInputStream(sourceFile);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
}
|
||||
}
|
@ -52,7 +52,12 @@ public abstract class MinecraftServerMixin {
|
||||
|
||||
@Inject(method = "shutdown", at = @At(value = "INVOKE_ASSIGN", target = "Lnet/minecraft/server/MinecraftServer;save(ZZZ)Z"))
|
||||
public void onShutdown(CallbackInfo ci){
|
||||
if(TextileBackup.config.shutdownBackup)
|
||||
BackupHelper.create((MinecraftServer)(Object)this, null, false, null);
|
||||
if(TextileBackup.config.shutdownBackup) {
|
||||
try {
|
||||
BackupHelper.create((MinecraftServer) (Object) this, null, false, "shutdown").join();
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
43
src/main/java/org/at4j/comp/CompressionLevel.java
Normal file
43
src/main/java/org/at4j/comp/CompressionLevel.java
Normal file
@ -0,0 +1,43 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp;
|
||||
|
||||
/**
 * Enumeration of the generic compression levels supported by some of At4J's
 * compression algorithms.
 * @author Karl Gustafsson
 * @since 1.0.2
 */
public enum CompressionLevel {
    BEST("best"),
    DEFAULT("default"),
    FASTEST("fastest");

    /** Lower-case word that names this level in its text form. */
    private final String label;

    CompressionLevel(String tag) {
        this.label = tag;
    }

    /**
     * @return the level's tag followed by {@code " compression"}, for
     *         example {@code "best compression"}.
     */
    @Override
    public String toString() {
        return String.join(" ", label, "compression");
    }
}
|
@ -0,0 +1,50 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
/**
 * Handle to an executor service that spreads the encoding of bzip2 blocks
 * over several threads in order to speed up bzip2 encoding.
 * <p>
 * Work is distributed over all threads available to the executor. When a
 * {@link BZip2OutputStream} submits more work while every thread is busy, the
 * submitting call blocks until a thread becomes available.
 * <p>
 * A client that is done with the executor must call {@link #shutdown()} to
 * release its resources.
 * <p>
 * Instances are obtained from
 * {@link BZip2OutputStream#createExecutorService(int)}.
 * <p>
 * {@link #shutdown()} is the only method exposed here, and custom
 * implementations of this interface are not supported.
 * @author Karl Gustafsson
 * @since 1.1
 */
public interface BZip2EncoderExecutorService {
    /**
     * Terminates all threads and releases every other resource associated
     * with this executor. Call it once the executor is no longer needed.
     */
    void shutdown();
}
|
@ -0,0 +1,86 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.RejectedExecutionHandler;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* This is the only implementation of {@link BZip2EncoderExecutorService}. All
|
||||
* objects that are using that interface assume that it is implemented by this
|
||||
* class.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class BZip2EncoderExecutorServiceImpl implements BZip2EncoderExecutorService
|
||||
{
|
||||
/**
|
||||
* This rejected execution handler shoehorns in a job in an
|
||||
* {@link ExecutorService}'s job queue if it is rejected by the service.
|
||||
* This requires that the service's job queue has an upper bound and that it
|
||||
* blocks when trying to insert more elements than the bound.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
private static class ShoehornInJobRejectedExecutionHandler implements RejectedExecutionHandler
|
||||
{
|
||||
private static final ShoehornInJobRejectedExecutionHandler INSTANCE = new ShoehornInJobRejectedExecutionHandler();
|
||||
|
||||
public void rejectedExecution(Runnable r, ThreadPoolExecutor executor)
|
||||
{
|
||||
// System.out.print("Shoehorning... ");
|
||||
try
|
||||
{
|
||||
executor.getQueue().put(r);
|
||||
}
|
||||
catch (InterruptedException e)
|
||||
{
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
// System.out.println("done");
|
||||
}
|
||||
}
|
||||
|
||||
private final ThreadPoolExecutor m_executor;
|
||||
private final ErrorState m_errorState;
|
||||
|
||||
BZip2EncoderExecutorServiceImpl(int noThreads, ErrorState es)
|
||||
{
|
||||
m_executor = new ThreadPoolExecutor(noThreads, noThreads, 100, TimeUnit.SECONDS, new ArrayBlockingQueue<Runnable>(1), new EncodingThreadFactory(es), ShoehornInJobRejectedExecutionHandler.INSTANCE);
|
||||
m_errorState = es;
|
||||
}
|
||||
|
||||
ErrorState getErrorState()
|
||||
{
|
||||
return m_errorState;
|
||||
}
|
||||
|
||||
void execute(BlockEncoderRunnable r)
|
||||
{
|
||||
m_executor.execute(r);
|
||||
}
|
||||
|
||||
public void shutdown()
|
||||
{
|
||||
m_executor.shutdown();
|
||||
}
|
||||
}
|
306
src/main/java/org/at4j/comp/bzip2/BZip2OutputStream.java
Normal file
306
src/main/java/org/at4j/comp/bzip2/BZip2OutputStream.java
Normal file
@ -0,0 +1,306 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.at4j.support.io.LittleEndianBitOutputStream;
|
||||
|
||||
/**
|
||||
* This is an {@link OutputStream} for bzip2 compressing data.
|
||||
* <p>
|
||||
* For more information on the inner workings of bzip2, see <a
|
||||
* href="http://en.wikipedia.org/wiki/Bzip2">the Wikipedia article on bzip2</a>.
|
||||
* <p>
|
||||
* This stream is <i>not</i> safe for concurrent access by several writing
|
||||
* threads. A client must provide external synchronization to use this from
|
||||
* several threads.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
* @see BZip2OutputStreamSettings
|
||||
*/
|
||||
public class BZip2OutputStream extends OutputStream
|
||||
{
|
||||
private static final byte[] EOS_MAGIC = new byte[] { 0x17, 0x72, 0x45, 0x38, 0x50, (byte) 0x90 };
|
||||
|
||||
// This is used to generate unique hash codes for each created stream
|
||||
// object.
|
||||
private static final AtomicInteger HASH_CODE_GENERATOR = new AtomicInteger(0);
|
||||
|
||||
private final LittleEndianBitOutputStream m_wrapped;
|
||||
// The block size in bytes
|
||||
private final int m_blockSize;
|
||||
// This may be null
|
||||
|
||||
// Data stream that writes to the block currently being filled with data.
|
||||
private final BlockOutputStream m_blockOutputStream;
|
||||
// If several threads are used to encode the data, this is used to write the
|
||||
// encoded blocks in the right order.
|
||||
private final EncodedBlockWriter m_encodedBlockWriter;
|
||||
private final BZip2EncoderExecutorServiceImpl m_executorService;
|
||||
private final boolean m_iCreatedExecutor;
|
||||
private final int m_hashCode = HASH_CODE_GENERATOR.getAndIncrement();
|
||||
|
||||
private boolean m_closed;
|
||||
private long m_pos = 0;
|
||||
|
||||
private static void writeFileHeader(OutputStream os, int blockSize) throws IOException
|
||||
{
|
||||
// File header
|
||||
os.write('B');
|
||||
os.write('Z');
|
||||
// File version
|
||||
os.write('h');
|
||||
// Block size as a character. The ASCII code for 0 is 48.
|
||||
os.write(blockSize + 48);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new bzip2 compressing output stream with default settings.
|
||||
* @param wrapped Compressed data is written to this stream.
|
||||
* @throws IOException On errors writing the file header.
|
||||
* @see #BZip2OutputStream(OutputStream, BZip2OutputStreamSettings)
|
||||
*/
|
||||
public BZip2OutputStream(OutputStream wrapped) throws IOException
|
||||
{
|
||||
this(wrapped, new BZip2OutputStreamSettings());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new bzip2 compressing output stream.
|
||||
* @param wrapped Compressed data is written to this stream.
|
||||
* @param settings Compression settings.
|
||||
* @throws IOException On errors writing the file header.
|
||||
* @see #BZip2OutputStream(OutputStream)
|
||||
*/
|
||||
public BZip2OutputStream(OutputStream wrapped, BZip2OutputStreamSettings settings) throws IOException
|
||||
{
|
||||
// Null checks
|
||||
wrapped.getClass();
|
||||
settings.getClass();
|
||||
|
||||
m_wrapped = new LittleEndianBitOutputStream(wrapped);
|
||||
// bzip2 uses 1kb == 1000b
|
||||
m_blockSize = settings.getBlockSize() * 100 * 1000;
|
||||
|
||||
writeFileHeader(wrapped, settings.getBlockSize());
|
||||
|
||||
EncodingScratchpad sp;
|
||||
if (settings.getExecutorService() != null)
|
||||
{
|
||||
// Use the supplied executor service
|
||||
// There is only one allowed implementation for now.
|
||||
m_executorService = (BZip2EncoderExecutorServiceImpl) settings.getExecutorService();
|
||||
m_iCreatedExecutor = false;
|
||||
m_encodedBlockWriter = new EncodedBlockWriter(m_wrapped);
|
||||
// Each encoder thread has its own scratchpad
|
||||
sp = null;
|
||||
}
|
||||
else if (settings.getNumberOfEncoderThreads() > 0)
|
||||
{
|
||||
// Use separate encoder threads.
|
||||
m_executorService = new BZip2EncoderExecutorServiceImpl(settings.getNumberOfEncoderThreads(), new SingleObserverErrorState());
|
||||
m_iCreatedExecutor = true;
|
||||
m_encodedBlockWriter = new EncodedBlockWriter(m_wrapped);
|
||||
// Each encoder thread has its own scratchpad
|
||||
sp = null;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Encode in the thread writing to the stream.
|
||||
m_executorService = null;
|
||||
m_iCreatedExecutor = false;
|
||||
sp = new EncodingScratchpad();
|
||||
m_encodedBlockWriter = null;
|
||||
}
|
||||
|
||||
m_blockOutputStream = new BlockOutputStream(m_wrapped, m_blockSize, settings.getNumberOfHuffmanTreeRefinementIterations() , m_executorService, this, m_encodedBlockWriter, sp);
|
||||
}
|
||||
|
||||
private void assertNotClosed() throws IOException
|
||||
{
|
||||
if (m_closed)
|
||||
{
|
||||
throw new IOException("This stream is closed");
|
||||
}
|
||||
}
|
||||
|
||||
private void checkErrorState() throws IOException, RuntimeException
|
||||
{
|
||||
if (m_executorService != null)
|
||||
{
|
||||
m_executorService.getErrorState().checkAndClearErrors(this);
|
||||
}
|
||||
}
|
||||
|
||||
private void debug(String msg)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
private void writeEosBlock() throws IOException
|
||||
{
|
||||
// Write the end of stream magic
|
||||
for (int i = 0; i < EOS_MAGIC.length; i++)
|
||||
{
|
||||
m_wrapped.writeBitsLittleEndian(EOS_MAGIC[i] & 0xFF, 8);
|
||||
}
|
||||
// Write file checksum
|
||||
m_wrapped.writeBitsLittleEndian(m_blockOutputStream.getFileChecksum(), 32);
|
||||
m_wrapped.padToByteBoundary();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(int b) throws IOException
|
||||
{
|
||||
assertNotClosed();
|
||||
checkErrorState();
|
||||
|
||||
m_pos++;
|
||||
m_blockOutputStream.write(b & 0xFF);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(byte[] data) throws IOException
|
||||
{
|
||||
assertNotClosed();
|
||||
checkErrorState();
|
||||
|
||||
m_pos += data.length;
|
||||
m_blockOutputStream.write(data);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(byte[] data, int offset, int len) throws IOException, IndexOutOfBoundsException
|
||||
{
|
||||
assertNotClosed();
|
||||
checkErrorState();
|
||||
|
||||
if (offset < 0)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Offset: " + offset);
|
||||
}
|
||||
if (len < 0)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Length: " + len);
|
||||
}
|
||||
if (offset + len > data.length)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Offset: " + offset + " + Length: " + len + " > length of data: " + data.length);
|
||||
}
|
||||
|
||||
m_pos += len;
|
||||
m_blockOutputStream.write(data, offset, len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException
|
||||
{
|
||||
checkErrorState();
|
||||
|
||||
if (!m_closed)
|
||||
{
|
||||
// This writes out any remaining run length encoding data and closes
|
||||
// the block output stream.
|
||||
m_blockOutputStream.close();
|
||||
|
||||
if ((m_pos > 0) && (m_encodedBlockWriter != null))
|
||||
{
|
||||
// Wait for all blocks to be written.
|
||||
try
|
||||
{
|
||||
m_encodedBlockWriter.waitFor();
|
||||
}
|
||||
catch (InterruptedException e)
|
||||
{
|
||||
// Repackage
|
||||
throw new IOException("Interrupted. The output file is most likely corrupted.");
|
||||
}
|
||||
checkErrorState();
|
||||
}
|
||||
|
||||
writeEosBlock();
|
||||
|
||||
m_wrapped.close();
|
||||
|
||||
debug("Original size: " + m_pos + ", compressed size: " + m_wrapped.getNumberOfBytesWritten());
|
||||
|
||||
if (m_iCreatedExecutor && (m_executorService != null))
|
||||
{
|
||||
m_executorService.shutdown();
|
||||
}
|
||||
m_closed = true;
|
||||
super.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode()
|
||||
{
|
||||
return m_hashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
return this == o;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the stream if the client has been sloppy about it.
|
||||
*/
|
||||
@Override
|
||||
protected void finalize() throws Throwable
|
||||
{
|
||||
close();
|
||||
super.finalize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link BZip2EncoderExecutorService} that can be shared between
|
||||
* several {@link BZip2OutputStream}:s to spread the bzip2 encoding work
|
||||
* over several threads. The created executor service can be passed to the
|
||||
* {@link BZip2OutputStream} constructor in a
|
||||
* {@link BZip2OutputStreamSettings} object.
|
||||
* @param noThreads The number of threads available to the executor.
|
||||
* @return The executor service.
|
||||
*/
|
||||
public static BZip2EncoderExecutorService createExecutorService(int noThreads)
|
||||
{
|
||||
return new BZip2EncoderExecutorServiceImpl(noThreads, new MultipleObserverErrorState());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link BZip2EncoderExecutorService} that can be shared between
|
||||
* several {@link BZip2OutputStream}:s to spread the bzip2 encoding work
|
||||
* over several threads. The created executor service can be passed to the
|
||||
* {@link BZip2OutputStream} constructor in a
|
||||
* {@link BZip2OutputStreamSettings} object.
|
||||
* <p>
|
||||
* The created executor will have as many threads available to it as there
|
||||
* are CPU:s available to the JVM.
|
||||
* @return The executor service.
|
||||
*/
|
||||
public static BZip2EncoderExecutorService createExecutorService()
|
||||
{
|
||||
return createExecutorService(Runtime.getRuntime().availableProcessors());
|
||||
}
|
||||
}
|
223
src/main/java/org/at4j/comp/bzip2/BZip2OutputStreamSettings.java
Normal file
223
src/main/java/org/at4j/comp/bzip2/BZip2OutputStreamSettings.java
Normal file
@ -0,0 +1,223 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import org.at4j.support.lang.At4JException;
|
||||
|
||||
/**
|
||||
* This object contains settings for the {@link BZip2OutputStream}.
|
||||
* <p>
|
||||
* When created, this object contains the default settings. Modify the settings
|
||||
* by calling setter methods on this object.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
* @see BZip2OutputStream
|
||||
*/
|
||||
public class BZip2OutputStreamSettings implements Cloneable
|
||||
{
|
||||
/**
|
||||
* The minimum size of an encoded data block in hundreds of kilobytes. Using
|
||||
* a small block size gives faster but worse compression.
|
||||
*/
|
||||
public static final int MIN_BLOCK_SIZE = 1;
|
||||
|
||||
/**
|
||||
* The maximum size of an encoded data block in hundreds of kilobytes. Using
|
||||
* a large block size gives slower but better compression.
|
||||
*/
|
||||
public static final int MAX_BLOCK_SIZE = 9;
|
||||
|
||||
/**
|
||||
* The default block size.
|
||||
*/
|
||||
public static final int DEFAULT_BLOCK_SIZE = MAX_BLOCK_SIZE;
|
||||
|
||||
/**
|
||||
* The default number of Huffman tree refinement iterations. By having more
|
||||
* tree refinement iterations the compression gets better, but as the number
|
||||
* is increased the returns are diminishing.
|
||||
*/
|
||||
public static final int DEFAULT_NO_OF_HUFFMAN_TREE_REFINEMENT_ITERATIONS = 5;
|
||||
|
||||
/**
|
||||
* The default number of encoder threads.
|
||||
*/
|
||||
public static final int DEFAULT_NO_OF_ENCODER_THREADS = 0;
|
||||
|
||||
private int m_blockSize = DEFAULT_BLOCK_SIZE;
|
||||
private int m_numberOfHuffmanTreeRefinementIterations = DEFAULT_NO_OF_HUFFMAN_TREE_REFINEMENT_ITERATIONS;
|
||||
private int m_numberOfEncoderThreads = DEFAULT_NO_OF_ENCODER_THREADS;
|
||||
private BZip2EncoderExecutorService m_executorService;
|
||||
|
||||
/**
|
||||
* Set the size of compressed data blocks. A high block size gives good but
|
||||
* slow compression. A low block size gives worse but faster compression.
|
||||
* <p>
|
||||
* The default block size is 9 (the highest permitted value).
|
||||
* @param bs The block size in hundreds of kilobytes. This should be between
|
||||
* 1 and 9 (inclusive).
|
||||
* @return {@code this}
|
||||
* @throws IllegalArgumentException If the block size is not in the
|
||||
* permitted range.
|
||||
*/
|
||||
public BZip2OutputStreamSettings setBlockSize(int bs) throws IllegalArgumentException
|
||||
{
|
||||
if (bs < MIN_BLOCK_SIZE || bs > MAX_BLOCK_SIZE)
|
||||
{
|
||||
throw new IllegalArgumentException("Invalid block size " + bs + ". It must be between " + MIN_BLOCK_SIZE + " and " + MAX_BLOCK_SIZE + " (inclusive)");
|
||||
}
|
||||
m_blockSize = bs;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the block size for a compressed data block.
|
||||
* @return The block size for a compressed data block.
|
||||
*/
|
||||
public int getBlockSize()
|
||||
{
|
||||
return m_blockSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the number of tree refinement iterations that are run when creating
|
||||
* Huffman trees for each compressed data block.
|
||||
* <p>
|
||||
* A higher value for this parameter should give better but slower
|
||||
* compression. As the value increases the returns are diminishing.
|
||||
* <p>
|
||||
* The default value is five refinement iterations.
|
||||
* @param no The number of Huffman tree refinement iterations. This should
|
||||
* be a positive integer larger than zero.
|
||||
* @return {@code this}
|
||||
* @throws IllegalArgumentException If the number is not a positive integer
|
||||
* larger than zero.
|
||||
*/
|
||||
public BZip2OutputStreamSettings setNumberOfHuffmanTreeRefinementIterations(int no) throws IllegalArgumentException
|
||||
{
|
||||
if (no < 1)
|
||||
{
|
||||
throw new IllegalArgumentException("Invalid value " + no + ". It must be greater than zero");
|
||||
}
|
||||
m_numberOfHuffmanTreeRefinementIterations = no;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of Huffman tree refinement iterations.
|
||||
* @return The number of Huffman tree refinement iterations.
|
||||
*/
|
||||
public int getNumberOfHuffmanTreeRefinementIterations()
|
||||
{
|
||||
return m_numberOfHuffmanTreeRefinementIterations;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a for logging diagnostic output to. Output is
|
||||
* logged to the debug and trace levels.
|
||||
* <p>
|
||||
* By default no log adapter is used and hence no diagnostic output is
|
||||
* logged.
|
||||
* @param la A log adapter.
|
||||
* @return {@code this}
|
||||
*/
|
||||
public BZip2OutputStreamSettings setLogAdapter(Object la)
|
||||
{
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Set the number of encoder threads used for bzip2 compressing data. bzip2
|
||||
* encoding is CPU intensive and giving the encoder more threads to work
|
||||
* with can drastically shorten the encoding time. The drawback is that the
|
||||
* memory consumption grows since each encoder thread must keep its data in
|
||||
* memory.
|
||||
* <p>
|
||||
* The default number of encoder threads is zero, which means that the
|
||||
* thread that is writing the data to the {@link BZip2OutputStream} will be
|
||||
* used for the encoding.
|
||||
* <p>
|
||||
* For the shortest encoding time, use as many threads as there are
|
||||
* available CPU:s in the system.
|
||||
* @param no The number of encoder threads to use. If this is set to {@code
|
||||
* 0}, the encoding will be done in the thread writing to the stream.
|
||||
* @return {@code this}
|
||||
* @throws IllegalArgumentException If {@code no} is negative.
|
||||
* @see #setExecutorService(BZip2EncoderExecutorService)
|
||||
*/
|
||||
public BZip2OutputStreamSettings setNumberOfEncoderThreads(int no) throws IllegalArgumentException
|
||||
{
|
||||
if (no < 0)
|
||||
{
|
||||
throw new IllegalArgumentException("Invalid number of encoder threads " + no + ". The number must be zero or greater");
|
||||
}
|
||||
|
||||
m_numberOfEncoderThreads = no;
|
||||
return this;
|
||||
}
|
||||
|
||||
public int getNumberOfEncoderThreads()
|
||||
{
|
||||
return m_numberOfEncoderThreads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set an executor service that the {@link BZip2OutputStream} will use to
|
||||
* spread the encoding over several threads. This executor can be shared
|
||||
* among several {@link BZip2OutputStream} objects.
|
||||
* <p>
|
||||
* If an executor service is set using this method, all threads that are
|
||||
* available to the executor is used for the encoding and any value set
|
||||
* using {@link #setNumberOfEncoderThreads(int)} is ignored.
|
||||
* <p>
|
||||
* An executor service is created using the
|
||||
* {@link BZip2OutputStream#createExecutorService()} or the
|
||||
* {@link BZip2OutputStream#createExecutorService(int)} method.
|
||||
* @param executorService The executor service.
|
||||
* @return {@code this}
|
||||
* @see #setNumberOfEncoderThreads(int)
|
||||
*/
|
||||
public BZip2OutputStreamSettings setExecutorService(BZip2EncoderExecutorService executorService)
|
||||
{
|
||||
m_executorService = executorService;
|
||||
return this;
|
||||
}
|
||||
|
||||
public BZip2EncoderExecutorService getExecutorService()
|
||||
{
|
||||
return m_executorService;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make a copy of this object.
|
||||
*/
|
||||
@Override
|
||||
public BZip2OutputStreamSettings clone()
|
||||
{
|
||||
try
|
||||
{
|
||||
return (BZip2OutputStreamSettings) super.clone();
|
||||
}
|
||||
catch (CloneNotSupportedException e)
|
||||
{
|
||||
throw new At4JException("Bug", e);
|
||||
}
|
||||
}
|
||||
}
|
29
src/main/java/org/at4j/comp/bzip2/Block.java
Normal file
29
src/main/java/org/at4j/comp/bzip2/Block.java
Normal file
@ -0,0 +1,29 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
/**
 * Marker interface for a bzip2 data block; it declares no members. Used by
 * the {@link BlockDecoder}.
 * @author Karl Gustafsson
 * @since 1.1
 */
interface Block {
}
|
422
src/main/java/org/at4j/comp/bzip2/BlockDecoder.java
Normal file
422
src/main/java/org/at4j/comp/bzip2/BlockDecoder.java
Normal file
@ -0,0 +1,422 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.at4j.support.comp.ByteMoveToFront;
|
||||
import org.at4j.support.comp.IntMoveToFront;
|
||||
import org.at4j.support.io.LittleEndianBitInputStream;
|
||||
import org.at4j.support.lang.At4JException;
|
||||
import org.at4j.support.lang.UnsignedInteger;
|
||||
|
||||
/**
|
||||
* This is used by the {@link BZip2InputStream} to decode data blocks.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class BlockDecoder
|
||||
{
|
||||
// The magic number identifying a block of compressed data
|
||||
private static final byte[] COMPRESSED_BLOCK_MAGIC = new byte[] { (byte) 0x31, (byte) 0x41, (byte) 0x59, (byte) 0x26, (byte) 0x53, (byte) 0x59 };
|
||||
// The magic number identifying the end of stream block
|
||||
private static final byte[] EOS_BLOCK_MAGIC = new byte[] { (byte) 0x17, (byte) 0x72, (byte) 0x45, (byte) 0x38, (byte) 0x50, (byte) 0x90 };
|
||||
|
||||
// The number of symbols to read from each Huffman tree before switching
|
||||
private static final int SYMBOLS_TO_READ_FROM_EACH_TREE = 50;
|
||||
|
||||
// The symbol value of the special RUNA symbol.
|
||||
private static final int RUNA_SYMBOL = 0;
|
||||
// The symbol value of the special RUNB symbol.
|
||||
private static final int RUNB_SYMBOL = 1;
|
||||
|
||||
private static final int MAX_NO_OF_MTF_SYMBOLS = 258;
|
||||
|
||||
private static final byte[] INITIAL_MOVE_TO_FRONT_ALPHABET = new byte[MAX_NO_OF_MTF_SYMBOLS];
|
||||
static
|
||||
{
|
||||
for (int i = 0; i < MAX_NO_OF_MTF_SYMBOLS; i++)
|
||||
{
|
||||
INITIAL_MOVE_TO_FRONT_ALPHABET[i] = (byte) i;
|
||||
}
|
||||
}
|
||||
|
||||
private final LittleEndianBitInputStream m_in;
|
||||
private final int m_blockSize;
|
||||
|
||||
// Data read from the block header
|
||||
|
||||
// Block checksum (CRC)
|
||||
private int m_readBlockChecksum;
|
||||
// The pointer to the original data used in the BW transform
|
||||
private int m_originalDataPointer;
|
||||
// The Huffman trees used for decompression
|
||||
private HighValueBranchHuffmanTree[] m_huffmanTrees;
|
||||
// The EOB (End Of Block) symbol index.
|
||||
private int m_endOfBlockSymbol;
|
||||
// The number of times that the Huffman trees are switched in the input.
|
||||
// The trees are switched every 50 bytes.
|
||||
private int m_numberOfTimesHuffmanTreesAreSwitched;
|
||||
private int[] m_treeUse;
|
||||
// Mapping between symbol values and byte values.
|
||||
private byte[] m_symbolSequenceNos;
|
||||
// Frequency of each byte in the pre-BW data
|
||||
private int[] m_byteFrequencies;
|
||||
|
||||
// State variables
|
||||
|
||||
// The number of the currently selected Huffman tree
|
||||
private HighValueBranchHuffmanTree m_curTree;
|
||||
// The number of symbols left to read from the current Huffman tree
|
||||
private int m_symbolsLeftToReadFromCurTree;
|
||||
// The current number of Huffman tree switches
|
||||
private int m_switchNo;
|
||||
// A counter for the number of bytes decoded in this block.
|
||||
private int m_noBytesDecoded;
|
||||
private ByteMoveToFront m_mtfTransformer;
|
||||
// This will hold the decoded data (before the Burrows Wheeler decoding)
|
||||
private final byte[] m_decoded;
|
||||
|
||||
BlockDecoder(LittleEndianBitInputStream in, int blockSize)
|
||||
{
|
||||
m_in = in;
|
||||
m_blockSize = blockSize;
|
||||
m_decoded = new byte[blockSize];
|
||||
}
|
||||
|
||||
private void throwIOException(String msg) throws IOException
|
||||
{
|
||||
throw new IOException(msg + ". Position in input stream: " + m_in.getNumberOfBytesRead());
|
||||
}
|
||||
|
||||
private void checkInterrupted() throws InterruptedException
|
||||
{
|
||||
if (Thread.interrupted())
|
||||
{
|
||||
throw new InterruptedException();
|
||||
}
|
||||
}
|
||||
|
||||
private void trace(String s)
|
||||
{
|
||||
System.out.println(s);
|
||||
}
|
||||
|
||||
static HighValueBranchHuffmanTree decodeHuffmanTree(final int totalNumberOfSymbols, final LittleEndianBitInputStream in) throws IOException
|
||||
{
|
||||
int[] symbolLengths = new int[totalNumberOfSymbols];
|
||||
|
||||
// Starting bit length for Huffman deltas in this tree
|
||||
int currentBitLength = in.readBits(5);
|
||||
if (currentBitLength > 20)
|
||||
{
|
||||
throw new IOException("Invalid starting bit length for Huffman deltas: " + currentBitLength + ". Must be <= 20");
|
||||
}
|
||||
|
||||
// Initialize min and max lengths per tree with values that
|
||||
// will certainly be overwritten.
|
||||
int minBitLengthPerTree = 20;
|
||||
int maxBitLengthPerTree = 0;
|
||||
|
||||
for (int j = 0; j < totalNumberOfSymbols; j++)
|
||||
{
|
||||
while (in.readBit())
|
||||
{
|
||||
currentBitLength += in.readBit() ? -1 : 1;
|
||||
if ((currentBitLength < 1) || (currentBitLength > 20))
|
||||
{
|
||||
throw new IOException("Invalid bit length " + currentBitLength);
|
||||
}
|
||||
}
|
||||
symbolLengths[j] = currentBitLength;
|
||||
|
||||
if (currentBitLength < minBitLengthPerTree)
|
||||
{
|
||||
minBitLengthPerTree = currentBitLength;
|
||||
}
|
||||
if (currentBitLength > maxBitLengthPerTree)
|
||||
{
|
||||
maxBitLengthPerTree = currentBitLength;
|
||||
}
|
||||
}
|
||||
return new HighValueBranchHuffmanTree(symbolLengths, minBitLengthPerTree, maxBitLengthPerTree, false);
|
||||
}
|
||||
|
||||
private void readCompressedBlockHeader() throws IOException
|
||||
{
|
||||
byte[] barr = new byte[4];
|
||||
|
||||
// Block checksum
|
||||
m_readBlockChecksum = (int) UnsignedInteger.fromLittleEndianByteArrayToLong(m_in.readBytes(barr, 0, 4), 0);
|
||||
|
||||
// Randomized block?
|
||||
if (m_in.readBit())
|
||||
{
|
||||
throwIOException("Randomized block mode is not supported");
|
||||
}
|
||||
|
||||
// Starting pointer into BWT
|
||||
m_in.readBytes(barr, 1, 3);
|
||||
barr[0] = 0;
|
||||
m_originalDataPointer = (int) UnsignedInteger.fromLittleEndianByteArrayToLong(barr, 0);
|
||||
if (m_originalDataPointer > m_blockSize)
|
||||
{
|
||||
throw new IOException("Invalid starting pointer " + m_originalDataPointer + ". It must be less than the block size " + m_blockSize);
|
||||
}
|
||||
|
||||
// Huffman used codes
|
||||
boolean[] usedSymbols = new boolean[256];
|
||||
int numberOfUsedSymbols = 0;
|
||||
|
||||
boolean[] inUseBlocks = new boolean[16];
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
inUseBlocks[i] = m_in.readBit();
|
||||
}
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
if (inUseBlocks[i])
|
||||
{
|
||||
for (int j = 0; j < 16; j++)
|
||||
{
|
||||
if (m_in.readBit())
|
||||
{
|
||||
usedSymbols[i * 16 + j] = true;
|
||||
numberOfUsedSymbols++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (numberOfUsedSymbols == 0)
|
||||
{
|
||||
throwIOException("No symbols used in table");
|
||||
}
|
||||
|
||||
// Create a mapping for the sequence numbers of all used bytes
|
||||
m_symbolSequenceNos = new byte[numberOfUsedSymbols];
|
||||
int useIndex = 0;
|
||||
for (int i = 0; i < 256; i++)
|
||||
{
|
||||
if (usedSymbols[i])
|
||||
{
|
||||
m_symbolSequenceNos[useIndex++] = (byte) (i & 0xFF);
|
||||
}
|
||||
}
|
||||
assert useIndex == numberOfUsedSymbols;
|
||||
|
||||
m_byteFrequencies = new int[256];
|
||||
|
||||
// The number of Huffman trees to use
|
||||
int numberOfHuffmanTrees = m_in.readBits(3);
|
||||
if (numberOfHuffmanTrees < 2 || numberOfHuffmanTrees > 6)
|
||||
{
|
||||
throwIOException("Invalid number of Huffman trees " + numberOfHuffmanTrees + ". Must be between 2 and 6 (inclusive)");
|
||||
}
|
||||
|
||||
// The number of times the trees to use are swapped in the input.
|
||||
// The trees are swapped each 50 bytes.
|
||||
m_numberOfTimesHuffmanTreesAreSwitched = m_in.readBitsLittleEndian(15);
|
||||
if (m_numberOfTimesHuffmanTreesAreSwitched < 1)
|
||||
{
|
||||
throwIOException("Invalid number of times the Huffman trees are switched in the input: " + m_numberOfTimesHuffmanTreesAreSwitched);
|
||||
}
|
||||
|
||||
// Zero-terminated bit runs for each tree switch
|
||||
int[] treeUseMtf = new int[m_numberOfTimesHuffmanTreesAreSwitched];
|
||||
for (int i = 0; i < m_numberOfTimesHuffmanTreesAreSwitched; i++)
|
||||
{
|
||||
treeUseMtf[i] = 0;
|
||||
while (m_in.readBit())
|
||||
{
|
||||
treeUseMtf[i]++;
|
||||
}
|
||||
if (treeUseMtf[i] > numberOfHuffmanTrees)
|
||||
{
|
||||
throwIOException("Invalid Huffman tree use MTF " + treeUseMtf[i] + ". Must be less than the number of Huffman trees, " + numberOfHuffmanTrees);
|
||||
}
|
||||
}
|
||||
|
||||
// Decode the tree use MTF values
|
||||
m_treeUse = new int[m_numberOfTimesHuffmanTreesAreSwitched];
|
||||
// The "alphabet" for the MTF encoding -- the indices of the different
|
||||
// tree uses.
|
||||
int[] treeUseIndices = new int[numberOfHuffmanTrees];
|
||||
for (int i = 0; i < numberOfHuffmanTrees; i++)
|
||||
{
|
||||
treeUseIndices[i] = i;
|
||||
}
|
||||
new IntMoveToFront(treeUseIndices).decode(treeUseMtf, m_treeUse);
|
||||
|
||||
// Settings for the Huffman trees
|
||||
|
||||
// The total number of used symbols is the value we calculated above - 1
|
||||
// + RUNA, RUNB and an end of stream marker.
|
||||
int totalNumberOfSymbols = numberOfUsedSymbols + 2;
|
||||
m_huffmanTrees = new HighValueBranchHuffmanTree[numberOfHuffmanTrees];
|
||||
for (int i = 0; i < numberOfHuffmanTrees; i++)
|
||||
{
|
||||
m_huffmanTrees[i] = decodeHuffmanTree(totalNumberOfSymbols, m_in);
|
||||
}
|
||||
|
||||
// The symbol value for the end of the data block.
|
||||
m_endOfBlockSymbol = totalNumberOfSymbols - 1;
|
||||
}
|
||||
|
||||
private void selectNewHuffmanTree() throws IOException
|
||||
{
|
||||
if (m_switchNo >= m_numberOfTimesHuffmanTreesAreSwitched)
|
||||
{
|
||||
throwIOException("One Huffman tree switch too many: " + m_switchNo);
|
||||
}
|
||||
m_symbolsLeftToReadFromCurTree = SYMBOLS_TO_READ_FROM_EACH_TREE;
|
||||
m_curTree = m_huffmanTrees[m_treeUse[m_switchNo]];
|
||||
m_switchNo++;
|
||||
}
|
||||
|
||||
private int readSymbol() throws IOException
|
||||
{
|
||||
if (m_symbolsLeftToReadFromCurTree == 0)
|
||||
{
|
||||
selectNewHuffmanTree();
|
||||
}
|
||||
final int symbol = m_curTree.readNext(m_in);
|
||||
m_symbolsLeftToReadFromCurTree--;
|
||||
return symbol;
|
||||
}
|
||||
|
||||
private void decodeSingleByte(final int symbolMtf) throws IOException
|
||||
{
|
||||
// Move To Front decode the symbol
|
||||
final int byteIndex = m_mtfTransformer.decode(symbolMtf - 1) & 0xFF;
|
||||
|
||||
final byte value = m_symbolSequenceNos[byteIndex];
|
||||
m_decoded[m_noBytesDecoded++] = value;
|
||||
m_byteFrequencies[value & 0xFF]++;
|
||||
}
|
||||
|
||||
// returns the next symbol
|
||||
private int handleRunaAndRunb(int symbol) throws IOException
|
||||
{
|
||||
int n = 1;
|
||||
int multiplier = 0;
|
||||
while (symbol == RUNA_SYMBOL || symbol == RUNB_SYMBOL)
|
||||
{
|
||||
if (symbol == RUNA_SYMBOL)
|
||||
{
|
||||
multiplier += n;
|
||||
}
|
||||
else
|
||||
{
|
||||
multiplier += 2 * n;
|
||||
}
|
||||
// Multiply n with 2
|
||||
n <<= 1;
|
||||
symbol = readSymbol();
|
||||
}
|
||||
|
||||
// The repeated value is at the front of the MTF list
|
||||
final int byteIndex = m_mtfTransformer.decode(0) & 0xFF;
|
||||
final byte value = m_symbolSequenceNos[byteIndex];
|
||||
if (multiplier == 1)
|
||||
{
|
||||
m_decoded[m_noBytesDecoded++] = value;
|
||||
m_byteFrequencies[value & 0xFF]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
Arrays.fill(m_decoded, m_noBytesDecoded, m_noBytesDecoded + multiplier, value);
|
||||
m_noBytesDecoded += multiplier;
|
||||
m_byteFrequencies[value & 0xFF] += multiplier;
|
||||
}
|
||||
return symbol;
|
||||
}
|
||||
|
||||
CompressedDataBlock readCompressedDataBlock() throws IOException, InterruptedException
|
||||
{
|
||||
readCompressedBlockHeader();
|
||||
|
||||
int symbol = readSymbol();
|
||||
|
||||
while (true)
|
||||
{
|
||||
checkInterrupted();
|
||||
|
||||
if (symbol == RUNA_SYMBOL || symbol == RUNB_SYMBOL)
|
||||
{
|
||||
symbol = handleRunaAndRunb(symbol);
|
||||
}
|
||||
else if (symbol == m_endOfBlockSymbol)
|
||||
{
|
||||
BurrowsWheelerDecoder bwd = new BurrowsWheelerDecoder(m_decoded, m_noBytesDecoded, m_byteFrequencies, m_originalDataPointer);
|
||||
return new CompressedDataBlock(new RLEDecodingInputStream(bwd.decode(), m_readBlockChecksum), m_readBlockChecksum);
|
||||
}
|
||||
else
|
||||
{
|
||||
decodeSingleByte(symbol);
|
||||
symbol = readSymbol();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void initDecoderState()
|
||||
{
|
||||
// Initialize the MTF alphabet
|
||||
final byte[] moveToFrontAlphabet = new byte[MAX_NO_OF_MTF_SYMBOLS];
|
||||
System.arraycopy(INITIAL_MOVE_TO_FRONT_ALPHABET, 0, moveToFrontAlphabet, 0, MAX_NO_OF_MTF_SYMBOLS);
|
||||
m_mtfTransformer = new ByteMoveToFront(moveToFrontAlphabet);
|
||||
m_curTree = null;
|
||||
m_symbolsLeftToReadFromCurTree = 0;
|
||||
m_switchNo = 0;
|
||||
m_noBytesDecoded = 0;
|
||||
}
|
||||
|
||||
Block getNextBlock() throws IOException
|
||||
{
|
||||
initDecoderState();
|
||||
|
||||
byte[] barr = new byte[6];
|
||||
m_in.readBytes(barr, 0, 6);
|
||||
if (Arrays.equals(COMPRESSED_BLOCK_MAGIC, barr))
|
||||
{
|
||||
trace("Found block of compressed data");
|
||||
try
|
||||
{
|
||||
return readCompressedDataBlock();
|
||||
}
|
||||
catch (InterruptedException e)
|
||||
{
|
||||
throw new At4JException(e);
|
||||
}
|
||||
}
|
||||
else if (Arrays.equals(EOS_BLOCK_MAGIC, barr))
|
||||
{
|
||||
trace("Found end of stream block");
|
||||
m_in.readBytes(barr, 0, 4);
|
||||
int readCrc32 = (int) UnsignedInteger.fromLittleEndianByteArrayToLong(barr, 0);
|
||||
return new EosBlock(readCrc32);
|
||||
}
|
||||
else
|
||||
{
|
||||
throwIOException("Invalid block header " + Arrays.toString(barr) + ". Expected compressed data block or end of stream block");
|
||||
// Never reached
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
54
src/main/java/org/at4j/comp/bzip2/BlockEncodedCallback.java
Normal file
54
src/main/java/org/at4j/comp/bzip2/BlockEncodedCallback.java
Normal file
@ -0,0 +1,54 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.at4j.support.io.BitOutput;
|
||||
|
||||
/**
|
||||
* This callback is called by the {@link BlockEncoder} when it has encoded its
|
||||
* block.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class BlockEncodedCallback
|
||||
{
|
||||
private final int m_blockNo;
|
||||
private final EncodedBlockWriter m_writer;
|
||||
private final ByteArrayOutputStream m_byteOut;
|
||||
private final BitOutput m_bitOut;
|
||||
|
||||
BlockEncodedCallback(final int blockNo, final ByteArrayOutputStream byteOut, final BitOutput bitOut, final EncodedBlockWriter writer)
|
||||
{
|
||||
m_blockNo = blockNo;
|
||||
m_writer = writer;
|
||||
m_byteOut = byteOut;
|
||||
m_bitOut = bitOut;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is called by the {@link BlockEncoder} when it is done.
|
||||
*/
|
||||
void reportBlockDone() throws IOException
|
||||
{
|
||||
m_writer.writeBlock(m_blockNo, new EncodedBlockData(m_byteOut.toByteArray(), m_bitOut.getNumberOfBitsInUnfinishedByte(), m_bitOut.getUnfinishedByte()));
|
||||
}
|
||||
}
|
893
src/main/java/org/at4j/comp/bzip2/BlockEncoder.java
Normal file
893
src/main/java/org/at4j/comp/bzip2/BlockEncoder.java
Normal file
@ -0,0 +1,893 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.at4j.comp.bzip2.BurrowsWheelerEncoder.BurrowsWheelerEncodingResult;
|
||||
import org.at4j.support.comp.IntMoveToFront;
|
||||
import org.at4j.support.io.BitOutput;
|
||||
|
||||
/**
|
||||
* This is used by the thread encoding a bzip2 block.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class BlockEncoder
|
||||
{
|
||||
private static final byte[] BLOCK_MAGIC = new byte[] { 0x31, 0x41, 0x59, 0x26, 0x53, 0x59 };
|
||||
|
||||
// The maximum Huffman tree depth
|
||||
private static final int MAX_HUFFMAN_BIT_LENGTH = 17;
|
||||
|
||||
// The values of the RUNA and RUNB symbols
|
||||
private static final int RUNA_SYMBOL = 0;
|
||||
private static final int RUNB_SYMBOL = 1;
|
||||
|
||||
private static final int MIN_NO_OF_HUFFMAN_TREES = 2;
|
||||
static final int MAX_NO_OF_HUFFMAN_TREES = 6;
|
||||
|
||||
// The maximum number of different MTF symbols: 256 bytes + RUNA + RUNB +
|
||||
// EOB - one byte (the first symbol does not have to be encoded thanks to
|
||||
// MTF and RLE)
|
||||
static final int MAX_NO_OF_MTF_SYMBOLS = 258;
|
||||
|
||||
// Write 50 symbols, then swap Huffman trees.
|
||||
static final int NO_OF_SYMBOLS_PER_SEGMENT = 50;
|
||||
|
||||
// Categories used when optimizing Huffman trees
|
||||
// For each tree length, in which category does a segment belong depending
|
||||
// on its encoded length percentage?
|
||||
static final int[][] CATEGORY_PER_NO_OF_TREES_AND_PERCENTAGE = new int[][] {
|
||||
// Two trees: cutoff at 30%
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
|
||||
// Three trees: cutoff at 18% and 45%
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
|
||||
// Four trees: cutoff at 15%, 30% and 55%
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
|
||||
// Five trees: cutoff at 12%, 25%, 40% and 60%
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
|
||||
// Six trees: cutoff at 8%, 25%, 36%, 51% and 63%
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 } };
|
||||
|
||||
private static final byte[] INITIAL_MTF_ALPHABET = new byte[MAX_NO_OF_MTF_SYMBOLS];
|
||||
static
|
||||
{
|
||||
for (int i = 0; i < INITIAL_MTF_ALPHABET.length; i++)
|
||||
{
|
||||
INITIAL_MTF_ALPHABET[i] = (byte) (i & 0xFF);
|
||||
}
|
||||
}
|
||||
|
||||
private final byte[] m_block;
|
||||
private final int m_blockNo;
|
||||
private final int m_blockSize;
|
||||
private final int m_blockChecksum;
|
||||
// Bit flags indicating which bytes that occur at least once in this block
|
||||
private final boolean[] m_seenDifferentBytes;
|
||||
// The number of different bytes seen in this block
|
||||
private final int m_numberOfSeenDifferentBytes;
|
||||
private final int m_numberOfHuffmanTreeRefinementIterations;
|
||||
// Sink to write encoded data to.
|
||||
private final BitOutput m_out;
|
||||
// This callback is called when the block encoder is done. It may be null.
|
||||
private final BlockEncodedCallback m_blockEncoderCallback;
|
||||
|
||||
// This is set by the encoding thread before calling encode
|
||||
private EncodingScratchpad m_scratchpad;
|
||||
|
||||
/**
 * Create an encoder for one block. The scratchpad must be set with
 * {@code setScratchpad} separately.
 * @param block The block data.
 * @param blockNo The block number.
 * @param blockSize The block size.
 * @param blockChecksum The checksum for the block.
 * @param seenDifferentBytes Flags telling which byte values occur at least
 * once in the block.
 * @param numberOfSeenDifferentBytes The number of different byte values in
 * the block.
 * @param numberOfHuffmanTreeRefinementIterations The number of Huffman tree
 * refinement iterations.
 * @param out The sink to write encoded data to.
 * @param bec Callback that is called when the encoder is done. May be
 * {@code null}.
 */
BlockEncoder(final byte[] block, final int blockNo, final int blockSize, final int blockChecksum, final boolean[] seenDifferentBytes, final int numberOfSeenDifferentBytes, final int numberOfHuffmanTreeRefinementIterations,
        final BitOutput out, final BlockEncodedCallback bec)
{
    // The block and its properties
    this.m_block = block;
    this.m_blockNo = blockNo;
    this.m_blockSize = blockSize;
    this.m_blockChecksum = blockChecksum;

    // Byte statistics for the block
    this.m_seenDifferentBytes = seenDifferentBytes;
    this.m_numberOfSeenDifferentBytes = numberOfSeenDifferentBytes;

    // Encoder settings and output
    this.m_numberOfHuffmanTreeRefinementIterations = numberOfHuffmanTreeRefinementIterations;
    this.m_out = out;
    this.m_blockEncoderCallback = bec;
}
|
||||
|
||||
void setScratchpad(EncodingScratchpad sp)
|
||||
{
|
||||
m_scratchpad = sp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the seen byte values in the current block.
|
||||
*/
|
||||
private byte[] getSeenByteValues()
|
||||
{
|
||||
byte[] res = new byte[m_numberOfSeenDifferentBytes];
|
||||
int j = 0;
|
||||
for (int i = 0; i < 256; i++)
|
||||
{
|
||||
if (m_seenDifferentBytes[i])
|
||||
{
|
||||
res[j++] = (byte) (i & 0xFF);
|
||||
}
|
||||
}
|
||||
assert j == m_numberOfSeenDifferentBytes;
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add RUNA and RUNB symbols to {@code res} at {@code outIndex} to represent
|
||||
* {@code no} repetitions of the previous symbol.
|
||||
* <p>
|
||||
* This method is declared package-protected for the unit tests.
|
||||
* @return The number of symbols added. outIndex should be incremented by
|
||||
* this value by the caller.
|
||||
*/
|
||||
static int addRunaAndRunb(int[] res, int outIndex, int no)
|
||||
{
|
||||
int noWritten = 0;
|
||||
while (no > 0)
|
||||
{
|
||||
switch (no % 2)
|
||||
{
|
||||
case 1:
|
||||
res[outIndex + noWritten++] = RUNA_SYMBOL;
|
||||
no -= 1;
|
||||
break;
|
||||
case 0:
|
||||
res[outIndex + noWritten++] = RUNB_SYMBOL;
|
||||
no -= 2;
|
||||
break;
|
||||
default:
|
||||
// Should not occur unless we use relativistic arithmetic or
|
||||
// something...
|
||||
throw new RuntimeException();
|
||||
}
|
||||
no >>>= 1;
|
||||
}
|
||||
return noWritten;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a mapping between symbols and their index numbers in the array of
|
||||
* symbols.
|
||||
* @param symbols The symbols.
|
||||
* @return An array containing the index number for each symbol that occurs
|
||||
* in {@code symbols}.
|
||||
*/
|
||||
private byte[] createSequenceMap(byte[] symbols)
|
||||
{
|
||||
byte[] res = m_scratchpad.m_sequenceMap;
|
||||
byte index = 0;
|
||||
for (int i = 0; i < symbols.length; i++)
|
||||
{
|
||||
res[symbols[i] & 0xFF] = index++;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
private static class MTFAndRLEResult
|
||||
{
|
||||
// The encoded data as MTF symbols.
|
||||
private final int[] m_encodedData;
|
||||
private final int m_dataLen;
|
||||
private final int m_noSeenDifferentSymbols;
|
||||
|
||||
private MTFAndRLEResult(int[] symbols, int dataLen, int noSeenDifferentSymbols)
|
||||
{
|
||||
m_encodedData = symbols;
|
||||
m_dataLen = dataLen;
|
||||
m_noSeenDifferentSymbols = noSeenDifferentSymbols;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run MTF and RLE encoding of the data in {@code data}.
|
||||
* @param data The data to encode.
|
||||
* @param dataLen The data length.
|
||||
* @param symbols An array containing all different symbols that occur in
|
||||
* {@code data}.
|
||||
* @return MTF and RLE encoded data.
|
||||
*/
|
||||
private MTFAndRLEResult moveToFrontAndRunLengthEncode(final byte[] data, final int dataLen, final byte[] symbols)
|
||||
{
|
||||
// This array will contain the run length encoded result. The result
|
||||
// will probably be shorter than data.length thanks to the run length
|
||||
// encoding, but data.length (+ 1 for the EOB symbol) is the worst case
|
||||
// length.
|
||||
boolean[] seenSymbols = new boolean[259];
|
||||
// RUNA and RUNB are always seen (even when they are not...)
|
||||
seenSymbols[0] = true;
|
||||
seenSymbols[1] = true;
|
||||
int noSeenSymbols = 2;
|
||||
|
||||
// Initialize the move to front alphabet
|
||||
final byte[] mtfAlphabet = m_scratchpad.m_mtfAlphabet;
|
||||
System.arraycopy(INITIAL_MTF_ALPHABET, 0, mtfAlphabet, 0, mtfAlphabet.length);
|
||||
|
||||
// The array to store the encoded data in.
|
||||
final int[] encodedData = m_scratchpad.m_encodedData;
|
||||
|
||||
// Create a mapping between a symbol and its index number in the array
|
||||
// of symbols
|
||||
final byte[] sequenceMap = createSequenceMap(symbols);
|
||||
|
||||
int lastSymbolIndex = 0;
|
||||
int curOutArrayIndex = 0;
|
||||
// A counter to keep track of the number of equal symbols in a row for
|
||||
// the run length encoding
|
||||
int noSame = 0;
|
||||
for (int curInArrayIndex = 0; curInArrayIndex < dataLen; curInArrayIndex++)
|
||||
{
|
||||
final byte curSymbolIndex = sequenceMap[data[curInArrayIndex] & 0xFF];
|
||||
if (curSymbolIndex == lastSymbolIndex)
|
||||
{
|
||||
noSame++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (noSame > 0)
|
||||
{
|
||||
// Run length encode
|
||||
curOutArrayIndex += addRunaAndRunb(m_scratchpad.m_encodedData, curOutArrayIndex, noSame);
|
||||
noSame = 0;
|
||||
}
|
||||
|
||||
// Search for the current symbol in the MTF alphabet and count
|
||||
// the distance
|
||||
int j = 0;
|
||||
byte lastMtf = mtfAlphabet[0];
|
||||
|
||||
while (mtfAlphabet[++j] != curSymbolIndex)
|
||||
{
|
||||
final byte nextLastMtf = mtfAlphabet[j];
|
||||
mtfAlphabet[j] = lastMtf;
|
||||
lastMtf = nextLastMtf;
|
||||
}
|
||||
// Swap the symbols in the MTF alphabet.
|
||||
mtfAlphabet[j] = lastMtf;
|
||||
mtfAlphabet[0] = curSymbolIndex;
|
||||
|
||||
// Output the distance. Distance 1 gets the value 2 since
|
||||
// RUNA and RUNB have the values 0 and 1.
|
||||
int symbolVal = j + 1;
|
||||
encodedData[curOutArrayIndex++] = symbolVal;
|
||||
if (!seenSymbols[symbolVal])
|
||||
{
|
||||
seenSymbols[symbolVal] = true;
|
||||
noSeenSymbols++;
|
||||
}
|
||||
lastSymbolIndex = curSymbolIndex;
|
||||
}
|
||||
}
|
||||
if (noSame > 0)
|
||||
{
|
||||
// One last run length encoding
|
||||
curOutArrayIndex += addRunaAndRunb(encodedData, curOutArrayIndex, noSame);
|
||||
}
|
||||
return new MTFAndRLEResult(encodedData, curOutArrayIndex, noSeenSymbols);
|
||||
}
|
||||
|
||||
private static class EncodeAllSegmentsResult
|
||||
{
|
||||
// The shortest encoded segment length for all segments.
|
||||
private int m_shortestLength;
|
||||
// The longest encoded segment length for all segments.
|
||||
private int m_longestLength;
|
||||
// A list with encoding results (the bit length) for each segment and
|
||||
// tree.
|
||||
private int[][] m_encodingResults;
|
||||
// For each segment, the index of the tree that gave the shortest
|
||||
// encoded block.
|
||||
private int[] m_treesUsed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode all 50-byte segments with all trees and count the encoded lengths.
|
||||
* By doing this we can select the best Huffman tree for each segment by
|
||||
* seeing which tree that gave the shortest encoded data.
|
||||
* @param data The data to encode.
|
||||
* @param dataLen The length of the data. (This may be shorter than the
|
||||
* {@code data} array.)
|
||||
* @param codeLengths An array of code lengths for each symbol for each
|
||||
* investigated Huffman tree.
|
||||
* @param numberOfHuffmanSegments The number of 50-byte segments in the
|
||||
* current block.
|
||||
* @param numberOfDifferentSymbols The number of different symbols in the
|
||||
* data. This is the value of the EOB symbol + 1.
|
||||
* @param res The result of the operation is stored in this object.
|
||||
*/
|
||||
private void encodeAllSegmentsWithAllTrees(final int[] data, final int dataLen, final int[][] codeLengths, final int numberOfHuffmanSegments, final int numberOfDifferentSymbols, final EncodeAllSegmentsResult res) throws IOException
|
||||
{
|
||||
final int noTrees = codeLengths.length;
|
||||
final int[][] encodingResults = m_scratchpad.m_encodingResults;
|
||||
// The best tree for each segment
|
||||
final int[] treesUsed = new int[numberOfHuffmanSegments];
|
||||
// The shortest seen shortest length for all segments
|
||||
int shortestLength = Integer.MAX_VALUE;
|
||||
// The longest seen -shortest- length for all segments
|
||||
int longestLength = 0;
|
||||
for (int segmentNo = 0; segmentNo < numberOfHuffmanSegments; segmentNo++)
|
||||
{
|
||||
// Encode this segment with all Huffman trees
|
||||
int shortestLengthForSegment = Integer.MAX_VALUE;
|
||||
int bestTreeIndex = 0;
|
||||
final int[] segmentEncodingResultPerTree = new int[noTrees];
|
||||
final int segmentStart = segmentNo * NO_OF_SYMBOLS_PER_SEGMENT;
|
||||
final int segmentEnd = Math.min(segmentStart + NO_OF_SYMBOLS_PER_SEGMENT, dataLen);
|
||||
for (int treeNo = 0; treeNo < noTrees; treeNo++)
|
||||
{
|
||||
final int[] curTreeCodeLengths = codeLengths[treeNo];
|
||||
int bitLen = 0;
|
||||
for (int j = segmentStart; j < segmentEnd; j++)
|
||||
{
|
||||
bitLen += curTreeCodeLengths[data[j]];
|
||||
}
|
||||
|
||||
if (treeNo == 0)
|
||||
{
|
||||
shortestLengthForSegment = bitLen;
|
||||
}
|
||||
else if (bitLen < shortestLengthForSegment)
|
||||
{
|
||||
shortestLengthForSegment = bitLen;
|
||||
bestTreeIndex = treeNo;
|
||||
}
|
||||
segmentEncodingResultPerTree[treeNo] = bitLen;
|
||||
}
|
||||
|
||||
if (segmentNo == 0)
|
||||
{
|
||||
shortestLength = longestLength = shortestLengthForSegment;
|
||||
}
|
||||
// Don't count the length of the last segment since that is likely
|
||||
// to contain less than 50 symbols.
|
||||
else if ((segmentNo < (numberOfHuffmanSegments - 1)) && (shortestLengthForSegment < shortestLength))
|
||||
{
|
||||
shortestLength = shortestLengthForSegment;
|
||||
}
|
||||
else if (shortestLengthForSegment > longestLength)
|
||||
{
|
||||
longestLength = shortestLengthForSegment;
|
||||
}
|
||||
encodingResults[segmentNo] = segmentEncodingResultPerTree;
|
||||
treesUsed[segmentNo] = bestTreeIndex;
|
||||
}
|
||||
|
||||
res.m_encodingResults = encodingResults;
|
||||
res.m_longestLength = longestLength;
|
||||
res.m_shortestLength = shortestLength;
|
||||
res.m_treesUsed = treesUsed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Divide all segments into x categories based on how well they were encoded
|
||||
* by the globally optimal Huffman tree. An optimal Huffman tree is created
|
||||
* for each category.
|
||||
* @param data The data to encode.
|
||||
* @param dataLen The length of the data.
|
||||
* @param eobSymbol The value of the special EOB symbol. This is the highest
|
||||
* used symbol value.
|
||||
* @param numberOfHuffmanTrees The number of Huffman trees to create.
|
||||
* @param numberOfSegments The number of 50-byte segments in the block.
|
||||
* @param easr The encoding results from encoding the data with the globally
|
||||
* optimal Huffman tree.
|
||||
* @param globallyOptimalTree The symbol code lengths for the globally
|
||||
* optimal Huffman tree.
|
||||
* @return The symbols code lengths for each created tree.
|
||||
*/
|
||||
private int[][] createNewTrees(final int[] data, final int dataLen, final int eobSymbol, final int numberOfHuffmanTrees, final int numberOfSegments, final EncodeAllSegmentsResult easr, final int[] globallyOptimalTree)
|
||||
{
|
||||
// Clear the frequencies array
|
||||
final int[][] frequencies = m_scratchpad.m_frequencies2d;
|
||||
for (int i = 0; i < numberOfHuffmanTrees; i++)
|
||||
{
|
||||
Arrays.fill(frequencies[i], 0);
|
||||
}
|
||||
|
||||
// How big difference in number of bits is there between the shortest
|
||||
// and the longest encoded segment?
|
||||
final int maxDistance = easr.m_longestLength - easr.m_shortestLength;
|
||||
if (maxDistance == 0)
|
||||
{
|
||||
// Nothing to do. We're as optimal as can be.
|
||||
return new int[][] { globallyOptimalTree };
|
||||
}
|
||||
|
||||
final int numberOfCategories = numberOfHuffmanTrees;
|
||||
// Which category does each 50-byte segment fall into?
|
||||
final int[] categoryPerSegment = m_scratchpad.m_categoriesPerSegment;
|
||||
// How many 50-byte segments fall into each category?
|
||||
final int[] noSegmentsPerCategory = new int[numberOfCategories];
|
||||
|
||||
// This array is used to determine which category a segment falls into
|
||||
// based on its encoded length.
|
||||
final int[] catArray = CATEGORY_PER_NO_OF_TREES_AND_PERCENTAGE[numberOfHuffmanTrees - 2];
|
||||
|
||||
// Don't include the last segment in the statistics since that is likely
|
||||
// to be shorter
|
||||
for (int i = 0; i < numberOfSegments - 1; i++)
|
||||
{
|
||||
// The shortest length for this segment.
|
||||
final int segmentLen = easr.m_encodingResults[i][easr.m_treesUsed[i]];
|
||||
final int percentage = (100 * (segmentLen - easr.m_shortestLength)) / maxDistance;
|
||||
assert percentage >= 0;
|
||||
assert percentage <= 100;
|
||||
final int catNo = catArray[percentage];
|
||||
noSegmentsPerCategory[catNo]++;
|
||||
categoryPerSegment[i] = catNo;
|
||||
}
|
||||
|
||||
for (int i = 0; i < numberOfSegments; i++)
|
||||
{
|
||||
final int segmentStart = i * NO_OF_SYMBOLS_PER_SEGMENT;
|
||||
final int segmentEnd = Math.min(segmentStart + NO_OF_SYMBOLS_PER_SEGMENT, dataLen);
|
||||
final int[] curCatFreqs = frequencies[categoryPerSegment[i]];
|
||||
for (int j = segmentStart; j < segmentEnd; j++)
|
||||
{
|
||||
curCatFreqs[data[j]]++;
|
||||
}
|
||||
}
|
||||
|
||||
int noNewTrees = 0;
|
||||
for (int i = 0; i < numberOfCategories; i++)
|
||||
{
|
||||
if (noSegmentsPerCategory[i] > 0)
|
||||
{
|
||||
// Create a new Huffman tree for this category.
|
||||
noNewTrees++;
|
||||
}
|
||||
}
|
||||
assert noNewTrees > 0;
|
||||
|
||||
int[][] res = new int[noNewTrees][];
|
||||
int treeNo = 0;
|
||||
for (int i = 0; i < numberOfCategories; i++)
|
||||
{
|
||||
if (noSegmentsPerCategory[i] > 0)
|
||||
{
|
||||
res[treeNo++] = HighValueBranchHuffmanTree.createCodeLengths(frequencies[i], eobSymbol + 1, MAX_HUFFMAN_BIT_LENGTH, m_scratchpad);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Refine the Huffman trees based on the encoding results. For each tree,
|
||||
* make it optimal based on the data in the segments that it was the best
|
||||
* tree for.
|
||||
* @param data The data to encode.
|
||||
* @param dataLen The length of the data to encode.
|
||||
* @param codeLengths The code length for each symbol for each tree.
|
||||
* @param easr The results when encoding the data with this set of trees.
|
||||
* @param eobSymbol The value of the EOB symbol. This is the highest symbol
|
||||
* value.
|
||||
* @return Symbol code lengths for the refined trees.
|
||||
*/
|
||||
private int[][] refineTreesBasedOnEncodingResults(final int[] data, final int dataLen, final int[][] codeLengths, final EncodeAllSegmentsResult easr, final int eobSymbol)
|
||||
{
|
||||
// Clear the frequencies array
|
||||
final int[][] frequencies = m_scratchpad.m_frequencies2d;
|
||||
for (int i = 0; i < codeLengths.length; i++)
|
||||
{
|
||||
Arrays.fill(frequencies[i], 0);
|
||||
}
|
||||
|
||||
int segmentNo = 0;
|
||||
int noInSegment = 0;
|
||||
int curTree = easr.m_treesUsed[segmentNo];
|
||||
for (int i = 0; i < dataLen; i++)
|
||||
{
|
||||
int symbolVal = data[i];
|
||||
frequencies[curTree][symbolVal]++;
|
||||
if (++noInSegment == NO_OF_SYMBOLS_PER_SEGMENT)
|
||||
{
|
||||
segmentNo++;
|
||||
// If the data length is a multiple of 50, we do a switch after
|
||||
// encoding the last symbol which will make segmentNo greater
|
||||
// than the index of the last element in easr.m_treesUsed.
|
||||
// Thus the check below.
|
||||
if (segmentNo < easr.m_treesUsed.length)
|
||||
{
|
||||
curTree = easr.m_treesUsed[segmentNo];
|
||||
}
|
||||
noInSegment = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Recreate the trees based on the gathered frequencies
|
||||
int[][] res = new int[codeLengths.length][];
|
||||
for (int i = 0; i < codeLengths.length; i++)
|
||||
{
|
||||
res[i] = HighValueBranchHuffmanTree.createCodeLengths(frequencies[i], eobSymbol + 1, MAX_HUFFMAN_BIT_LENGTH, m_scratchpad);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of Huffman trees to use based on the number of 50-byte
|
||||
* segments in the data.
|
||||
*/
|
||||
private byte getNumberOfHuffmanTrees(int noSegments)
|
||||
{
|
||||
// Values from bzip2
|
||||
if (noSegments < 200)
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
else if (noSegments < 600)
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
else if (noSegments < 1200)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
else if (noSegments < 2400)
|
||||
{
|
||||
return 5;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 6;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the minimum and maximum code length from the array.
|
||||
* @return An int array containing the minimum and the maximum code lengths,
|
||||
* in that order.
|
||||
*/
|
||||
private int[] getMinAndMaxCodeLengths(final int[] codeLengths)
|
||||
{
|
||||
int minLength = codeLengths[0];
|
||||
int maxLength = codeLengths[0];
|
||||
for (int i = 1; i < codeLengths.length; i++)
|
||||
{
|
||||
if (codeLengths[i] < minLength)
|
||||
{
|
||||
minLength = codeLengths[i];
|
||||
}
|
||||
else if (codeLengths[i] > maxLength)
|
||||
{
|
||||
maxLength = codeLengths[i];
|
||||
}
|
||||
}
|
||||
return new int[] { minLength, maxLength };
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the Huffman trees that should be used for encoding the current
|
||||
* block. First, an globally optimal tree is created. Then new trees are
|
||||
* created from information on how well the globally optimal tree encoded
|
||||
* different segments. Lastly, the created trees are optimized based on the
|
||||
* data in the segments that they are used to encode. This last step is
|
||||
* repeated a configurable number of times ({@code
|
||||
* m_numberOfHuffmanTreeRefinementIterations}).
|
||||
* @param data The data that should be encoded using the created Huffman
|
||||
* trees.
|
||||
* @param dataLen The length of the data, excluding the trailing EOB symbol.
|
||||
* @param noSymbolsUsed The number of different symbols used in the data.
|
||||
*/
|
||||
private HuffmanTreesAndUsage createHuffmanTrees(final int[] data, final int dataLen, final int noSymbolsUsed) throws IOException
|
||||
{
|
||||
HuffmanTreesAndUsage res = new HuffmanTreesAndUsage();
|
||||
|
||||
// The maximum possible number of trees.
|
||||
// +1 == EOB symbol
|
||||
res.m_noHuffmanSegments = ((dataLen - 1 + 1) / NO_OF_SYMBOLS_PER_SEGMENT) + 1;
|
||||
|
||||
// Create a Huffman tree for the entire input.
|
||||
// Count the frequencies of the different bytes in the input.
|
||||
int[] frequencies = m_scratchpad.m_frequencies;
|
||||
Arrays.fill(frequencies, 0);
|
||||
|
||||
// The maximum symbol value used (before the EOB symbol) is at least 1
|
||||
// (RUNB).
|
||||
int maxSymbolValue = 1;
|
||||
for (int j = 0; j < dataLen; j++)
|
||||
{
|
||||
int symbolVal = data[j];
|
||||
frequencies[symbolVal]++;
|
||||
if (symbolVal > maxSymbolValue)
|
||||
{
|
||||
maxSymbolValue = symbolVal;
|
||||
}
|
||||
}
|
||||
|
||||
// Now we can infer the value of the EOB (End Of Block) symbol. Add it
|
||||
// to the end of the data. The data array is created so there should be
|
||||
// room for it.
|
||||
res.m_eobSymbol = maxSymbolValue + 1;
|
||||
frequencies[res.m_eobSymbol] = 1;
|
||||
data[dataLen] = res.m_eobSymbol;
|
||||
final int dataLenIncEob = dataLen + 1;
|
||||
|
||||
// Maybe we're already done?
|
||||
if (res.m_noHuffmanSegments < MIN_NO_OF_HUFFMAN_TREES)
|
||||
{
|
||||
// We have to encode at least two trees anyway.
|
||||
res.m_trees = new HighValueBranchHuffmanTree[MIN_NO_OF_HUFFMAN_TREES];
|
||||
int[] codeLengths = HighValueBranchHuffmanTree.createCodeLengths(frequencies, res.m_eobSymbol + 1, MAX_HUFFMAN_BIT_LENGTH, m_scratchpad);
|
||||
int[] minAndMaxLength = getMinAndMaxCodeLengths(codeLengths);
|
||||
HighValueBranchHuffmanTree tree = new HighValueBranchHuffmanTree(codeLengths, minAndMaxLength[0], minAndMaxLength[1], true);
|
||||
for (int i = 0; i < MIN_NO_OF_HUFFMAN_TREES; i++)
|
||||
{
|
||||
res.m_trees[i] = tree;
|
||||
}
|
||||
// Use tree #0 for all segments
|
||||
res.m_treeUsage = new int[res.m_noHuffmanSegments];
|
||||
}
|
||||
else
|
||||
{
|
||||
final int[][][] huffmanCodeLengths = new int[m_numberOfHuffmanTreeRefinementIterations + 1][][];
|
||||
final int[] codeLengthsForGloballyOptimalTree = HighValueBranchHuffmanTree.createCodeLengths(frequencies, res.m_eobSymbol + 1, MAX_HUFFMAN_BIT_LENGTH, m_scratchpad);
|
||||
final EncodeAllSegmentsResult easr = new EncodeAllSegmentsResult();
|
||||
encodeAllSegmentsWithAllTrees(data, dataLen, new int[][] { codeLengthsForGloballyOptimalTree }, res.m_noHuffmanSegments, res.m_eobSymbol + 1, easr);
|
||||
huffmanCodeLengths[0] = createNewTrees(data, dataLen, res.m_eobSymbol, getNumberOfHuffmanTrees(res.m_noHuffmanSegments), res.m_noHuffmanSegments, easr, codeLengthsForGloballyOptimalTree);
|
||||
|
||||
// Select the set of trees that gives the shortest total data length
|
||||
int bestIndex = -1;
|
||||
int bestLength = Integer.MAX_VALUE;
|
||||
int[] bestTreeUsage = null;
|
||||
for (int i = 0; i < huffmanCodeLengths.length; i++)
|
||||
{
|
||||
if (i > 0)
|
||||
{
|
||||
// Refine the trees
|
||||
huffmanCodeLengths[i] = refineTreesBasedOnEncodingResults(data, dataLenIncEob, huffmanCodeLengths[i - 1], easr, res.m_eobSymbol);
|
||||
}
|
||||
encodeAllSegmentsWithAllTrees(data, dataLenIncEob, huffmanCodeLengths[i], res.m_noHuffmanSegments, res.m_eobSymbol + 1, easr);
|
||||
|
||||
int totLen = 0;
|
||||
for (int j = 0; j < easr.m_treesUsed.length; j++)
|
||||
{
|
||||
totLen += easr.m_encodingResults[j][easr.m_treesUsed[j]];
|
||||
}
|
||||
|
||||
// Previously the length of each encoded tree was added to the
|
||||
// total length. That had negligible effect on the total encoded
|
||||
// length and a small impact on the performance.
|
||||
if (totLen < bestLength)
|
||||
{
|
||||
bestIndex = i;
|
||||
bestLength = totLen;
|
||||
bestTreeUsage = easr.m_treesUsed;
|
||||
}
|
||||
}
|
||||
|
||||
int noTrees = huffmanCodeLengths[bestIndex].length;
|
||||
if (noTrees < MIN_NO_OF_HUFFMAN_TREES)
|
||||
{
|
||||
res.m_trees = new HighValueBranchHuffmanTree[MIN_NO_OF_HUFFMAN_TREES];
|
||||
int[] minAndMaxLength = getMinAndMaxCodeLengths(huffmanCodeLengths[bestIndex][0]);
|
||||
for (int i = 0; i < MIN_NO_OF_HUFFMAN_TREES; i++)
|
||||
{
|
||||
res.m_trees[i] = new HighValueBranchHuffmanTree(huffmanCodeLengths[bestIndex][0], minAndMaxLength[0], minAndMaxLength[1], true);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
res.m_trees = new HighValueBranchHuffmanTree[huffmanCodeLengths[bestIndex].length];
|
||||
for (int i = 0; i < huffmanCodeLengths[bestIndex].length; i++)
|
||||
{
|
||||
int[] minAndMaxLengths = getMinAndMaxCodeLengths(huffmanCodeLengths[bestIndex][i]);
|
||||
res.m_trees[i] = new HighValueBranchHuffmanTree(huffmanCodeLengths[bestIndex][i], minAndMaxLengths[0], minAndMaxLengths[1], true);
|
||||
}
|
||||
}
|
||||
res.m_treeUsage = bestTreeUsage;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode the Huffman tree and write it to the output.
|
||||
* @param tree The tree to encode.
|
||||
* @param numberOfDifferentSymbols The number of different symbols in the
|
||||
* tree.
|
||||
* @param out The output to write the tree to.
|
||||
*/
|
||||
static void encodeHuffmanTree(final HighValueBranchHuffmanTree tree, final int numberOfDifferentSymbols, final BitOutput out) throws IOException
|
||||
{
|
||||
// Huffman bit length for the first symbol (0..17)
|
||||
int len = tree.getBitLength(0);
|
||||
out.writeBitsLittleEndian(len, 5);
|
||||
// Encode a delta length compared to the previous length for each
|
||||
// symbol.
|
||||
for (int j = 0; j < numberOfDifferentSymbols; j++)
|
||||
{
|
||||
int prevLen = len;
|
||||
len = tree.getBitLength(j);
|
||||
while (len != prevLen)
|
||||
{
|
||||
// Alter length
|
||||
out.writeBit(true);
|
||||
if (prevLen < len)
|
||||
{
|
||||
// Make longer
|
||||
out.writeBit(false);
|
||||
prevLen++;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Make shorter
|
||||
out.writeBit(true);
|
||||
prevLen--;
|
||||
}
|
||||
}
|
||||
// We are at the right length
|
||||
out.writeBit(false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the block header for an encoded data block.
|
||||
* @param blockChecksum The block checksum.
|
||||
* @param bwFirstPointer The pointer to the first element in the Burrows
|
||||
* Wheeler encoded data.
|
||||
* @param seenDifferentBytes Bit flags that are switched on for all bytes
|
||||
* that are seen in the written data.
|
||||
* @param mtfrle Results from the MTF and RLE encodings.
|
||||
* @param htau The different Huffman trees and information on when they are
|
||||
* used.
|
||||
*/
|
||||
private void writeBlockHeader(final int blockChecksum, int bwFirstPointer, boolean[] seenDifferentBytes, MTFAndRLEResult mtfrle, HuffmanTreesAndUsage htau) throws IOException
|
||||
{
|
||||
// Block magic
|
||||
for (int i = 0; i < BLOCK_MAGIC.length; i++)
|
||||
{
|
||||
m_out.writeBitsLittleEndian(BLOCK_MAGIC[i] & 0xFF, 8);
|
||||
}
|
||||
// Checksum
|
||||
m_out.writeBitsLittleEndian(blockChecksum, 32);
|
||||
// Randomized? (no)
|
||||
m_out.writeBit(false);
|
||||
// Starting pointer into Burrows Wheeler matrix (24 bits)
|
||||
m_out.writeBitsLittleEndian(bwFirstPointer, 24);
|
||||
|
||||
boolean[] segmentsWithData = new boolean[16];
|
||||
boolean[][] seenData = new boolean[16][16];
|
||||
for (int i = 0; i < 256; i++)
|
||||
{
|
||||
if (seenDifferentBytes[i])
|
||||
{
|
||||
segmentsWithData[i / 16] = true;
|
||||
seenData[i / 16][i % 16] = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Write a flag for each block of 16 bytes that have at least one byte
|
||||
// occurring in the encoded data.
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
m_out.writeBit(segmentsWithData[i]);
|
||||
}
|
||||
// For each block used, write a flag for each of the used bytes in that
|
||||
// block.
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
if (segmentsWithData[i])
|
||||
{
|
||||
for (int j = 0; j < 16; j++)
|
||||
{
|
||||
m_out.writeBit(seenData[i][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The number of Huffman trees used (2..6)
|
||||
m_out.writeBits(htau.m_trees.length, 3);
|
||||
|
||||
// The number of times the Huffman trees are switched (each 50 bytes)
|
||||
m_out.writeBitsLittleEndian(htau.m_noHuffmanSegments, 15);
|
||||
|
||||
// Which Huffman tree is selected at each switch? Use a zero-terminated
|
||||
// bit run of MTF:ed index values
|
||||
|
||||
// Init the MTF alphabet
|
||||
int[] mtfAlpha = new int[htau.m_trees.length];
|
||||
for (int i = 0; i < htau.m_trees.length; i++)
|
||||
{
|
||||
mtfAlpha[i] = i;
|
||||
}
|
||||
int[] treeUsageMtf = new int[htau.m_noHuffmanSegments];
|
||||
new IntMoveToFront(mtfAlpha).encode(htau.m_treeUsage, treeUsageMtf);
|
||||
|
||||
for (int i = 0; i < htau.m_noHuffmanSegments; i++)
|
||||
{
|
||||
// A zero-terminated bit run for the values 0..5
|
||||
int val = 0;
|
||||
while (val < treeUsageMtf[i])
|
||||
{
|
||||
m_out.writeBit(true);
|
||||
val++;
|
||||
}
|
||||
m_out.writeBit(false);
|
||||
}
|
||||
|
||||
// Encode each Huffman tree
|
||||
for (int i = 0; i < htau.m_trees.length; i++)
|
||||
{
|
||||
encodeHuffmanTree(htau.m_trees[i], htau.m_eobSymbol + 1, m_out);
|
||||
}
|
||||
}
|
||||
|
||||
private static class HuffmanTreesAndUsage
|
||||
{
|
||||
private HighValueBranchHuffmanTree[] m_trees;
|
||||
private int m_noHuffmanSegments;
|
||||
private int[] m_treeUsage;
|
||||
private int m_eobSymbol;
|
||||
}
|
||||
|
||||
void encode() throws IOException
|
||||
{
|
||||
// Fix the block overshoot. Copy DATA_OVERSHOOT bytes to the end of the
|
||||
// array. Repeat the data if the block is shorter than DATA_OVERSHOOT
|
||||
// bytes.
|
||||
int noCopied = 0;
|
||||
while (noCopied < ThreeWayRadixQuicksort.DATA_OVERSHOOT)
|
||||
{
|
||||
int noToCopy = Math.min(ThreeWayRadixQuicksort.DATA_OVERSHOOT - noCopied, m_blockSize);
|
||||
System.arraycopy(m_block, 0, m_block, m_blockSize + noCopied, noToCopy);
|
||||
noCopied += noToCopy;
|
||||
}
|
||||
|
||||
// Sort the data in the block.
|
||||
// data contains the written data after the initial move to front
|
||||
// transformation
|
||||
BurrowsWheelerEncodingResult burrWhee = new BurrowsWheelerEncoder(m_block, m_blockSize, m_scratchpad).encode();
|
||||
|
||||
// Run Move to front and run length encoding transformations on the
|
||||
// Burrows Wheeler encoded data
|
||||
MTFAndRLEResult rleMtfSymbols = moveToFrontAndRunLengthEncode(burrWhee.m_lastColumn, m_blockSize, getSeenByteValues());
|
||||
int[] encodedData = rleMtfSymbols.m_encodedData;
|
||||
|
||||
// Create the Huffman trees. This method also infers the value of the
|
||||
// EOB symbol and adds it to the end of the encodedData array.
|
||||
HuffmanTreesAndUsage htau = createHuffmanTrees(rleMtfSymbols.m_encodedData, rleMtfSymbols.m_dataLen, rleMtfSymbols.m_noSeenDifferentSymbols);
|
||||
|
||||
writeBlockHeader(m_blockChecksum, burrWhee.m_firstPointer, m_seenDifferentBytes, rleMtfSymbols, htau);
|
||||
|
||||
// Write the Huffman encoded data. The EOB symbol is last in the data.
|
||||
int swapNo = 0;
|
||||
int noLeftUntilSwap = 1;
|
||||
HighValueBranchHuffmanTree curTree = null;
|
||||
// +1 == EOB symbol
|
||||
for (int i = 0; i < rleMtfSymbols.m_dataLen + 1; i++)
|
||||
{
|
||||
if (--noLeftUntilSwap == 0)
|
||||
{
|
||||
curTree = htau.m_trees[htau.m_treeUsage[swapNo++]];
|
||||
noLeftUntilSwap = NO_OF_SYMBOLS_PER_SEGMENT;
|
||||
}
|
||||
curTree.write(m_out, encodedData[i]);
|
||||
}
|
||||
assert swapNo == htau.m_noHuffmanSegments;
|
||||
|
||||
if (m_blockEncoderCallback != null)
|
||||
{
|
||||
m_blockEncoderCallback.reportBlockDone();
|
||||
}
|
||||
}
|
||||
}
|
62
src/main/java/org/at4j/comp/bzip2/BlockEncoderRunnable.java
Normal file
62
src/main/java/org/at4j/comp/bzip2/BlockEncoderRunnable.java
Normal file
@ -0,0 +1,62 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* This is used by the {@link BlockOutputStream} to encode a block in a separate
|
||||
* encoding thread. It uses a {@link BlockEncoder} to do the actual encoding.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class BlockEncoderRunnable implements Runnable
|
||||
{
|
||||
private final BlockEncoder m_encoder;
|
||||
private final Object m_errorOwner;
|
||||
|
||||
BlockEncoderRunnable(final BlockEncoder be, final Object errorOwner)
|
||||
{
|
||||
m_encoder = be;
|
||||
m_errorOwner = errorOwner;
|
||||
}
|
||||
|
||||
public void run()
|
||||
{
|
||||
try
|
||||
{
|
||||
m_encoder.setScratchpad(((EncodingThread) Thread.currentThread()).getScratchpad());
|
||||
m_encoder.encode();
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
|
||||
((EncodingThread) Thread.currentThread()).getErrorState().registerError(e, m_errorOwner);
|
||||
}
|
||||
catch (RuntimeException e)
|
||||
{
|
||||
((EncodingThread) Thread.currentThread()).getErrorState().registerError(e, m_errorOwner);
|
||||
}
|
||||
catch (Error e)
|
||||
{
|
||||
|
||||
((EncodingThread) Thread.currentThread()).getErrorState().registerError(e, m_errorOwner);
|
||||
}
|
||||
}
|
||||
}
|
355
src/main/java/org/at4j/comp/bzip2/BlockOutputStream.java
Normal file
355
src/main/java/org/at4j/comp/bzip2/BlockOutputStream.java
Normal file
@ -0,0 +1,355 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.at4j.support.io.BitOutput;
|
||||
import org.at4j.support.io.LittleEndianBitOutputStream;
|
||||
|
||||
/**
|
||||
* Used by {@link BZip2OutputStream} to RLE encode data and then write it to
|
||||
* compressed blocks.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class BlockOutputStream extends OutputStream
|
||||
{
|
||||
/**
|
||||
* The different states of the run length encoder.
|
||||
*/
|
||||
private static enum RLEState
|
||||
{
|
||||
ENCODING_SINGLE, COUNTING_MULTIPLE;
|
||||
}
|
||||
|
||||
// The maximum number of encoded repeated bytes.
|
||||
private static final int MAX_NO_OF_RLE_REPEATS = 251;
|
||||
|
||||
// The state of the run length encoder.
|
||||
private RLEState m_rleState;
|
||||
// The last byte value that write was called with. Used to keep track of
|
||||
// the run length encoding.
|
||||
private int m_last = -1;
|
||||
// How many equal bytes in a row has write been called with. Used to keep
|
||||
// track of the run length encoding.
|
||||
private int m_numberOfSame;
|
||||
// Encoded data is written to this.
|
||||
private final BitOutput m_wrapped;
|
||||
// The size of a Burrows Wheeler block, in bytes.
|
||||
private final int m_blockSize;
|
||||
// How many times should the Huffman trees be refined before encoding data?
|
||||
private final int m_numberOfHuffmanTreeRefinementIterations;
|
||||
// Bit flags indicating which bytes that occur at least once in the current
|
||||
// block.
|
||||
private boolean[] m_seenDifferentBytesInCurBlock;
|
||||
// The data in the current block.
|
||||
private byte[] m_block;
|
||||
// If we are using separate encoding threads, this executor is used to
|
||||
// schedule blocks for execution. Otherwise it is null.
|
||||
private final BZip2EncoderExecutorServiceImpl m_encodingExecutor;
|
||||
// A token identifying who owns the errors that may be caused by jobs that
|
||||
// we might schedule in the executor. This is null if no executor is used.
|
||||
private final Object m_errorOwner;
|
||||
|
||||
// Contains preallocated data structures. Used to reduce the number of
|
||||
// temporary objects that are created and thus avoid time spent gc:ing.
|
||||
// This is null if an executor is used for encoding.
|
||||
private final EncodingScratchpad m_scratchpad;
|
||||
|
||||
// If we use several encoder threads, this object is used for writing the
|
||||
// encoded blocks in the right order. Otherwise it is null.
|
||||
private final EncodedBlockWriter m_encodedBlockWriter;
|
||||
|
||||
// The checksum for the current block.
|
||||
private CRC m_blockChecksum;
|
||||
// The checksum for the entire file.
|
||||
private int m_fileChecksum = 0;
|
||||
|
||||
// The number of different bytes seen in the current block.
|
||||
private int m_noSeenDifferentBytesInCurBlock;
|
||||
private int m_blockPointer;
|
||||
|
||||
private int m_blockNo = 0;
|
||||
|
||||
BlockOutputStream(BitOutput wrapped, int blockSize, int numberOfHuffmanTreeRefinementIterations, BZip2EncoderExecutorServiceImpl ex, Object errorOwner, EncodedBlockWriter ebw, EncodingScratchpad sp)
|
||||
{
|
||||
// Can only have one, not both.
|
||||
assert ex == null ^ sp == null;
|
||||
|
||||
m_wrapped = wrapped;
|
||||
m_blockSize = blockSize;
|
||||
m_numberOfHuffmanTreeRefinementIterations = numberOfHuffmanTreeRefinementIterations;
|
||||
m_blockChecksum = new CRC();
|
||||
m_scratchpad = sp;
|
||||
// May be null.
|
||||
m_encodingExecutor = ex;
|
||||
// May be null
|
||||
m_errorOwner = errorOwner;
|
||||
// May be null.
|
||||
m_encodedBlockWriter = ebw;
|
||||
|
||||
startNewBlock();
|
||||
}
|
||||
|
||||
private void startNewBlock()
|
||||
{
|
||||
m_blockPointer = 0;
|
||||
|
||||
if (m_encodingExecutor != null)
|
||||
{
|
||||
// We use several threads for encoding. Create new instances for
|
||||
// data that may be used right now by an encoder.
|
||||
m_seenDifferentBytesInCurBlock = new boolean[256];
|
||||
m_block = new byte[m_blockSize + ThreeWayRadixQuicksort.DATA_OVERSHOOT];
|
||||
}
|
||||
else
|
||||
{
|
||||
// We encode in this thread. It is safe to reuse variables.
|
||||
if (m_seenDifferentBytesInCurBlock == null)
|
||||
{
|
||||
m_seenDifferentBytesInCurBlock = new boolean[256];
|
||||
}
|
||||
else
|
||||
{
|
||||
Arrays.fill(m_seenDifferentBytesInCurBlock, false);
|
||||
}
|
||||
|
||||
if (m_block == null)
|
||||
{
|
||||
m_block = new byte[m_blockSize + ThreeWayRadixQuicksort.DATA_OVERSHOOT];
|
||||
}
|
||||
}
|
||||
m_noSeenDifferentBytesInCurBlock = 0;
|
||||
|
||||
// Reset the run length encoder state
|
||||
m_last = -1;
|
||||
m_numberOfSame = 0;
|
||||
m_rleState = RLEState.ENCODING_SINGLE;
|
||||
}
|
||||
|
||||
private boolean isFull()
|
||||
{
|
||||
return m_blockPointer == m_blockSize;
|
||||
}
|
||||
|
||||
private boolean isEmpty()
|
||||
{
|
||||
return m_blockPointer == 0;
|
||||
}
|
||||
|
||||
int getFileChecksum()
|
||||
{
|
||||
return m_fileChecksum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a compressed data block.
|
||||
*/
|
||||
private void writeCurBlock() throws IOException
|
||||
{
|
||||
final int blockChecksum = m_blockChecksum.getValue();
|
||||
m_blockChecksum = new CRC();
|
||||
if (m_encodingExecutor == null)
|
||||
{
|
||||
// Encode the block in the current thread.
|
||||
BlockEncoder be = new BlockEncoder(m_block, m_blockNo, m_blockPointer, blockChecksum, m_seenDifferentBytesInCurBlock, m_noSeenDifferentBytesInCurBlock, m_numberOfHuffmanTreeRefinementIterations, m_wrapped, null);
|
||||
be.setScratchpad(m_scratchpad);
|
||||
be.encode();
|
||||
}
|
||||
else
|
||||
{
|
||||
// Hand off the block to another thread for encoding.
|
||||
|
||||
// Allocate an output buffer that is 2/3rds of the size of the
|
||||
// written data.
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream((2 * m_blockPointer) / 3);
|
||||
BitOutput out = new LittleEndianBitOutputStream(baos);
|
||||
BlockEncodedCallback bec = new BlockEncodedCallback(m_blockNo, baos, out, m_encodedBlockWriter);
|
||||
BlockEncoder be = new BlockEncoder(m_block, m_blockNo, m_blockPointer, blockChecksum, m_seenDifferentBytesInCurBlock, m_noSeenDifferentBytesInCurBlock, m_numberOfHuffmanTreeRefinementIterations, out, bec);
|
||||
m_encodingExecutor.execute(new BlockEncoderRunnable(be, m_errorOwner));
|
||||
}
|
||||
|
||||
// Update the file checksum
|
||||
m_fileChecksum = (m_fileChecksum << 1) | (m_fileChecksum >>> 31);
|
||||
m_fileChecksum ^= blockChecksum;
|
||||
|
||||
m_blockNo++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a single byte.
|
||||
*/
|
||||
private void writeByte(final int b) throws IOException
|
||||
{
|
||||
m_block[m_blockPointer++] = (byte) (b & 0xFF);
|
||||
if (!m_seenDifferentBytesInCurBlock[b])
|
||||
{
|
||||
m_seenDifferentBytesInCurBlock[b] = true;
|
||||
m_noSeenDifferentBytesInCurBlock++;
|
||||
}
|
||||
|
||||
if (isFull())
|
||||
{
|
||||
// File f = new File("/tmp/block_" + ++m_blockNo + ".dat");
|
||||
// OutputStream os = new BufferedOutputStream(new FileOutputStream(f));
|
||||
// try
|
||||
// {
|
||||
// os.write(m_block, 0, m_blockPointer);
|
||||
// }
|
||||
// finally
|
||||
// {
|
||||
// os.close();
|
||||
// }
|
||||
|
||||
writeCurBlock();
|
||||
startNewBlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(final int b) throws IOException
|
||||
{
|
||||
// Run length encode
|
||||
switch (m_rleState)
|
||||
{
|
||||
case ENCODING_SINGLE:
|
||||
if (b == m_last)
|
||||
{
|
||||
m_numberOfSame++;
|
||||
if (m_numberOfSame == 4)
|
||||
{
|
||||
if (m_blockPointer == m_blockSize - 1)
|
||||
{
|
||||
// Corner case. bzip2 cannot handle blocks that end
|
||||
// with four equal bytes. End this block one byte
|
||||
// earlier.
|
||||
writeCurBlock();
|
||||
startNewBlock();
|
||||
write(b);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Four equal in a row. Change state
|
||||
m_rleState = RLEState.COUNTING_MULTIPLE;
|
||||
m_numberOfSame = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_last = b;
|
||||
m_numberOfSame = 1;
|
||||
}
|
||||
m_blockChecksum.update(b);
|
||||
writeByte(b);
|
||||
break;
|
||||
|
||||
case COUNTING_MULTIPLE:
|
||||
if (b == m_last)
|
||||
{
|
||||
m_numberOfSame++;
|
||||
if (m_numberOfSame == MAX_NO_OF_RLE_REPEATS)
|
||||
{
|
||||
// Cannot repeat this anymore. Update checksum, write
|
||||
// and switch state.
|
||||
for (int i = 0; i < MAX_NO_OF_RLE_REPEATS; i++)
|
||||
{
|
||||
m_blockChecksum.update(b);
|
||||
}
|
||||
writeByte(MAX_NO_OF_RLE_REPEATS);
|
||||
m_rleState = RLEState.ENCODING_SINGLE;
|
||||
m_numberOfSame = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// A byte that is not same as the last. Stop counting,
|
||||
// update the checksum and change state.
|
||||
for (int i = 0; i < m_numberOfSame; i++)
|
||||
{
|
||||
m_blockChecksum.update(m_last);
|
||||
}
|
||||
writeByte(m_numberOfSame);
|
||||
m_blockChecksum.update(b);
|
||||
writeByte(b);
|
||||
m_numberOfSame = 1;
|
||||
m_last = b;
|
||||
m_rleState = RLEState.ENCODING_SINGLE;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new RuntimeException("Unknown encoding state " + m_rleState + ". This is a bug");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(final byte[] data) throws IOException
|
||||
{
|
||||
for (int i = 0; i < data.length; i++)
|
||||
{
|
||||
write(data[i] & 0xFF);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(final byte[] data, final int offset, final int len) throws IOException
|
||||
{
|
||||
// Range validation is done by BZip2OutputStream
|
||||
for (int i = offset; i < offset + len; i++)
|
||||
{
|
||||
write(data[i] & 0xFF);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException
|
||||
{
|
||||
if (m_rleState == RLEState.COUNTING_MULTIPLE)
|
||||
{
|
||||
// Update the checksum and write the current count.
|
||||
for (int i = 0; i < m_numberOfSame; i++)
|
||||
{
|
||||
m_blockChecksum.update(m_last & 0xFF);
|
||||
}
|
||||
writeByte(m_numberOfSame);
|
||||
}
|
||||
|
||||
if (!isEmpty())
|
||||
{
|
||||
writeCurBlock();
|
||||
}
|
||||
|
||||
if (m_encodedBlockWriter != null)
|
||||
{
|
||||
// Tell the encoded block writer that we're done.
|
||||
m_encodedBlockWriter.writeBlock(m_blockNo, null);
|
||||
}
|
||||
|
||||
// Don't close the wrapped BitOutput. It will be used later on to write
|
||||
// the EOF block.
|
||||
|
||||
super.close();
|
||||
}
|
||||
}
|
120
src/main/java/org/at4j/comp/bzip2/BurrowsWheelerDecoder.java
Normal file
120
src/main/java/org/at4j/comp/bzip2/BurrowsWheelerDecoder.java
Normal file
@ -0,0 +1,120 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
 * Decode Burrows Wheeler encoded data.
 * <p>
 * The decoder is created with the encoded data (the last column of the sorted
 * rotation matrix), the frequency of each byte value in the data and the row
 * number of the original data. {@link #decode()} returns a stream from which
 * the decoded bytes can be read.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class BurrowsWheelerDecoder
{
    /**
     * A stream that returns the decoded bytes one at a time by following the
     * chain of positions in the pointer array.
     */
    static class BWInputStream extends InputStream
    {
        // The encoded data that the pointers refer to.
        private final byte[] m_decoded;
        // For each position, the position of the next byte to return.
        private final int[] m_ptr;

        // The position in m_decoded of the next byte to return.
        private int m_curPointer;
        private boolean m_eof;
        // The number of bytes that remain to be read.
        private int m_noLeftToRead;

        /**
         * @param decoded The data that the pointers refer to.
         * @param ptr The chain of positions. Its length is the number of bytes
         * that can be read from the stream.
         * @param originalDataPointer The index in {@code ptr} where the chain
         * starts. It must be a valid index in {@code ptr}.
         */
        BWInputStream(byte[] decoded, int[] ptr, int originalDataPointer)
        {
            m_decoded = decoded;
            m_ptr = ptr;
            m_curPointer = ptr[originalDataPointer];
            m_noLeftToRead = ptr.length;
        }

        /**
         * Read the next decoded byte.
         * @return The next byte as a value in the interval 0..255, or
         * {@code -1} when all bytes have been read.
         */
        @Override
        public int read() throws IOException
        {
            if (m_eof)
            {
                return -1;
            }
            final int res = m_decoded[m_curPointer] & 0xFF;
            // End of data is reached when the last byte has been handed out.
            m_eof = --m_noLeftToRead == 0;
            m_curPointer = m_ptr[m_curPointer];
            return res;
        }
    }

    private final byte[] m_decoded;
    private final int m_noBytesDecoded;
    private final int[] m_byteFrequencies;
    private final int m_originalDataPointer;

    /**
     * @param encoded The encoded data. This array may be longer than the actual
     * amount of encoded data. The {@code noBytesEncoded} parameter determines
     * how much of the array that will be used.
     * @param noBytesEncoded The length of the encoded data.
     * @param byteFrequencies The number of times each byte occur in the data.
     * @param originalDataPointer The row number of the original data in the
     * Burrows Wheeler matrix. It must be at least {@code 0} and less than
     * {@code noBytesEncoded}.
     * @throws IOException If the pointer to the original data is outside of
     * the encoded data.
     */
    BurrowsWheelerDecoder(byte[] encoded, int noBytesEncoded, int[] byteFrequencies, int originalDataPointer) throws IOException
    {
        // The pointer is used as an index in an array of noBytesEncoded
        // elements when decoding, so it has to be a valid index in that array.
        // Reject invalid values here with an IOException instead of failing
        // with an ArrayIndexOutOfBoundsException later on.
        if ((originalDataPointer < 0) || (originalDataPointer >= noBytesEncoded))
        {
            throw new IOException("Invalid pointer to original data in block header " + originalDataPointer + ". It must be at least 0 and less than the size of data in the block " + noBytesEncoded);
        }

        m_decoded = encoded;
        m_noBytesDecoded = noBytesEncoded;
        m_byteFrequencies = byteFrequencies;
        m_originalDataPointer = originalDataPointer;
    }

    /**
     * Create a stream for reading the decoded data.
     * @return A stream containing the decoded data.
     */
    InputStream decode()
    {
        // Calculate the transformation vector used to move from the encoded
        // data to the decoded.

        // The byte frequency array contains the frequency of each byte in the
        // data. Create a new array tarr that, for each byte, specifies how many
        // bytes of lower value that occurs in the data.
        int[] tarr = new int[256];
        tarr[0] = 0;
        for (int i = 1; i < 256; i++)
        {
            tarr[i] = tarr[i - 1] + m_byteFrequencies[i - 1];
        }

        // The ptr array will contain a chain of positions of the decoded bytes
        // in the decoded array.
        final int[] ptr = new int[m_noBytesDecoded];
        for (int i = 0; i < m_noBytesDecoded; i++)
        {
            int val = m_decoded[i] & 0xFF;
            // Get the position of the decoded byte position in tt. Increment
            // the tt position for the given value so that next occurrence of the
            // value will end up in the next position in tt.
            int ttPos = tarr[val]++;
            ptr[ttPos] = i;
        }

        return new BWInputStream(m_decoded, ptr, m_originalDataPointer);
    }
}
|
99
src/main/java/org/at4j/comp/bzip2/BurrowsWheelerEncoder.java
Normal file
99
src/main/java/org/at4j/comp/bzip2/BurrowsWheelerEncoder.java
Normal file
@ -0,0 +1,99 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
/**
|
||||
* Burrows Wheeler encoder.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class BurrowsWheelerEncoder
|
||||
{
|
||||
static class BurrowsWheelerEncodingResult
|
||||
{
|
||||
// The values of the last column of the matrix
|
||||
final byte[] m_lastColumn;
|
||||
// The row number of the first row (the row which contains the incoming
|
||||
// data) in the sorted matrix
|
||||
final int m_firstPointer;
|
||||
|
||||
private BurrowsWheelerEncodingResult(byte[] lastColumn, int firstPointer)
|
||||
{
|
||||
m_lastColumn = lastColumn;
|
||||
m_firstPointer = firstPointer;
|
||||
}
|
||||
}
|
||||
|
||||
// The shortest length that will be quicksorted rather than shell sorted
|
||||
private static int MIN_QUICKSORT_LENGTH = 18;
|
||||
|
||||
// The data array containing the unencoded data.
|
||||
private final byte[] m_data;
|
||||
// The length of the data in the array. Data occupies the positions 0 to
|
||||
// m_length - 1 in the array.
|
||||
private final int m_length;
|
||||
// Contains preallocated data structures. Used to reduce the number of
|
||||
// temporary objects that are created and thus avoid time spent gc:ing.
|
||||
private final EncodingScratchpad m_scratchpad;
|
||||
|
||||
/**
|
||||
* @param data This array should contain a 100 byte overshoot. See
|
||||
* {@link ThreeWayRadixQuicksort#ThreeWayRadixQuicksort(byte[], int, int, EncodingScratchpad)}
|
||||
* .
|
||||
*/
|
||||
BurrowsWheelerEncoder(byte[] data, int length, EncodingScratchpad sp)
|
||||
{
|
||||
if (length > data.length)
|
||||
{
|
||||
throw new IllegalArgumentException("Invalid data length " + length + ". It must be <= the length of the data array (" + data.length + ")");
|
||||
}
|
||||
m_data = data;
|
||||
m_length = length;
|
||||
m_scratchpad = sp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Run a Burrows Wheeler encoding.
|
||||
*/
|
||||
BurrowsWheelerEncodingResult encode()
|
||||
{
|
||||
// Create all rotations of m_data, put them in a matrix and sort the
|
||||
// first column. For each row in the matrix, ptr contains a pointer to
|
||||
// the first byte of the row's m_data rotation.
|
||||
int[] ptr = new ThreeWayRadixQuicksort(m_data, m_length, MIN_QUICKSORT_LENGTH, m_scratchpad).sort();
|
||||
|
||||
// Get the contents of the last column in the matrix. This, and the
|
||||
// pointer to the ĺocation of where the first byte in m_data is in the
|
||||
// last column, is the result from the Burrows Wheeler encoding.
|
||||
byte[] lastColumn = m_scratchpad.m_lastColumn;
|
||||
int firstRow = -1;
|
||||
|
||||
for (int i = 0; i < m_length; i++)
|
||||
{
|
||||
int fePtr = ptr[i] - 1;
|
||||
if (fePtr < 0)
|
||||
{
|
||||
fePtr += m_length;
|
||||
firstRow = i;
|
||||
}
|
||||
lastColumn[i] = m_data[fePtr];
|
||||
}
|
||||
return new BurrowsWheelerEncodingResult(lastColumn, firstRow);
|
||||
}
|
||||
}
|
63
src/main/java/org/at4j/comp/bzip2/CRC.java
Normal file
63
src/main/java/org/at4j/comp/bzip2/CRC.java
Normal file
@ -0,0 +1,63 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
/**
 * Checksum algorithm used by bzip2.
 * <p>
 * The checksum is updated one byte value at a time with
 * {@link #update(int)}. The current checksum value is returned by
 * {@link #getValue()}.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class CRC
{
    // Table from bzip2's crctable.c
    private static final int[] CRC_TABLE = new int[] { 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a,
            0x384fbdbd, 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, 0x9823b6e0, 0x9ce2ab57,
            0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c,
            0xc3f706fb, 0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
            0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1,
            0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044,
            0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, 0x690ce0ee,
            0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47,
            0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc,
            0xef68060b, 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71,
            0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd,
            0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654,
            0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, 0x89b8fd09, 0x8d79e0be, 0x803ac667,
            0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 };

    // The checksum register. All bits are set before any data is added.
    private int m_crc = 0xFFFFFFFF;

    /**
     * Add a byte value to the checksum.
     * @param b An integer value in the interval 0..255.
     * @throws IllegalArgumentException If the value is outside of the
     * interval 0..255.
     */
    void update(int b)
    {
        // Any bit set above the lowest eight means that the value is either
        // negative or greater than 255.
        if ((b & ~0xFF) != 0)
        {
            throw new IllegalArgumentException(String.valueOf(b));
        }

        // The table index is the top byte of the register combined with the
        // new value.
        final int tableIndex = (m_crc >>> 24) ^ b;
        m_crc = (m_crc << 8) ^ CRC_TABLE[tableIndex];
    }

    /**
     * Get the checksum for the values added so far.
     * @return The bitwise complement of the checksum register.
     */
    int getValue()
    {
        return ~m_crc;
    }
}
|
51
src/main/java/org/at4j/comp/bzip2/CompressedDataBlock.java
Normal file
51
src/main/java/org/at4j/comp/bzip2/CompressedDataBlock.java
Normal file
@ -0,0 +1,51 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* A bzip2 block containing compressed data.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class CompressedDataBlock implements Block
|
||||
{
|
||||
private final InputStream m_stream;
|
||||
private final int m_blockChecksum;
|
||||
|
||||
CompressedDataBlock(InputStream stream, int blockChecksum)
|
||||
{
|
||||
// Null check
|
||||
stream.getClass();
|
||||
|
||||
m_stream = stream;
|
||||
m_blockChecksum = blockChecksum;
|
||||
}
|
||||
|
||||
InputStream getStream()
|
||||
{
|
||||
return m_stream;
|
||||
}
|
||||
|
||||
int getBlockChecksum()
|
||||
{
|
||||
return m_blockChecksum;
|
||||
}
|
||||
}
|
38
src/main/java/org/at4j/comp/bzip2/EncodedBlockData.java
Normal file
38
src/main/java/org/at4j/comp/bzip2/EncodedBlockData.java
Normal file
@ -0,0 +1,38 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
/**
 * Holder for the data of one encoded bzip2 block: an array of bytes, a number
 * of bits and a bit value.
 * @author Karl Gustafsson
 * @since 1.1
 */
final class EncodedBlockData
{
    // The encoded bytes of the block. The array is not copied.
    final byte[] m_bytes;
    // The number of bits in m_bitValue.
    final int m_noBits;
    // The value of the bits that come in addition to the bytes.
    final int m_bitValue;

    /**
     * @param bytes The encoded bytes.
     * @param noBits The number of bits in {@code bitValue}.
     * @param bitValue The value of the additional bits.
     */
    EncodedBlockData(byte[] bytes, int noBits, int bitValue)
    {
        this.m_bytes = bytes;
        this.m_noBits = noBits;
        this.m_bitValue = bitValue;
    }
}
|
146
src/main/java/org/at4j/comp/bzip2/EncodedBlockWriter.java
Normal file
146
src/main/java/org/at4j/comp/bzip2/EncodedBlockWriter.java
Normal file
@ -0,0 +1,146 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
|
||||
import org.at4j.support.io.BitOutput;
|
||||
|
||||
/**
|
||||
* This is used to write encoded blocks in the right order when several encoding
|
||||
* threads are used with the {@link BZip2OutputStream}.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class EncodedBlockWriter
|
||||
{
|
||||
// All variables are protected by this object's intrinsic lock
|
||||
private final BitOutput m_out;
|
||||
private final Map<Integer, EncodedBlockData> m_savedBlocks = new HashMap<Integer, EncodedBlockData>();
|
||||
// This latch is used to signal to the bzip2 output stream when this writer
|
||||
// is finished.
|
||||
private final CountDownLatch m_doneLatch = new CountDownLatch(1);
|
||||
private int m_nextBlockToWrite = 0;
|
||||
private boolean m_hasError;
|
||||
|
||||
EncodedBlockWriter(BitOutput out)
|
||||
{
|
||||
m_out = out;
|
||||
}
|
||||
|
||||
private void writeEncodedBlockData(final EncodedBlockData bd) throws IOException
|
||||
{
|
||||
m_out.writeBytes(bd.m_bytes, 0, bd.m_bytes.length);
|
||||
if (bd.m_noBits > 0)
|
||||
{
|
||||
m_out.writeBits(bd.m_bitValue, bd.m_noBits);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeBlockInternal(final int blockNo, final EncodedBlockData blockData) throws IOException
|
||||
{
|
||||
if (blockData == null)
|
||||
{
|
||||
// We're done
|
||||
m_doneLatch.countDown();
|
||||
}
|
||||
else
|
||||
{
|
||||
writeEncodedBlockData(blockData);
|
||||
|
||||
while (m_savedBlocks.containsKey(++m_nextBlockToWrite))
|
||||
{
|
||||
final EncodedBlockData savedBd = m_savedBlocks.get(m_nextBlockToWrite);
|
||||
if (savedBd != null)
|
||||
{
|
||||
writeEncodedBlockData(savedBd);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_doneLatch.countDown();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* It is not time to write this block just yet. Save it until it is time.
|
||||
* @param blockNo The block number.
|
||||
* @param blockData The block data.
|
||||
*/
|
||||
private void saveBlock(final int blockNo, EncodedBlockData blockData)
|
||||
{
|
||||
m_savedBlocks.put(blockNo, blockData);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the block data to the output if it is the next block to write. If
|
||||
* not, queue it for later writing.
|
||||
* @param blockNo The block number.
|
||||
* @param blockData The block data or {@code null} as an end of stream
|
||||
* marker.
|
||||
* @throws IOException
|
||||
*/
|
||||
synchronized void writeBlock(final int blockNo, final EncodedBlockData blockData) throws IOException
|
||||
{
|
||||
if (m_hasError)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (blockNo == m_nextBlockToWrite)
|
||||
{
|
||||
writeBlockInternal(blockNo, blockData);
|
||||
}
|
||||
else
|
||||
{
|
||||
saveBlock(blockNo, blockData);
|
||||
}
|
||||
}
|
||||
catch (Error e)
|
||||
{
|
||||
m_hasError = true;
|
||||
m_doneLatch.countDown();
|
||||
throw e;
|
||||
}
|
||||
catch (RuntimeException e)
|
||||
{
|
||||
m_hasError = true;
|
||||
m_doneLatch.countDown();
|
||||
throw e;
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
m_hasError = true;
|
||||
m_doneLatch.countDown();
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
void waitFor() throws InterruptedException
|
||||
{
|
||||
m_doneLatch.await();
|
||||
}
|
||||
}
|
107
src/main/java/org/at4j/comp/bzip2/EncodingScratchpad.java
Normal file
107
src/main/java/org/at4j/comp/bzip2/EncodingScratchpad.java
Normal file
@ -0,0 +1,107 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
/**
|
||||
* This object contains different objects used by a bzip2 encoder thread. It is
|
||||
* used to reduce the number of object and array allocations.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class EncodingScratchpad
|
||||
{
|
||||
private static final int MAX_BLOCK_LENGTH = BZip2OutputStreamSettings.MAX_BLOCK_SIZE * 100 * 1000;
|
||||
private static final int MAX_NO_OF_SEGMENTS = MAX_BLOCK_LENGTH / BlockEncoder.NO_OF_SYMBOLS_PER_SEGMENT;
|
||||
|
||||
// An array that may contain the frequencies of each symbol in the data.
|
||||
final int[] m_frequencies = new int[BlockEncoder.MAX_NO_OF_MTF_SYMBOLS];
|
||||
|
||||
// A move to front alphabet.
|
||||
final byte[] m_mtfAlphabet = new byte[BlockEncoder.MAX_NO_OF_MTF_SYMBOLS];
|
||||
|
||||
// This two dimensional array can contain the frequencies for the different
|
||||
// symbols encoded by the different trees (up to six trees)
|
||||
final int[][] m_frequencies2d = new int[BlockEncoder.MAX_NO_OF_HUFFMAN_TREES][BlockEncoder.MAX_NO_OF_MTF_SYMBOLS];
|
||||
|
||||
// Contains MTF and RL encoded data before the Huffman encoding. The maximum
|
||||
// size is the maximum size of a block + the EOB symbol. The actual size
|
||||
// will probably be significantly shorter than this
|
||||
final int[] m_encodedData = new int[MAX_BLOCK_LENGTH + 1];
|
||||
|
||||
// Frequencies of each two-byte combination used for the radix sort.
|
||||
// Use an overshoot of one position.
|
||||
final int[] m_twoByteFrequencies = new int[65536 + 1];
|
||||
|
||||
// Pointers created by the 3-way radix quicksort
|
||||
final int[] m_ptrs = new int[MAX_BLOCK_LENGTH];
|
||||
|
||||
// A cache for sort results
|
||||
final int[] m_sortCache = new int[MAX_BLOCK_LENGTH + ThreeWayRadixQuicksort.DATA_OVERSHOOT];
|
||||
|
||||
// Array for temporary data. This will be grown incrementally as the need
|
||||
// arises.
|
||||
int[] m_tempArea = new int[1024];
|
||||
|
||||
// Stack for block sorting
|
||||
final ThreeWayRadixQuicksort.QuickSortRangeInfo[] m_sortStack = new ThreeWayRadixQuicksort.QuickSortRangeInfo[ThreeWayRadixQuicksort.SORT_STACK_SIZE];
|
||||
|
||||
// The results when all segments of a block is encoded with all available
|
||||
// Huffman trees
|
||||
final int[][] m_encodingResults = new int[MAX_NO_OF_SEGMENTS][BlockEncoder.MAX_NO_OF_HUFFMAN_TREES];
|
||||
|
||||
final int[] m_categoriesPerSegment = new int[MAX_NO_OF_SEGMENTS];
|
||||
|
||||
// The last column after Burrows Wheeler encoding
|
||||
final byte[] m_lastColumn = new byte[MAX_BLOCK_LENGTH];
|
||||
|
||||
// The bucket sorting order
|
||||
final int[] m_sortOrder = new int[256];
|
||||
// Used when scanning pointers
|
||||
final int[] m_copyStart = new int[256];
|
||||
final int[] m_copyEnd = new int[256];
|
||||
|
||||
// Mapping between a symbol and its index number in the array of symbols
|
||||
// used by the run length encoder.
|
||||
final byte[] m_sequenceMap = new byte[256];
|
||||
|
||||
// Heap used when calculating Huffman tree code lengths
|
||||
final int[] m_htHeap = new int[BlockEncoder.MAX_NO_OF_MTF_SYMBOLS + 2];
|
||||
final int[] m_htWeight = new int[BlockEncoder.MAX_NO_OF_MTF_SYMBOLS * 2];
|
||||
final int[] m_htParent = new int[BlockEncoder.MAX_NO_OF_MTF_SYMBOLS * 2];
|
||||
|
||||
// Flags for all sorted large buckets
|
||||
final boolean[] m_sortedLargeBuckets = new boolean[256];
|
||||
// Flags for all sorted small buckets
|
||||
final boolean[] m_sortedSmallBuckets = new boolean[256 * 256];
|
||||
|
||||
/**
|
||||
* Get a temporary integer array of with a length of at least {@code len}
|
||||
* integers.
|
||||
*/
|
||||
int[] getTemp(final int len)
|
||||
{
|
||||
// Is the current temp area large enough?
|
||||
if (m_tempArea.length < len)
|
||||
{
|
||||
// No. Reallocate it
|
||||
m_tempArea = new int[len + 100];
|
||||
}
|
||||
return m_tempArea;
|
||||
}
|
||||
}
|
49
src/main/java/org/at4j/comp/bzip2/EncodingThread.java
Normal file
49
src/main/java/org/at4j/comp/bzip2/EncodingThread.java
Normal file
@ -0,0 +1,49 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
/**
|
||||
* This is the kind of thread used for encoding bzip2 blocks.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class EncodingThread extends Thread
|
||||
{
|
||||
private final EncodingScratchpad m_scratchpad = new EncodingScratchpad();
|
||||
private final ErrorState m_errorState;
|
||||
|
||||
EncodingThread(Runnable r, ErrorState es)
|
||||
{
|
||||
super(r);
|
||||
m_errorState = es;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get this thread's scratchpad.
|
||||
*/
|
||||
EncodingScratchpad getScratchpad()
|
||||
{
|
||||
return m_scratchpad;
|
||||
}
|
||||
|
||||
ErrorState getErrorState()
|
||||
{
|
||||
return m_errorState;
|
||||
}
|
||||
}
|
41
src/main/java/org/at4j/comp/bzip2/EncodingThreadFactory.java
Normal file
41
src/main/java/org/at4j/comp/bzip2/EncodingThreadFactory.java
Normal file
@ -0,0 +1,41 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
|
||||
/**
|
||||
* This is a factory for creating {@link EncodingThread} objects.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class EncodingThreadFactory implements ThreadFactory
|
||||
{
|
||||
private final ErrorState m_errorState;
|
||||
|
||||
EncodingThreadFactory(ErrorState es)
|
||||
{
|
||||
m_errorState = es;
|
||||
}
|
||||
|
||||
public Thread newThread(Runnable r)
|
||||
{
|
||||
return new EncodingThread(r, m_errorState);
|
||||
}
|
||||
}
|
39
src/main/java/org/at4j/comp/bzip2/EosBlock.java
Normal file
39
src/main/java/org/at4j/comp/bzip2/EosBlock.java
Normal file
@ -0,0 +1,39 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
/**
|
||||
* A bzip2 block containing end of stream information.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class EosBlock implements Block
|
||||
{
|
||||
private final long m_readCrc;
|
||||
|
||||
EosBlock(long readCrc)
|
||||
{
|
||||
m_readCrc = readCrc;
|
||||
}
|
||||
|
||||
long getReadCrc()
|
||||
{
|
||||
return m_readCrc;
|
||||
}
|
||||
}
|
52
src/main/java/org/at4j/comp/bzip2/ErrorState.java
Normal file
52
src/main/java/org/at4j/comp/bzip2/ErrorState.java
Normal file
@ -0,0 +1,52 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
 * Keeps track of errors that occur while encoding.
 * <p>
 * An error is always registered together with an owner token. The token
 * uniquely identifies the object that is affected by the error, and it is the
 * same token that is later used to ask for (and clear) the error. Since
 * implementations may use the token as a lookup key, it must have a good
 * {@link Object#hashCode()} method.
 * @author Karl Gustafsson
 * @since 1.1
 */
interface ErrorState
{
    /**
     * Register an {@link Exception} or an {@link Error} for an owner.
     * @param t The exception or error to register.
     * @param ownerToken The unique identifier for the owner of the error, i.e.
     * the object that the encoding thread was working for when the error
     * occurred.
     */
    void registerError(Throwable t, Object ownerToken);

    /**
     * Check if an error is registered for the owner. If there is one, it is
     * removed from this object and thrown to the caller.
     * @param ownerToken The owner to check errors for.
     * @throws Error If an {@link Error} was registered for this owner.
     * @throws RuntimeException If a {@link RuntimeException} was registered
     * for this owner.
     * @throws IOException If an {@link IOException} was registered for this
     * owner.
     */
    void checkAndClearErrors(Object ownerToken) throws Error, RuntimeException, IOException;
}
|
@ -0,0 +1,438 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.at4j.support.io.BitInput;
|
||||
import org.at4j.support.io.BitOutput;
|
||||
|
||||
/**
|
||||
* This object represents the type of Huffman tree that is used by bzip2. The
|
||||
* "high value branch" means that leaf nodes have the smallest possible values
|
||||
* and non-leaf nodes have the highest possible values at each tree depth.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class HighValueBranchHuffmanTree
|
||||
{
|
||||
private static final int MAX_NO_OF_SYMBOLS = 258;
|
||||
|
||||
// The shortest code length for symbols in this tree.
|
||||
private final int m_minLength;
|
||||
// The longest code length for symbols in this tree.
|
||||
private final int m_maxLength;
|
||||
// m_maxLength - m_minLength + 1;
|
||||
// Declared package private for the unit tests.
|
||||
final int m_numberOfLengths;
|
||||
|
||||
// The value limit at each data length, i.e. the maximum value for leaf
|
||||
// nodes at that data length.
|
||||
// Declared package private for the unit tests.
|
||||
final int[] m_limitsPerLength;
|
||||
// The lowest value for a symbol at each length. The value for length
|
||||
// m_minLength is at index 0 in the array.
|
||||
// Declared package private for the unit tests.
|
||||
final int[] m_baseValuesPerLength;
|
||||
// The offset in the m_symbolSequenceNos array for the first symbol for each
|
||||
// Huffman code length. The array has the length m_maxLength - m_minLength +
|
||||
// 1. The value for m_minLength is at index 0 (and is 0).
|
||||
// Declared package private for the unit tests.
|
||||
final int[] m_symbolOffsetPerLength;
|
||||
// The index of the symbol table for Huffman code no n.
|
||||
// Declared package private for the unit tests.
|
||||
final int[] m_symbolSequenceNos;
|
||||
// This table contains the Huffman codes and the code bit lengths for each
|
||||
// symbol. It is created when using the constructor that calculates the
|
||||
// Huffman trees to speed up encoding.
|
||||
final int[][] m_huffmanCodesAndLengthsPerSymbol;
|
||||
|
||||
/**
|
||||
* Get the Huffman code and its bit length for a symbol.
|
||||
* @param symbol The symbol.
|
||||
* @param huffmanIndex The symbol's index in the list of sorted symbols.
|
||||
* @param codeAndLength An int array of length 2 used to store the result
|
||||
* in.
|
||||
*/
|
||||
private int[] getCodeAndLengthForSymbol(final int symbol, final int huffmanIndex, final int[] codeAndLength)
|
||||
{
|
||||
// Calculate the length of the synbol's Huffman code
|
||||
int deltaLen;
|
||||
for (deltaLen = 0; deltaLen < m_numberOfLengths - 1; deltaLen++)
|
||||
{
|
||||
if (huffmanIndex < m_symbolOffsetPerLength[deltaLen + 1])
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
codeAndLength[0] = m_baseValuesPerLength[deltaLen] + (huffmanIndex - m_symbolOffsetPerLength[deltaLen]);
|
||||
codeAndLength[1] = m_minLength + deltaLen;
|
||||
return codeAndLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a canonical Huffman tree for the supplied symbols.
|
||||
* <p>
|
||||
* Symbol lengths for a canonical Huffman tree can be created by the
|
||||
* {@link #createCodeLengths(int[], int, int)} method.
|
||||
* @param symbolLengths The length of the Huffman code for each symbol.
|
||||
* @param minLength The shortest Huffman code length in the tree.
|
||||
* @param maxLength The longest Huffman code length in the tree.
|
||||
* @param forEncoding Should the tree be used for encoding? If so, a loookup
|
||||
* table that contains the Huffman code for each symbol is created to speed
|
||||
* up the encoding.
|
||||
* @throws IllegalArgumentException If the lengths are invalid.
|
||||
*/
|
||||
HighValueBranchHuffmanTree(final int[] symbolLengths, final int minLength, final int maxLength, final boolean forEncoding) throws IllegalArgumentException
|
||||
{
|
||||
if ((minLength < 0) || (maxLength < minLength))
|
||||
{
|
||||
throw new IllegalArgumentException("Illegal min or max length, min: " + minLength + ", max: " + maxLength);
|
||||
}
|
||||
|
||||
final int numberOfSymbols = symbolLengths.length;
|
||||
final int numberOfLengths = maxLength - minLength + 1;
|
||||
// Create a array of symbol sequence numbers sorted on their symbol
|
||||
// lengths
|
||||
m_symbolSequenceNos = new int[numberOfSymbols];
|
||||
// The number of symbols having each code length
|
||||
final int[] numl = new int[numberOfLengths];
|
||||
int index = 0;
|
||||
for (int i = minLength; i <= maxLength; i++)
|
||||
{
|
||||
numl[i - minLength] = 0;
|
||||
for (int j = 0; j < numberOfSymbols; j++)
|
||||
{
|
||||
if (symbolLengths[j] == i)
|
||||
{
|
||||
m_symbolSequenceNos[index++] = j;
|
||||
numl[i - minLength]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_symbolOffsetPerLength = new int[numberOfLengths];
|
||||
m_symbolOffsetPerLength[0] = 0;
|
||||
for (int i = 0; i < numberOfLengths - 1; i++)
|
||||
{
|
||||
m_symbolOffsetPerLength[i + 1] = m_symbolOffsetPerLength[i] + numl[i];
|
||||
}
|
||||
|
||||
// The value limit at each length
|
||||
m_limitsPerLength = new int[numberOfLengths - 1];
|
||||
m_baseValuesPerLength = new int[numberOfLengths];
|
||||
int prevLimit = 0;
|
||||
for (int i = minLength; i <= maxLength; i++)
|
||||
{
|
||||
index = i - minLength;
|
||||
// The base value for this length is the value of the smallest
|
||||
// allowed symbol for this length. The smallest allowed symbol is
|
||||
// the limit for the previous length with a zero at the end.
|
||||
m_baseValuesPerLength[index] = prevLimit << 1;
|
||||
|
||||
if (i < maxLength)
|
||||
{
|
||||
// The limit for this length is the base value for this length
|
||||
// plus the number of symbols for this length.
|
||||
prevLimit = m_baseValuesPerLength[index] + numl[index];
|
||||
m_limitsPerLength[index] = prevLimit - 1;
|
||||
}
|
||||
}
|
||||
|
||||
m_minLength = minLength;
|
||||
m_maxLength = maxLength;
|
||||
m_numberOfLengths = (byte) (maxLength - minLength + 1);
|
||||
if (forEncoding)
|
||||
{
|
||||
// Create an inverse mapping into the list of sorted symbols
|
||||
final int[] huffmanIndexPerSymbol = new int[symbolLengths.length];
|
||||
Arrays.fill(huffmanIndexPerSymbol, -1);
|
||||
for (int i = 0; i < m_symbolSequenceNos.length; i++)
|
||||
{
|
||||
huffmanIndexPerSymbol[m_symbolSequenceNos[i]] = i;
|
||||
}
|
||||
|
||||
// Create a table containing the Huffman code and its bit length for
|
||||
// each symbol. This is used to speed up writes.
|
||||
m_huffmanCodesAndLengthsPerSymbol = new int[symbolLengths.length][2];
|
||||
int[] codeAndLength = new int[2];
|
||||
for (int i = 0; i < symbolLengths.length; i++)
|
||||
{
|
||||
codeAndLength = getCodeAndLengthForSymbol(i, huffmanIndexPerSymbol[i], codeAndLength);
|
||||
m_huffmanCodesAndLengthsPerSymbol[i][0] = codeAndLength[0];
|
||||
m_huffmanCodesAndLengthsPerSymbol[i][1] = codeAndLength[1];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Don't create these variables. They are only used when writing data
|
||||
// and it is assumed that this constructor will only be used to create
|
||||
// trees for reading data.
|
||||
m_huffmanCodesAndLengthsPerSymbol = null;
|
||||
}
|
||||
}
|
||||
|
||||
private static void upHeap(final int[] heap, final int[] weight, int nHeap)
|
||||
{
|
||||
int tmp = heap[nHeap];
|
||||
while (weight[tmp] < weight[heap[nHeap >> 1]])
|
||||
{
|
||||
heap[nHeap] = heap[nHeap >>> 1];
|
||||
nHeap >>>= 1;
|
||||
}
|
||||
heap[nHeap] = tmp;
|
||||
}
|
||||
|
||||
private static void downHeap(final int[] heap, final int[] weight, final int nHeap, int n)
|
||||
{
|
||||
int tmp = heap[n];
|
||||
while (true)
|
||||
{
|
||||
int yy = n << 1;
|
||||
if (yy > nHeap)
|
||||
{
|
||||
break;
|
||||
}
|
||||
if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]])
|
||||
{
|
||||
yy++;
|
||||
}
|
||||
if (weight[tmp] < weight[heap[yy]])
|
||||
{
|
||||
break;
|
||||
}
|
||||
heap[n] = heap[yy];
|
||||
n = yy;
|
||||
}
|
||||
heap[n] = tmp;
|
||||
}
|
||||
|
||||
private static int addWeights(final int w1, final int w2)
|
||||
{
|
||||
final int d1 = w1 & 0xFF;
|
||||
final int d2 = w2 & 0xFF;
|
||||
final int ww1 = w1 & 0xFFFFFF00;
|
||||
final int ww2 = w2 & 0xFFFFFF00;
|
||||
return (ww1 + ww2) | (1 + (d1 > d2 ? d1 : d2));
|
||||
}
|
||||
|
||||
int getMinLength()
|
||||
{
|
||||
return m_minLength;
|
||||
}
|
||||
|
||||
int getMaxLength()
|
||||
{
|
||||
return m_maxLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a sorted array with symbol sequence numbers and their Huffman code
|
||||
* lengths. The returned array is sorted with the most frequent occurring
|
||||
* symbol first (i.e. the symbol with the shortest Huffman code).
|
||||
* <p>
|
||||
* This method is used for testing.
|
||||
* @return Array a[n][0] = symbol, a[n][1] = Huffman code length
|
||||
*/
|
||||
int[][] getSortedSymbolSequenceNosAndCodeLengths()
|
||||
{
|
||||
int[][] res = new int[m_symbolSequenceNos.length][2];
|
||||
int length = m_minLength;
|
||||
for (int i = 0; i < m_symbolSequenceNos.length; i++)
|
||||
{
|
||||
while ((length < m_maxLength) && (i >= m_symbolOffsetPerLength[length - m_minLength + 1]))
|
||||
{
|
||||
length++;
|
||||
}
|
||||
res[i][0] = m_symbolSequenceNos[i];
|
||||
res[i][1] = length;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the next symbol.
|
||||
* @param in The input to read the symbol from.
|
||||
* @return The next symbol.
|
||||
* @throws IOException On I/O errors.
|
||||
*/
|
||||
int readNext(final BitInput in) throws IOException
|
||||
{
|
||||
int code = in.readBits(m_minLength);
|
||||
// m_limitsPerLength.length == 0 means that all Huffman codes have the
|
||||
// same length.
|
||||
if (m_limitsPerLength.length == 0 || code <= m_limitsPerLength[0])
|
||||
{
|
||||
return m_symbolSequenceNos[code];
|
||||
}
|
||||
else
|
||||
{
|
||||
int codeLength = m_minLength;
|
||||
int index = 1;
|
||||
while (true)
|
||||
{
|
||||
code = (code << 1) | (in.readBit() ? 1 : 0);
|
||||
codeLength++;
|
||||
if ((codeLength == m_maxLength) || (code <= m_limitsPerLength[index]))
|
||||
{
|
||||
return m_symbolSequenceNos[m_symbolOffsetPerLength[index] + (code - m_baseValuesPerLength[index])];
|
||||
}
|
||||
index++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a symbol.
|
||||
* @param out The output to write to.
|
||||
* @param symbol The symbol to write.
|
||||
* @throws IOException On I/O errors.
|
||||
*/
|
||||
void write(final BitOutput out, final int symbol) throws IOException
|
||||
{
|
||||
out.writeBitsLittleEndian(m_huffmanCodesAndLengthsPerSymbol[symbol][0], m_huffmanCodesAndLengthsPerSymbol[symbol][1]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of bits used for encoding the symbol.
|
||||
*/
|
||||
int getBitLength(int symbol)
|
||||
{
|
||||
return m_huffmanCodesAndLengthsPerSymbol[symbol][1];
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the Huffman code lengths for the optimal, depth-limited Huffman
|
||||
* tree for the supplied symbol frequencies.
|
||||
* <p>
|
||||
* This method uses the (slightly magic) algorithm from bzip2 1.0.5.
|
||||
* @param frequencies The frequencies for each symbol in the data to be
|
||||
* encoded.
|
||||
* @param noSymbols The number of different symbols in the data to encode.
|
||||
* This should be the maximum symbol value (the EOB symbol's value) + 1.
|
||||
* @param maxLength The maximum code length which also will be the depth of
|
||||
* the Huffman tree. If this is too small, this method will get stuck in an
|
||||
* infinite loop.
|
||||
* @return The Huffman code lengths for each symbol.
|
||||
*/
|
||||
static int[] createCodeLengths(final int[] frequencies, final int noSymbols, final int maxLength, final EncodingScratchpad scratchpad)
|
||||
{
|
||||
/*
|
||||
* Nodes and heap entries run from 1. Entry 0 for both the heap and
|
||||
* nodes is a sentinel.
|
||||
*/
|
||||
|
||||
final int[] heap = scratchpad.m_htHeap;
|
||||
final int[] weight = scratchpad.m_htWeight;
|
||||
final int[] parent = scratchpad.m_htParent;
|
||||
|
||||
final int[] res = new int[noSymbols];
|
||||
|
||||
int actualMaxLength = -1;
|
||||
int actualMinLength = Integer.MAX_VALUE;
|
||||
|
||||
for (int i = 0; i < noSymbols; i++)
|
||||
{
|
||||
weight[i + 1] = (frequencies[i] == 0 ? 1 : frequencies[i]) << 8;
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
int noNodes = noSymbols;
|
||||
int nHeap = 0;
|
||||
|
||||
heap[0] = 0;
|
||||
weight[0] = 0;
|
||||
parent[0] = -2;
|
||||
|
||||
for (int i = 1; i <= noSymbols; i++)
|
||||
{
|
||||
parent[i] = -1;
|
||||
nHeap++;
|
||||
heap[nHeap] = i;
|
||||
upHeap(heap, weight, nHeap);
|
||||
}
|
||||
|
||||
assert nHeap < MAX_NO_OF_SYMBOLS + 2;
|
||||
|
||||
while (nHeap > 1)
|
||||
{
|
||||
int n1 = heap[1];
|
||||
heap[1] = heap[nHeap];
|
||||
nHeap--;
|
||||
downHeap(heap, weight, nHeap, 1);
|
||||
int n2 = heap[1];
|
||||
heap[1] = heap[nHeap];
|
||||
nHeap--;
|
||||
downHeap(heap, weight, nHeap, 1);
|
||||
noNodes++;
|
||||
parent[n1] = parent[n2] = noNodes;
|
||||
weight[noNodes] = addWeights(weight[n1], weight[n2]);
|
||||
parent[noNodes] = -1;
|
||||
nHeap++;
|
||||
heap[nHeap] = noNodes;
|
||||
upHeap(heap, weight, nHeap);
|
||||
}
|
||||
|
||||
assert noNodes < MAX_NO_OF_SYMBOLS * 2;
|
||||
|
||||
boolean tooLong = false;
|
||||
INNER: for (int i = 1; i <= noSymbols; i++)
|
||||
{
|
||||
int j = 0;
|
||||
int k = i;
|
||||
while (parent[k] >= 0)
|
||||
{
|
||||
k = parent[k];
|
||||
j++;
|
||||
}
|
||||
res[i - 1] = j;
|
||||
if (j > maxLength)
|
||||
{
|
||||
tooLong = true;
|
||||
break INNER;
|
||||
}
|
||||
|
||||
if (j > actualMaxLength)
|
||||
{
|
||||
actualMaxLength = j;
|
||||
}
|
||||
if (j < actualMinLength)
|
||||
{
|
||||
actualMinLength = j;
|
||||
}
|
||||
}
|
||||
|
||||
if (!tooLong)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
for (int i = 1; i <= noSymbols; i++)
|
||||
{
|
||||
int j = weight[i] >> 8;
|
||||
j = 1 + (j / 2);
|
||||
weight[i] = j << 8;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
@ -0,0 +1,67 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
* This {@link ErrorState} may have several observers which forces us to have to
|
||||
* care about the owner of each registered error.
|
||||
* <p>
|
||||
* This is used when sharing the same
|
||||
* {@link java.util.concurrent.ExecutorService} between several
|
||||
* {@link BZip2OutputStream}:s.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class MultipleObserverErrorState implements ErrorState
|
||||
{
|
||||
private Map<Object, Throwable> m_errors = new ConcurrentHashMap<Object, Throwable>(4);
|
||||
|
||||
public void checkAndClearErrors(Object ownerToken) throws Error, RuntimeException, IOException
|
||||
{
|
||||
Throwable t = m_errors.remove(ownerToken);
|
||||
if (t != null)
|
||||
{
|
||||
if (t instanceof IOException)
|
||||
{
|
||||
throw (IOException) t;
|
||||
}
|
||||
else if (t instanceof RuntimeException)
|
||||
{
|
||||
throw (RuntimeException) t;
|
||||
}
|
||||
else if (t instanceof Error)
|
||||
{
|
||||
throw (Error) t;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new RuntimeException(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void registerError(Throwable t, Object ownerToken)
|
||||
{
|
||||
m_errors.put(ownerToken, t);
|
||||
}
|
||||
}
|
164
src/main/java/org/at4j/comp/bzip2/RLEDecodingInputStream.java
Normal file
164
src/main/java/org/at4j/comp/bzip2/RLEDecodingInputStream.java
Normal file
@ -0,0 +1,164 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* This stream run length decodes read data. It is used by the
|
||||
* {@link BZip2InputStream}.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class RLEDecodingInputStream extends InputStream
|
||||
{
|
||||
private static enum RLEState
|
||||
{
|
||||
READING, REPEATING, ABOUT_TO_READ_HOW_MANY_TO_REPEAT, EOF;
|
||||
}
|
||||
|
||||
// Block checksum calculated while reading the block contents.
|
||||
private final CRC m_blockChecksum = new CRC();
|
||||
private final InputStream m_wrapped;
|
||||
private final long m_readChecksum;
|
||||
|
||||
private RLEState m_state;
|
||||
|
||||
private int m_noLeftToRepeat;
|
||||
private int m_last;
|
||||
private int m_numberOfSimilar;
|
||||
|
||||
RLEDecodingInputStream(InputStream wrapped, long readChecksum)
|
||||
{
|
||||
m_wrapped = wrapped;
|
||||
m_readChecksum = readChecksum;
|
||||
m_state = RLEState.READING;
|
||||
m_numberOfSimilar = 0;
|
||||
m_last = -1;
|
||||
}
|
||||
|
||||
private void handleEof() throws IOException
|
||||
{
|
||||
if (m_blockChecksum.getValue() != m_readChecksum)
|
||||
{
|
||||
throw new IOException("Invalid block checksum. Was " + m_blockChecksum.getValue() + ", expected " + m_readChecksum);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException
|
||||
{
|
||||
switch (m_state)
|
||||
{
|
||||
case EOF:
|
||||
return -1;
|
||||
|
||||
case READING:
|
||||
int val = m_wrapped.read();
|
||||
if (val == -1)
|
||||
{
|
||||
m_state = RLEState.EOF;
|
||||
handleEof();
|
||||
return -1;
|
||||
}
|
||||
if (val == m_last)
|
||||
{
|
||||
m_numberOfSimilar++;
|
||||
if (m_numberOfSimilar == 4)
|
||||
{
|
||||
// Four in a row. The next value is a repeat number.
|
||||
m_state = RLEState.ABOUT_TO_READ_HOW_MANY_TO_REPEAT;
|
||||
m_numberOfSimilar = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_numberOfSimilar = 1;
|
||||
m_last = val;
|
||||
}
|
||||
m_blockChecksum.update(val);
|
||||
return val;
|
||||
|
||||
case ABOUT_TO_READ_HOW_MANY_TO_REPEAT:
|
||||
m_noLeftToRepeat = m_wrapped.read();
|
||||
if (m_noLeftToRepeat == -1)
|
||||
{
|
||||
// A rather unexpected EOF
|
||||
m_state = RLEState.EOF;
|
||||
handleEof();
|
||||
return -1;
|
||||
}
|
||||
else if (m_noLeftToRepeat == 0)
|
||||
{
|
||||
// Nothing to repeat. Go on to read the next value.
|
||||
m_state = RLEState.READING;
|
||||
return read();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_state = RLEState.REPEATING;
|
||||
m_noLeftToRepeat--;
|
||||
if (m_noLeftToRepeat == 0)
|
||||
{
|
||||
// Just one to repeat, which we will do in this call.
|
||||
m_state = RLEState.READING;
|
||||
}
|
||||
m_blockChecksum.update(m_last);
|
||||
return m_last;
|
||||
}
|
||||
|
||||
case REPEATING:
|
||||
m_noLeftToRepeat--;
|
||||
if (m_noLeftToRepeat == 0)
|
||||
{
|
||||
m_state = RLEState.READING;
|
||||
}
|
||||
m_blockChecksum.update(m_last);
|
||||
return m_last;
|
||||
|
||||
default:
|
||||
throw new RuntimeException("Unknown state " + m_state + ". This is a bug");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(byte[] barr, int off, int len) throws IOException
|
||||
{
|
||||
// The ranges are validated by BZip2InputStream
|
||||
for (int i = 0; i < len; i++)
|
||||
{
|
||||
int b = read();
|
||||
if (b < 0)
|
||||
{
|
||||
// EOF
|
||||
return i > 0 ? i : -1;
|
||||
}
|
||||
barr[off + i] = (byte) (b & 0xFF);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException
|
||||
{
|
||||
m_wrapped.close();
|
||||
super.close();
|
||||
}
|
||||
}
|
@ -0,0 +1,63 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
/**
|
||||
* This is used to propagate errors from encoding threads to the thread using
|
||||
* the {@link BZip2OutputStream} when there is only one object using the
|
||||
* encoder.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class SingleObserverErrorState implements ErrorState
|
||||
{
|
||||
private final AtomicReference<Throwable> m_exception = new AtomicReference<Throwable>();
|
||||
|
||||
public void checkAndClearErrors(Object ownerToken) throws Error, RuntimeException, IOException
|
||||
{
|
||||
Throwable t = m_exception.getAndSet(null);
|
||||
if (t != null)
|
||||
{
|
||||
if (t instanceof IOException)
|
||||
{
|
||||
throw (IOException) t;
|
||||
}
|
||||
else if (t instanceof RuntimeException)
|
||||
{
|
||||
throw (RuntimeException) t;
|
||||
}
|
||||
else if (t instanceof Error)
|
||||
{
|
||||
throw (Error) t;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new RuntimeException(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void registerError(Throwable t, Object ownerToken)
|
||||
{
|
||||
m_exception.set(t);
|
||||
}
|
||||
}
|
992
src/main/java/org/at4j/comp/bzip2/ThreeWayRadixQuicksort.java
Normal file
992
src/main/java/org/at4j/comp/bzip2/ThreeWayRadixQuicksort.java
Normal file
@ -0,0 +1,992 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* This sort algorithm is used by the Burrows Wheeler encoder to sort the data
|
||||
* to encode. It is an amalgamation of three different sort algorithms. Radix sort
|
||||
* is used to divide the input into 65536 different buckets. The quicksort is
|
||||
* used to sort each bucket. When the quicksort iterations produce short enough
|
||||
* blocks, shell sort is used.
|
||||
* <p>
|
||||
* See <a href="http://www.ddj.com/architect/184410724">Dr. Dobb's Journal from
|
||||
* November 01 1998</a>.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
final class ThreeWayRadixQuicksort
|
||||
{
|
||||
// The amount of overshoot in the data. See below.
|
||||
static final int DATA_OVERSHOOT = 20;
|
||||
|
||||
// The deepest sort that we do with quicksort. Deeper sorts use shell sort.
|
||||
// This value should be less than the DATA_OVERSHOOT.
|
||||
private static final int QUICKSORT_DEPTH_THRESHOLD = 18;
|
||||
|
||||
// The size of the sorting stack. This size is the same as for bzip2 1.0.5.
|
||||
static final int SORT_STACK_SIZE = 100;
|
||||
|
||||
/**
|
||||
* The increments for shell sort. Borrowed from bzip2.
|
||||
* <p>
|
||||
* Knuth's increments seem to work better than Incerpi-Sedgewick here.
|
||||
* Possibly because the number of elems to sort is usually small, typically
|
||||
* <= 20.
|
||||
*/
|
||||
private static final int[] SHELL_SORT_INCREMENTS = { 1, 4, 13, 40, 121, 364, 1093, 3280, 9841, 29524, 88573, 265720, 797161, 2391484 };
|
||||
|
||||
// Declared package private for the unit tests
|
||||
static class QuickSortRangeInfo
|
||||
{
|
||||
private final int m_bucketStartPos;
|
||||
// The length of the bucket measured in number of symbols.
|
||||
private final int m_bucketLen;
|
||||
private final int m_depth;
|
||||
|
||||
QuickSortRangeInfo(int bucketStartPos, int bucketLen, int depth)
|
||||
{
|
||||
m_bucketStartPos = bucketStartPos;
|
||||
m_bucketLen = bucketLen;
|
||||
m_depth = depth;
|
||||
}
|
||||
}
|
||||
|
||||
// The data array.
|
||||
private final byte[] m_data;
|
||||
// The length of the data in the array. Data occupies the positions 0 to
|
||||
// m_length - 1 in the array.
|
||||
private final int m_length;
|
||||
// The shortest data block length that quicksort will be used for. For
|
||||
// shorter blocks, shell sort is used.
|
||||
private final int m_minLengthForQuicksort;
|
||||
// Contains preallocated data structures. Used to reduce the number of
|
||||
// temporary objects that are created and thus avoid time spent gc:ing.
|
||||
private final EncodingScratchpad m_scratchpad;
|
||||
// Cache with sort results that are used to speed up the sorting. This works
|
||||
// because all strings to sort are rotations of a single string.
|
||||
private final int[] m_sortCache;
|
||||
// Use a stack of sort range information instead of calling the quicksort
|
||||
// methods recursively.
|
||||
private final QuickSortRangeInfo[] m_sortStack;
|
||||
// A pointer to the current position in the sort stack.
|
||||
private int m_sortStackPointer = -1;
|
||||
// Array containing a pointer for each element in m_data to its location in
|
||||
// the sorted data.
|
||||
// This is declared package private for the unit tests.
|
||||
final int[] m_ptr;
|
||||
|
||||
/**
 * Create a new sorting object.
 * @param data The data to sort. This array should contain an overshoot of
 * {@code DATA_OVERSHOOT} bytes. I.e: the data array should have a length of
 * at least {@code length + DATA_OVERSHOOT} bytes, and the last {@code
 * DATA_OVERSHOOT} bytes should be equal to the first {@code DATA_OVERSHOOT}
 * bytes. This makes a few sorting optimizations possible.
 * <p>
 * If the length of the data is less than {@code DATA_OVERSHOOT} bytes, the
 * overshoot should contain the data repeated.
 * @param length The length of the data to sort. The data occupies the
 * positions 0 to {@code length - 1} in the data array.
 * @param minLengthForQuicksort Segments that are shorter than this length
 * are sorted with shell sort instead of quicksort.
 * @param sp The scratchpad that supplies the preallocated sort stack, sort
 * cache and pointer array used by this object. The sort cache is cleared by
 * this constructor.
 * @throws IllegalArgumentException If {@code length} is negative or greater
 * than the length of the data array, or if {@code minLengthForQuicksort} is
 * less than 3.
 */
ThreeWayRadixQuicksort(final byte[] data, final int length, final int minLengthForQuicksort, final EncodingScratchpad sp) throws IllegalArgumentException
{
    // A negative length would slip through both the overshoot assertion and
    // the upper bound check below.
    if (length < 0)
    {
        throw new IllegalArgumentException("Invalid data length " + length + ". It must be >= 0");
    }

    // The overshoot requirement is only verified when assertions are enabled.
    assert data.length >= length + DATA_OVERSHOOT;

    if (length > data.length)
    {
        throw new IllegalArgumentException("Invalid data length " + length + ". It must be <= the length of the data array (" + data.length + ")");
    }
    if (minLengthForQuicksort < 3)
    {
        throw new IllegalArgumentException("Invalid minimum length for Quicksort " + minLengthForQuicksort + ". It must be >= 3");
    }
    m_data = data;
    m_length = length;
    m_minLengthForQuicksort = minLengthForQuicksort;
    m_scratchpad = sp;
    m_sortStack = m_scratchpad.m_sortStack;
    // Clear the sortCache array
    m_sortCache = m_scratchpad.m_sortCache;
    Arrays.fill(m_sortCache, 0);
    m_ptr = m_scratchpad.m_ptrs;
}
|
||||
|
||||
/**
|
||||
* Get the data at the specified position. It is assumed that the position
|
||||
* is within the range of the data.
|
||||
* <p>
|
||||
* This method is so small so that it will likely be inlined by the Java
|
||||
* compiler.
|
||||
*/
|
||||
private int getDataAt(final int pos)
|
||||
{
|
||||
return m_data[pos] & 0xFF;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make the initial radix sort of the data into 65536 buckets. As a side
|
||||
* effect, this method populates the {@code m_ptr} array with the results of
|
||||
* the sort.
|
||||
* <p>
|
||||
* This method is declared package-private for the unit tests.
|
||||
* @return The start positions for each bucket (in the {@code m_ptr} array).
|
||||
*/
|
||||
int[] radixSort()
|
||||
{
|
||||
// This array will contain the frequencies of each two byte combination
|
||||
// in the data.
|
||||
final int[] frequencies = m_scratchpad.m_twoByteFrequencies;
|
||||
Arrays.fill(frequencies, 0);
|
||||
|
||||
// Iterate over the data and collect the frequencies of each occurring
|
||||
// two byte combination.
|
||||
int val = getDataAt(0) << 8;
|
||||
for (int i = m_length - 1; i >= 0; i--)
|
||||
{
|
||||
val = val >>> 8 | (getDataAt(i) << 8);
|
||||
frequencies[val]++;
|
||||
}
|
||||
|
||||
// Convert the frequencies array to contain the last data element
|
||||
// position + 1 for each two byte bucket.
|
||||
for (int i = 1; i < 65536; i++)
|
||||
{
|
||||
frequencies[i] += frequencies[i - 1];
|
||||
}
|
||||
|
||||
// The m_ptr array will contain the pointers between each two byte
|
||||
// combination's bucket location and its location in the data array.
|
||||
// This loop will also modify the frequencies array to contain the
|
||||
// starting position of each data bucket.
|
||||
val = getDataAt(0) << 8;
|
||||
for (int i = m_length - 1; i >= 0; i--)
|
||||
{
|
||||
val = val >>> 8 | (getDataAt(i) << 8);
|
||||
int pos = --frequencies[val];
|
||||
m_ptr[pos] = i;
|
||||
}
|
||||
|
||||
// Now frequencies contain the first location of each bucket and m_ptr
|
||||
// contains pointers between the data locations in the buckets and the
|
||||
// data in the data array.
|
||||
|
||||
return frequencies;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the position that contains the median of the values at the three
|
||||
* positions.
|
||||
*/
|
||||
private int med3(final int pos1, final int pos2, final int pos3, final int depth)
|
||||
{
|
||||
int v1, v2, v3;
|
||||
if ((v1 = getDataAt(m_ptr[pos1] + depth)) == (v2 = getDataAt(m_ptr[pos2] + depth)))
|
||||
{
|
||||
return pos1;
|
||||
}
|
||||
if (((v3 = getDataAt(m_ptr[pos3] + depth)) == v1) || (v3 == v2))
|
||||
{
|
||||
return pos3;
|
||||
}
|
||||
return v1 < v2 ? (v2 < v3 ? pos2 : (v1 < v3 ? pos3 : pos1)) : (v2 > v3 ? pos2 : (v1 < v3 ? pos1 : pos3));
|
||||
}
|
||||
|
||||
/**
|
||||
* Select the pivot value for the quicksort.
|
||||
* @return The position of the pivot value.
|
||||
*/
|
||||
private int selectPivot(final QuickSortRangeInfo qsri)
|
||||
{
|
||||
int pos1 = qsri.m_bucketStartPos;
|
||||
int pos3 = pos1 + qsri.m_bucketLen - 1;
|
||||
int pos2 = (pos1 + pos3) / 2;
|
||||
|
||||
// For a large bucket, use a median of three median values
|
||||
if (qsri.m_bucketLen > 500)
|
||||
{
|
||||
int d = qsri.m_bucketLen / 8;
|
||||
pos1 = med3(pos1, pos1 + d, pos1 + 2 * d, qsri.m_depth);
|
||||
pos2 = med3(pos2 - d, pos2, pos2 + d, qsri.m_depth);
|
||||
pos3 = med3(pos3 - 2 * d, pos3 - d, pos3, qsri.m_depth);
|
||||
}
|
||||
return med3(pos1, pos2, pos3, qsri.m_depth);
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap the elements in the two positions in the array.
|
||||
*/
|
||||
private void swap(final int pos1, final int pos2)
|
||||
{
|
||||
int v1 = m_ptr[pos1];
|
||||
m_ptr[pos1] = m_ptr[pos2];
|
||||
m_ptr[pos2] = v1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shell sort the data in the range. This is used for data ranges that are
|
||||
* too short to be quicksorted.
|
||||
* <p>
|
||||
* This method is declared package private for the unit tests.
|
||||
*/
|
||||
void shellSortRange(final QuickSortRangeInfo qsri)
|
||||
{
|
||||
// If the implementation of this method looks strange it is because it
|
||||
// is heavily optimized.
|
||||
|
||||
final int len = qsri.m_bucketLen;
|
||||
final int depth = qsri.m_depth;
|
||||
final int startPos = qsri.m_bucketStartPos;
|
||||
final int endPos = startPos + len;
|
||||
int incMax = 1;
|
||||
while (SHELL_SORT_INCREMENTS[incMax] < len)
|
||||
{
|
||||
incMax++;
|
||||
}
|
||||
|
||||
for (int incrementPtr = incMax - 1; incrementPtr >= 0; incrementPtr--)
|
||||
{
|
||||
final int increment = SHELL_SORT_INCREMENTS[incrementPtr];
|
||||
final int startIter = startPos + increment;
|
||||
for (int i = startIter; i < endPos; i++)
|
||||
{
|
||||
INCLOOP: for (int j = i; j >= startIter; j -= increment)
|
||||
{
|
||||
int curDepth = depth;
|
||||
int curPos1 = m_ptr[j - increment] + depth - 1;
|
||||
int curPos2 = m_ptr[j] + depth - 1;
|
||||
|
||||
// Tests with sort cache lookups.
|
||||
// Inner loop.
|
||||
while (true)
|
||||
{
|
||||
while (curPos1 >= m_length)
|
||||
{
|
||||
curPos1 -= m_length;
|
||||
}
|
||||
while (curPos2 >= m_length)
|
||||
{
|
||||
curPos2 -= m_length;
|
||||
}
|
||||
|
||||
// Eight tests with sort cache lookups. The data
|
||||
// overshoot helps us to avoid range checks when
|
||||
// the pointers are incremented.
|
||||
if (getDataAt(++curPos1) == getDataAt(++curPos2))
|
||||
{
|
||||
if (m_sortCache[curPos1] == m_sortCache[curPos2])
|
||||
{
|
||||
// 2
|
||||
if (getDataAt(++curPos1) == getDataAt(++curPos2))
|
||||
{
|
||||
if (m_sortCache[curPos1] == m_sortCache[curPos2])
|
||||
{
|
||||
// 3
|
||||
if (getDataAt(++curPos1) == getDataAt(++curPos2))
|
||||
{
|
||||
if (m_sortCache[curPos1] == m_sortCache[curPos2])
|
||||
{
|
||||
// 4
|
||||
if (getDataAt(++curPos1) == getDataAt(++curPos2))
|
||||
{
|
||||
if (m_sortCache[curPos1] == m_sortCache[curPos2])
|
||||
{
|
||||
// 5
|
||||
if (getDataAt(++curPos1) == getDataAt(++curPos2))
|
||||
{
|
||||
if (m_sortCache[curPos1] == m_sortCache[curPos2])
|
||||
{
|
||||
// 6
|
||||
if (getDataAt(++curPos1) == getDataAt(++curPos2))
|
||||
{
|
||||
if (m_sortCache[curPos1] == m_sortCache[curPos2])
|
||||
{
|
||||
// 7
|
||||
if (getDataAt(++curPos1) == getDataAt(++curPos2))
|
||||
{
|
||||
if (m_sortCache[curPos1] == m_sortCache[curPos2])
|
||||
{
|
||||
// 8
|
||||
if (getDataAt(++curPos1) == getDataAt(++curPos2))
|
||||
{
|
||||
if (m_sortCache[curPos1] == m_sortCache[curPos2])
|
||||
{
|
||||
curDepth += 8;
|
||||
if (curDepth >= m_length)
|
||||
{
|
||||
// The strings are exactly equal. This can happen for bzip2 when
|
||||
// we have input such as AAA (only) that does not get run length
|
||||
// encoded.
|
||||
break INCLOOP;
|
||||
}
|
||||
|
||||
// The eight symbols were equals and no cache hits. Continue the inner loop
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_sortCache[curPos1] < m_sortCache[curPos2])
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getDataAt(curPos1) < getDataAt(curPos2))
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_sortCache[curPos1] < m_sortCache[curPos2])
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getDataAt(curPos1) < getDataAt(curPos2))
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_sortCache[curPos1] < m_sortCache[curPos2])
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getDataAt(curPos1) < getDataAt(curPos2))
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_sortCache[curPos1] < m_sortCache[curPos2])
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getDataAt(curPos1) < getDataAt(curPos2))
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_sortCache[curPos1] < m_sortCache[curPos2])
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getDataAt(curPos1) < getDataAt(curPos2))
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_sortCache[curPos1] < m_sortCache[curPos2])
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getDataAt(curPos1) < getDataAt(curPos2))
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_sortCache[curPos1] < m_sortCache[curPos2])
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getDataAt(curPos1) < getDataAt(curPos2))
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_sortCache[curPos1] < m_sortCache[curPos2])
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getDataAt(curPos1) < getDataAt(curPos2))
|
||||
{
|
||||
break INCLOOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
swap(j - increment, j);
|
||||
continue INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the index of the string that has the first differing value at the
|
||||
* given depth compared to the first string in the range.
|
||||
* @param bucketStartPos The start of the range.
|
||||
* @param bucketLen The length of the range.
|
||||
* @param depth The depth to investigate.
|
||||
* @return The index of the first differing value, or {@code -1} if all
|
||||
* values are equal at the given depth.
|
||||
*/
|
||||
private int getPositionOfFirstDifferingValue(final int bucketStartPos, final int bucketLen, final int depth)
|
||||
{
|
||||
assert depth <= DATA_OVERSHOOT;
|
||||
|
||||
final int c0 = getDataAt(m_ptr[bucketStartPos] + depth);
|
||||
final int upperBound = bucketStartPos + bucketLen;
|
||||
for (int i = bucketStartPos + 1; i < upperBound; i++)
|
||||
{
|
||||
if (getDataAt(m_ptr[i] + depth) != c0)
|
||||
{
|
||||
return i;
|
||||
}
|
||||
}
|
||||
// All values at this depth are equal
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap the {@code len} values after {@code r1Start} with the {@code len}
|
||||
* values after {@code r2start}.
|
||||
* @param r1Start The start of the first range.
|
||||
* @param r2Start The start of the second range.
|
||||
* @param len The number of bytes to swap.
|
||||
*/
|
||||
private void swapRanges(final int r1Start, final int r2Start, final int len)
|
||||
{
|
||||
assert r1Start + len <= r2Start;
|
||||
|
||||
// Is the scratchpad's temp area large enough?
|
||||
if (m_scratchpad.m_tempArea.length < len)
|
||||
{
|
||||
// No. Reallocate it
|
||||
m_scratchpad.m_tempArea = new int[len + 100];
|
||||
}
|
||||
|
||||
System.arraycopy(m_ptr, r1Start, m_scratchpad.m_tempArea, 0, len);
|
||||
System.arraycopy(m_ptr, r2Start, m_ptr, r1Start, len);
|
||||
System.arraycopy(m_scratchpad.m_tempArea, 0, m_ptr, r2Start, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the range to the stack containing ranges that are left to sort.
|
||||
*/
|
||||
private void addRangeToStack(final int bucketStartPos, final int bucketLen, final int depth)
|
||||
{
|
||||
if (bucketLen < 2)
|
||||
{
|
||||
// Already sorted
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_sortStack[++m_sortStackPointer] = new QuickSortRangeInfo(bucketStartPos, bucketLen, depth);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Quicksort the range.
|
||||
* <p>
|
||||
* This method is declared package-private for the unit tests.
|
||||
*/
|
||||
void quickSortRange(final QuickSortRangeInfo qsri)
|
||||
{
|
||||
// Select the pivot element.
|
||||
final int pivot = selectPivot(qsri);
|
||||
|
||||
// Move the pivot into the first position
|
||||
swap(qsri.m_bucketStartPos, pivot);
|
||||
|
||||
// First check if all characters are equal at the given depth, in which
|
||||
// case we increase the depth and try again
|
||||
int sortDepth = qsri.m_depth;
|
||||
|
||||
// The sort depth threshold should be less than the overshoot. If it
|
||||
// were not, we would have to think of the boundaries of the m_data
|
||||
// array and such.
|
||||
assert sortDepth < DATA_OVERSHOOT;
|
||||
|
||||
int posAtFirstDifferingValue = getPositionOfFirstDifferingValue(qsri.m_bucketStartPos, qsri.m_bucketLen, sortDepth);
|
||||
while (posAtFirstDifferingValue == -1)
|
||||
{
|
||||
// All characters at the current depth are equal. Sort using an
|
||||
// increased depth.
|
||||
|
||||
if (sortDepth == m_length)
|
||||
{
|
||||
// We hit the tiles. All strings are equal.
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (++sortDepth < QUICKSORT_DEPTH_THRESHOLD)
|
||||
{
|
||||
posAtFirstDifferingValue = getPositionOfFirstDifferingValue(qsri.m_bucketStartPos, qsri.m_bucketLen, sortDepth);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use shell sort instead
|
||||
shellSortRange(qsri);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort using the calculated depth.
|
||||
|
||||
// Iterate through the data to sort using two pointers advancing
|
||||
// from each end of the data range to sort.
|
||||
// Create one area at the start of the range and one at the end of
|
||||
// the range where we move values that are equal to the pivot value.
|
||||
int lowPtr = posAtFirstDifferingValue;
|
||||
// Pointer pointing to the element after the lower pivot range
|
||||
int lowPivotRangePtr = posAtFirstDifferingValue;
|
||||
int hiPtr = qsri.m_bucketStartPos + qsri.m_bucketLen - 1;
|
||||
// Pointer pointing to the element before the upper pivot range.
|
||||
int hiPivotRangePtr = hiPtr;
|
||||
int pivotVal = getDataAt(m_ptr[qsri.m_bucketStartPos] + sortDepth);
|
||||
while (true)
|
||||
{
|
||||
int curData;
|
||||
// Move the lower pointer forward
|
||||
while (lowPtr <= hiPtr && (curData = getDataAt(m_ptr[lowPtr] + sortDepth)) <= pivotVal)
|
||||
{
|
||||
if (curData == pivotVal)
|
||||
{
|
||||
// Move the data into the lower pivot range and increase
|
||||
// the pivot range pointer.
|
||||
swap(lowPtr, lowPivotRangePtr++);
|
||||
}
|
||||
lowPtr++;
|
||||
}
|
||||
|
||||
// Move the upper pointer backwards
|
||||
while (lowPtr <= hiPtr && (curData = getDataAt(m_ptr[hiPtr] + sortDepth)) >= pivotVal)
|
||||
{
|
||||
if (curData == pivotVal)
|
||||
{
|
||||
// Move the data into the upper pivot range and decrease
|
||||
// the pivot range pointer.
|
||||
swap(hiPtr, hiPivotRangePtr--);
|
||||
}
|
||||
hiPtr--;
|
||||
}
|
||||
|
||||
if (lowPtr > hiPtr)
|
||||
{
|
||||
// We're done
|
||||
break;
|
||||
}
|
||||
|
||||
// Now the value at lowPtr is larger than the pivot
|
||||
// value and the value at hiPtr is smaller. Swap the two
|
||||
// values and continue moving the pointers.
|
||||
swap(lowPtr++, hiPtr--);
|
||||
}
|
||||
|
||||
// Merge and move the two pivot ranges to the center of the array
|
||||
// and sort the three resulting segments.
|
||||
|
||||
// Swap the smallest possible ranges
|
||||
final int lowRangeLen = lowPtr - lowPivotRangePtr;
|
||||
int rlen = Math.min(lowPivotRangePtr - qsri.m_bucketStartPos, lowRangeLen);
|
||||
if (rlen > 0)
|
||||
{
|
||||
swapRanges(qsri.m_bucketStartPos, lowPtr - rlen, rlen);
|
||||
}
|
||||
|
||||
final int hiRangeLen = hiPivotRangePtr - hiPtr;
|
||||
rlen = Math.min(qsri.m_bucketStartPos + qsri.m_bucketLen - hiPivotRangePtr - 1, hiRangeLen);
|
||||
if (rlen > 0)
|
||||
{
|
||||
swapRanges(lowPtr, qsri.m_bucketStartPos + qsri.m_bucketLen - rlen, rlen);
|
||||
}
|
||||
final int pivotRangeLen = qsri.m_bucketLen - lowRangeLen - hiRangeLen;
|
||||
|
||||
// Sort the lower range
|
||||
addRangeToStack(qsri.m_bucketStartPos, lowRangeLen, sortDepth);
|
||||
// Sort the pivot range at an increased depth
|
||||
addRangeToStack(qsri.m_bucketStartPos + lowRangeLen, pivotRangeLen, sortDepth + 1);
|
||||
// Sort the higher range
|
||||
addRangeToStack(qsri.m_bucketStartPos + lowRangeLen + pivotRangeLen, hiRangeLen, sortDepth);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sort all strings in the bucket.
|
||||
* <p>
|
||||
* This method is declared package private for the unit tests.
|
||||
* @param bucketStartPos The start position of the bucket.
|
||||
* @param bucketLen The length of the bucket.
|
||||
* @param depth The depth to start comparing strings at. (The strings are
|
||||
* all equal at lower depths.)
|
||||
*/
|
||||
void sortBucket(final int bucketStartPos, final int bucketLen, final int depth)
|
||||
{
|
||||
if (bucketLen < 2)
|
||||
{
|
||||
// Already sorted
|
||||
return;
|
||||
}
|
||||
|
||||
assert m_sortStackPointer == -1;
|
||||
|
||||
// Use a stack with quick sort pass settings instead of recursing since
|
||||
// the stack may become very large.
|
||||
m_sortStack[++m_sortStackPointer] = new QuickSortRangeInfo(bucketStartPos, bucketLen, depth);
|
||||
while (m_sortStackPointer >= 0)
|
||||
{
|
||||
QuickSortRangeInfo qsri = m_sortStack[m_sortStackPointer--];
|
||||
|
||||
// The minimum length of the segments to sort is 2. That is ensured
|
||||
// by the addRangeToStack method.
|
||||
|
||||
if ((qsri.m_bucketLen < m_minLengthForQuicksort) || (qsri.m_depth > QUICKSORT_DEPTH_THRESHOLD))
|
||||
{
|
||||
shellSortRange(qsri);
|
||||
}
|
||||
else
|
||||
{
|
||||
// This adds up to three new sort ranges to the stack
|
||||
// (values less than, equal to and higher than the pivot value)
|
||||
quickSortRange(qsri);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the sort order for all big buckets. (256 of them in all, each
|
||||
* containing 256 small buckets.)
|
||||
* <p>
|
||||
* Smaller buckets are sorted before larger. This is a more efficient way of
|
||||
* filling the sort cache.
|
||||
* @param bucketStartPositions The start positions for all large buckets.
|
||||
* @return An array containing the indices of the large buckets in the order
|
||||
* that they should be sorted.
|
||||
*/
|
||||
private int[] establishSortOrder(final int[] bucketStartPositions)
|
||||
{
|
||||
final int[] sortOrder = m_scratchpad.m_sortOrder;
|
||||
for (int i = 0; i < 256; i++)
|
||||
{
|
||||
sortOrder[i] = i;
|
||||
}
|
||||
|
||||
// Shell sort the sort orders
|
||||
// incPtr == 4 gives an increment of 121
|
||||
for (int incPtr = 4; incPtr >= 0; incPtr--)
|
||||
{
|
||||
final int increment = SHELL_SORT_INCREMENTS[incPtr];
|
||||
for (int i = increment; i < sortOrder.length; i++)
|
||||
{
|
||||
INCLOOP: for (int j = i; j >= increment; j -= increment)
|
||||
{
|
||||
// Which of the lengths of the big buckets is the longest
|
||||
final int so1 = sortOrder[j - increment];
|
||||
final int so2 = sortOrder[j];
|
||||
if ((bucketStartPositions[so1 * 256 + 255] - bucketStartPositions[so1 * 256]) > (bucketStartPositions[so2 * 256 + 255] - bucketStartPositions[so2 * 256]))
|
||||
{
|
||||
sortOrder[j] = so1;
|
||||
sortOrder[j - increment] = so2;
|
||||
}
|
||||
else
|
||||
{
|
||||
// This sort order element is in its right position.
|
||||
break INCLOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sortOrder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sort the data. This method borrows optimizations from bzip2 1.0.5.
|
||||
* @return An array with pointers from each byte's original position to its
|
||||
* position in the sorted data.
|
||||
*/
|
||||
int[] sort()
|
||||
{
|
||||
if (m_length == 0)
|
||||
{
|
||||
return new int[0];
|
||||
}
|
||||
|
||||
// Run a least significant digit radix sort on all two-byte permutations
|
||||
// of the incoming data. This gives 256^2 buckets with similar data
|
||||
// which can then be sorted individually.
|
||||
|
||||
// This method call also creates and populates the m_ptr array.
|
||||
// The bucketStartPositions has an overshoot of one position, which
|
||||
// gives it the length 65537. The overshoot element should be equal to
|
||||
// the length of the data.
|
||||
final int[] bucketStartPositions = radixSort();
|
||||
// Fix the overshoot
|
||||
bucketStartPositions[65536] = m_length;
|
||||
|
||||
final boolean[] sortedLargeBuckets = m_scratchpad.m_sortedLargeBuckets;
|
||||
Arrays.fill(sortedLargeBuckets, false);
|
||||
final boolean[] sortedSmallBuckets = m_scratchpad.m_sortedSmallBuckets;
|
||||
Arrays.fill(sortedSmallBuckets, false);
|
||||
final int[] copyStart = m_scratchpad.m_copyStart;
|
||||
final int[] copyEnd = m_scratchpad.m_copyEnd;
|
||||
|
||||
// Establish a sort order for all big buckets (256 of them in all) with
|
||||
// the shortest buckets coming first. This will make the sort result
|
||||
// caching optimization most efficient
|
||||
final int[] sortOrder = establishSortOrder(bucketStartPositions);
|
||||
|
||||
// Quick sort the elements in each non-empty bucket.
|
||||
for (int largeBucketIndex = 0; largeBucketIndex < 256; largeBucketIndex++)
|
||||
{
|
||||
final int largeBucketNo = sortOrder[largeBucketIndex];
|
||||
for (int smallBucketNo = 0; smallBucketNo < 256; smallBucketNo++)
|
||||
{
|
||||
// Don't sort when smallBucketNo == largeBucketNo. This small
|
||||
// bucket will be dealt with by the scanning step below.
|
||||
if (smallBucketNo != largeBucketNo)
|
||||
{
|
||||
final int bucketIndex = largeBucketNo * 256 + smallBucketNo;
|
||||
if (!sortedSmallBuckets[bucketIndex])
|
||||
{
|
||||
final int bucketStartPos = bucketStartPositions[bucketIndex];
|
||||
final int bucketLen = bucketStartPositions[bucketIndex + 1] - bucketStartPos;
|
||||
|
||||
if (bucketLen > 1)
|
||||
{
|
||||
// More than one data element in this bucket. Sort it.
|
||||
sortBucket(bucketStartPos, bucketLen, 2);
|
||||
}
|
||||
sortedSmallBuckets[bucketIndex] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now that we have sorted all small buckets in the large bucket n,
|
||||
// we can infer the sorted order for the small bucket n in all
|
||||
// large buckets m, including (magically) the small bucket n in the
|
||||
// large bucket n that we did not sort above.
|
||||
for (int m = 0; m < 256; m++)
|
||||
{
|
||||
copyStart[m] = bucketStartPositions[m * 256 + largeBucketNo];
|
||||
copyEnd[m] = bucketStartPositions[m * 256 + largeBucketNo + 1] - 1;
|
||||
}
|
||||
|
||||
for (int i = bucketStartPositions[largeBucketNo * 256]; i < copyStart[largeBucketNo]; i++)
|
||||
{
|
||||
int k = m_ptr[i] - 1;
|
||||
if (k < 0)
|
||||
{
|
||||
k += m_length;
|
||||
}
|
||||
final int m = getDataAt(k);
|
||||
if (!sortedLargeBuckets[m])
|
||||
{
|
||||
int index = copyStart[m]++;
|
||||
if (index >= m_length)
|
||||
{
|
||||
index -= m_length;
|
||||
}
|
||||
m_ptr[index] = k;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = bucketStartPositions[(largeBucketNo + 1) * 256] - 1; i > copyEnd[largeBucketNo]; i--)
|
||||
{
|
||||
int k = m_ptr[i] - 1;
|
||||
if (k < 0)
|
||||
{
|
||||
k += m_length;
|
||||
}
|
||||
final int m = getDataAt(k);
|
||||
if (!sortedLargeBuckets[m])
|
||||
{
|
||||
int index = copyEnd[m]--;
|
||||
if (index < 0)
|
||||
{
|
||||
index += m_length;
|
||||
}
|
||||
m_ptr[index] = k;
|
||||
}
|
||||
}
|
||||
|
||||
// Mark all buckets that we got for free as sorted
|
||||
for (int m = 0; m < 256; m++)
|
||||
{
|
||||
sortedSmallBuckets[m * 256 + largeBucketNo] = true;
|
||||
}
|
||||
|
||||
sortedLargeBuckets[largeBucketNo] = true;
|
||||
|
||||
// Fix the sort cache for the large bucket.
|
||||
// Don't do it for the last sorted bucket.
|
||||
if (largeBucketIndex != 255)
|
||||
{
|
||||
final int largeBucketStart = bucketStartPositions[largeBucketNo * 256];
|
||||
final int largeBucketEnd;
|
||||
if (largeBucketNo < 255)
|
||||
{
|
||||
largeBucketEnd = bucketStartPositions[(largeBucketNo + 1) * 256];
|
||||
}
|
||||
else
|
||||
{
|
||||
largeBucketEnd = m_length;
|
||||
}
|
||||
final int largeBucketSize = largeBucketEnd - largeBucketStart;
|
||||
assert largeBucketSize >= 0;
|
||||
|
||||
int shifts = 0;
|
||||
while (largeBucketSize >>> shifts > 65534)
|
||||
{
|
||||
shifts++;
|
||||
}
|
||||
|
||||
for (int i = largeBucketSize - 1; i >= 0; i--)
|
||||
{
|
||||
final int sptr = m_ptr[largeBucketStart + i];
|
||||
final int qval = i >>> shifts;
|
||||
m_sortCache[sptr] = qval;
|
||||
if (sptr < DATA_OVERSHOOT)
|
||||
{
|
||||
// Update cache in overshoot too
|
||||
m_sortCache[m_length + sptr] = qval;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return m_ptr;
|
||||
}
|
||||
}
|
29
src/main/java/org/at4j/comp/bzip2/package-info.java
Normal file
29
src/main/java/org/at4j/comp/bzip2/package-info.java
Normal file
@ -0,0 +1,29 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/**
|
||||
* This package contains the {@link org.at4j.comp.bzip2.BZip2InputStream} and
|
||||
* {@link org.at4j.comp.bzip2.BZip2OutputStream} stream implementations for
|
||||
* decompressing and compressing data. The
|
||||
* {@link org.at4j.comp.bzip2.BZip2ReadableFile} and
|
||||
* {@link org.at4j.comp.bzip2.BZip2WritableFile} can be used to transparently
|
||||
* decompress and compress data in files.
|
||||
* @since 1.0
|
||||
* @author Karl Gustafsson
|
||||
*/
|
||||
package org.at4j.comp.bzip2;
|
24
src/main/java/org/at4j/comp/package-info.java
Normal file
24
src/main/java/org/at4j/comp/package-info.java
Normal file
@ -0,0 +1,24 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/**
|
||||
* The {@link org.at4j.comp.CompressionLevel} enum.
|
||||
* @since 1.0
|
||||
* @author Karl Gustafsson
|
||||
*/
|
||||
package org.at4j.comp;
|
173
src/main/java/org/at4j/support/comp/ByteMoveToFront.java
Normal file
173
src/main/java/org/at4j/support/comp/ByteMoveToFront.java
Normal file
@ -0,0 +1,173 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.comp;
|
||||
|
||||
/**
|
||||
* A move-to-front (MTF) encoder and decoder for bytes. For more information on
|
||||
* MTF encoding, see <a href="http://en.wikipedia.org/wiki/Move_to_front">the
|
||||
* Wikipedia article on move-to-front transforms</a>.
|
||||
* <p>
|
||||
* This object is not thread safe. Clients must provide external synchronization
|
||||
* if they are to use it from several concurrent threads.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
* @see IntMoveToFront
|
||||
*/
|
||||
public class ByteMoveToFront
|
||||
{
|
||||
private final byte[] m_alphabet;
|
||||
|
||||
private static byte[] createByteAlphabetFromRange(int minVal, int maxVal) throws IndexOutOfBoundsException
|
||||
{
|
||||
if ((minVal < 0) || (maxVal > 255) || (minVal >= maxVal))
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Invalid min and/or max value: min " + minVal + ", max " + maxVal);
|
||||
}
|
||||
int alphLen = maxVal - minVal + 1;
|
||||
byte[] alphabet = new byte[alphLen];
|
||||
for (int i = 0; i < alphLen; i++)
|
||||
{
|
||||
alphabet[i] = (byte) ((i + minVal) & 0xFF);
|
||||
}
|
||||
return alphabet;
|
||||
}
|
||||
|
||||
/**
 * Create a byte MTF encoder/decoder whose initial alphabet is all byte
 * values from {@code minValue} up to and including {@code maxValue}, in
 * ascending order.
 * @param minValue The first value of the alphabet. This should be an
 * unsigned byte in the range 0 to 254.
 * @param maxValue The last value of the alphabet. This should be an
 * unsigned byte in the range 1 to 255.
 * @throws IndexOutOfBoundsException If the min value is negative, if the
 * max value is greater than 255 or if the min value is equal to or greater
 * than the max value.
 */
public ByteMoveToFront(int minValue, int maxValue) throws IndexOutOfBoundsException
{
    // The range is validated by the alphabet factory method.
    this(createByteAlphabetFromRange(minValue, maxValue));
}
|
||||
|
||||
/**
 * Create a byte MTF encoder/decoder that starts out with the supplied
 * alphabet.
 * @param alphabet The initial alphabet. The array is used as it is,
 * <i>without</i> being copied, so encoding and decoding operations will
 * modify it.
 * @throws NullPointerException If {@code alphabet} is {@code null}.
 */
public ByteMoveToFront(byte[] alphabet)
{
    if (alphabet == null)
    {
        throw new NullPointerException();
    }
    m_alphabet = alphabet;
}
|
||||
|
||||
/**
|
||||
* Encode the bytes in {@code in} and store them in the array {@code out}.
|
||||
* The MTF alphabet is also updated by this method.
|
||||
* @param in The bytes to encode.
|
||||
* @param out The array to store the encoded bytes in. This array must be at
|
||||
* least as long as {@code in}.
|
||||
* @return {@code out}
|
||||
* @throws ArrayIndexOutOfBoundsException If any of the bytes in {@code in}
|
||||
* are not in the MTF alphabet.
|
||||
* @throws IllegalArgumentException If the {@code out} array is too short.
|
||||
*/
|
||||
public byte[] encode(byte[] in, byte[] out) throws ArrayIndexOutOfBoundsException, IllegalArgumentException
|
||||
{
|
||||
if (out.length < in.length)
|
||||
{
|
||||
throw new IllegalArgumentException("The output array must be at least of the same length as the input array. Was in: " + in.length + ", out: " + out.length);
|
||||
}
|
||||
|
||||
for (int i = 0; i < in.length; i++)
|
||||
{
|
||||
byte val = in[i];
|
||||
if (m_alphabet[0] == val)
|
||||
{
|
||||
out[i] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
byte prev = m_alphabet[0];
|
||||
int j = 1;
|
||||
while (true)
|
||||
{
|
||||
byte nextPrev = m_alphabet[j];
|
||||
if (m_alphabet[j] == val)
|
||||
{
|
||||
out[i] = (byte) (j & 0xFF);
|
||||
m_alphabet[0] = m_alphabet[j];
|
||||
m_alphabet[j] = prev;
|
||||
break;
|
||||
}
|
||||
m_alphabet[j] = prev;
|
||||
prev = nextPrev;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a single byte and update the MTF alphabet.
|
||||
* @param index The index in the MTF alphabet for the byte.
|
||||
* @return The byte.
|
||||
*/
|
||||
public byte decode(int index)
|
||||
{
|
||||
byte val = m_alphabet[index];
|
||||
for (int j = index; j > 0; j--)
|
||||
{
|
||||
m_alphabet[j] = m_alphabet[j - 1];
|
||||
}
|
||||
m_alphabet[0] = val;
|
||||
return val;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode an array of bytes and update the MTF alphabet. The decoded bytes
|
||||
* are stored in {@code out}.
|
||||
* @param in The bytes to decode.
|
||||
* @param out The array to store the decoded bytes in. This array must be at
|
||||
* least as long as {@code in}.
|
||||
* @return {@code out}
|
||||
* @throws ArrayIndexOutOfBoundsException If any of the bytes in {@code in}
|
||||
* are not in the MTF alphabet.
|
||||
* @throws IllegalArgumentException If {@code out} is too short.
|
||||
*/
|
||||
public byte[] decode(byte[] in, byte[] out) throws ArrayIndexOutOfBoundsException, IllegalArgumentException
|
||||
{
|
||||
if (out.length < in.length)
|
||||
{
|
||||
throw new IllegalArgumentException("The output array must be at least of the same length as the input array. Was in: " + in.length + ", out: " + out.length);
|
||||
}
|
||||
|
||||
for (int i = 0; i < in.length; i++)
|
||||
{
|
||||
out[i] = decode(in[i]);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
}
|
177
src/main/java/org/at4j/support/comp/IntMoveToFront.java
Normal file
177
src/main/java/org/at4j/support/comp/IntMoveToFront.java
Normal file
@ -0,0 +1,177 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.comp;
|
||||
|
||||
/**
|
||||
* A move-to-front (MTF) encoder and decoder for integers. For more information
|
||||
* on MTF encoding, see <a href="http://en.wikipedia.org/wiki/Move_to_front">the
|
||||
* Wikipedia article on move-to-front transforms</a>.
|
||||
* <p>
|
||||
* This object is not thread safe. Clients must provide external synchronization
|
||||
* if they are to use it from several concurrent threads.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
* @see ByteMoveToFront
|
||||
*/
|
||||
public class IntMoveToFront
{
    // The current MTF alphabet. Mutated in place by every encode/decode call.
    private final int[] m_alphabet;

    /**
     * Build the alphabet {@code minVal, minVal + 1, ..., maxVal}.
     * @throws IndexOutOfBoundsException If {@code minVal >= maxVal}.
     */
    private static int[] createIntAlphabetFromRange(int minVal, int maxVal) throws IndexOutOfBoundsException
    {
        if (minVal >= maxVal)
        {
            throw new IndexOutOfBoundsException("Invalid min and max values. Min=" + minVal + ", max=" + maxVal);
        }
        int alphLen = maxVal - minVal + 1;
        int[] alphabet = new int[alphLen];
        for (int i = 0; i < alphLen; i++)
        {
            alphabet[i] = i + minVal;
        }
        return alphabet;
    }

    /**
     * Create an integer MTF encoder/decoder that transforms integers in the
     * range between {@code minValue} and {@code maxValue}.
     * <p>
     * The initial alphabet of the transformer will be {@code minValue …
     * maxValue}.
     * @param minValue The start value of the range.
     * @param maxValue The end value of the range.
     * @throws IndexOutOfBoundsException If the min value is equal to or greater
     * than the max value.
     */
    public IntMoveToFront(int minValue, int maxValue) throws IndexOutOfBoundsException
    {
        this(createIntAlphabetFromRange(minValue, maxValue));
    }

    /**
     * Create an integer MTF encoder/decoder that transforms integers using
     * the supplied initial alphabet.
     * @param alphabet The initial alphabet. This integer array is <i>not</i>
     * copied by this method and it will be modified by encoding or decoding
     * operations.
     * @throws NullPointerException If {@code alphabet} is {@code null}.
     */
    public IntMoveToFront(int[] alphabet)
    {
        if (alphabet == null)
        {
            throw new NullPointerException("alphabet must not be null");
        }

        m_alphabet = alphabet;
    }

    /**
     * Encode the integers in {@code in} and store the resulting alphabet
     * indices in the array {@code out}. The MTF alphabet is also updated by
     * this method.
     * @param in The integers to encode.
     * @param out The array to store the encoded integers in. This array must be
     * at least as long as {@code in}.
     * @return {@code out}
     * @throws ArrayIndexOutOfBoundsException If any of the integers in
     * {@code in} are not in the MTF alphabet.
     * @throws IllegalArgumentException If the {@code out} array is too short.
     */
    public int[] encode(int[] in, int[] out) throws ArrayIndexOutOfBoundsException, IllegalArgumentException
    {
        if (out.length < in.length)
        {
            throw new IllegalArgumentException("The output array must be at least of the same length as the input array. Was in: " + in.length + ", out: " + out.length);
        }

        for (int i = 0; i < in.length; i++)
        {
            int val = in[i];
            if (m_alphabet[0] == val)
            {
                out[i] = 0;
            }
            else
            {
                // Scan for val, shifting every passed entry one slot back.
                int prev = m_alphabet[0];
                int j = 1;
                while (true)
                {
                    int nextPrev = m_alphabet[j];
                    if (nextPrev == val)
                    {
                        // Store the full index. Truncating it to a byte (as
                        // the byte-based encoder does) would corrupt every
                        // index >= 128 in an int alphabet.
                        out[i] = j;
                        m_alphabet[0] = nextPrev;
                        m_alphabet[j] = prev;
                        break;
                    }
                    m_alphabet[j] = prev;
                    prev = nextPrev;
                    j++;
                }
            }
        }
        return out;
    }

    /**
     * Decode a single integer and update the MTF alphabet.
     * @param index The index in the MTF alphabet for the integer.
     * @return The integer.
     * @throws ArrayIndexOutOfBoundsException If {@code index} is not a valid
     * position in the MTF alphabet.
     */
    public int decode(int index)
    {
        int val = m_alphabet[index];
        for (int j = index; j > 0; j--)
        {
            m_alphabet[j] = m_alphabet[j - 1];
        }
        m_alphabet[0] = val;
        return val;
    }

    /**
     * Decode an array of integers and update the MTF alphabet. The decoded
     * integers are stored in {@code out}.
     * @param in The integers (alphabet indices) to decode.
     * @param out The array to store the decoded integers in. This array must be
     * at least as long as {@code in}.
     * @return {@code out}
     * @throws ArrayIndexOutOfBoundsException If any of the integers in
     * {@code in} are not valid positions in the MTF alphabet.
     * @throws IllegalArgumentException If {@code out} is too short.
     */
    public int[] decode(int[] in, int[] out) throws ArrayIndexOutOfBoundsException, IllegalArgumentException
    {
        if (out.length < in.length)
        {
            throw new IllegalArgumentException("The output array must be at least of the same length as the input array. Was in: " + in.length + ", out: " + out.length);
        }

        for (int i = 0; i < in.length; i++)
        {
            out[i] = decode(in[i]);
        }
        return out;
    }
}
|
24
src/main/java/org/at4j/support/comp/package-info.java
Normal file
24
src/main/java/org/at4j/support/comp/package-info.java
Normal file
@ -0,0 +1,24 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/**
|
||||
* Support classes for compression and decompression.
|
||||
* @since 1.1
|
||||
* @author Karl Gustafsson
|
||||
*/
|
||||
package org.at4j.support.comp;
|
180
src/main/java/org/at4j/support/io/BitInput.java
Normal file
180
src/main/java/org/at4j/support/io/BitInput.java
Normal file
@ -0,0 +1,180 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.io;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* This interface identifies a source for bits.
|
||||
* <p>
|
||||
* The source is assumed to have a position which may or may not be at a byte
|
||||
* boundary (every eight bits).
|
||||
* <p>
|
||||
* If an implementing class also extends {@link java.io.InputStream} it can be
|
||||
* used as an input stream. This interface redefines {@link java.io.InputStream}
|
||||
* 's read methods with the extra condition that they may only be used if the
|
||||
* current position of the source is at a byte boundary. The
|
||||
* {@link #readBytes(byte[], int, int)} method does not have that limitation.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
* @see java.io.InputStream
|
||||
* @see BitOutput
|
||||
*/
|
||||
public interface BitInput extends Closeable
{
    /**
     * Tell whether the end of the input has been reached, in which case
     * nothing more can be read from it.
     * @return {@code true} if no more can be read from this input.
     */
    boolean isAtEof();

    /**
     * Advance the position to the next byte boundary. Does nothing if the
     * current position already is at a byte boundary.
     * @throws IOException On I/O errors or if this input is already at the end
     * of the available data.
     */
    void skipToByteBoundary() throws IOException;

    /**
     * Read the next bit from the input.
     * @return {@code true} if the bit is 1, {@code false} if it is 0.
     * @throws IOException On I/O errors or if this input is already at the end
     * of the available data.
     */
    boolean readBit() throws IOException;

    /**
     * Read at most eight bits from the input.
     * @param no The number of bits to read.
     * @return The bits, stored in the least significant bits of the returned
     * integer. Reading {@code 1011}, for instance, gives
     * {@code 1 * 8 + 0 * 4 + 1 * 2 + 1 * 1 == 11}.
     * @throws IndexOutOfBoundsException If {@code no} is less than 0 or greater
     * than 8.
     * @throws IOException On I/O errors or if this input is already at the end
     * of the available data.
     * @see #readBitsLittleEndian(int)
     */
    int readBits(int no) throws IndexOutOfBoundsException, IOException;

    /**
     * Read at most 32 bits from the input. The first eight bits that are read
     * will be the most significant byte of the returned integer.
     * @param no The number of bits to read.
     * @return The bits read, stored in the least significant bits of the
     * returned integer (just like for {@link #readBits(int)}).
     * @throws IndexOutOfBoundsException If {@code no} is less than 0 or greater
     * than 32.
     * @throws IOException On I/O errors or if this input is already at the end
     * of the available data.
     * @see #readBits(int)
     */
    int readBitsLittleEndian(int no) throws IndexOutOfBoundsException, IOException;

    /**
     * Read bytes from the input. In contrast to
     * {@link #read(byte[], int, int)}, the current position does not have to
     * be at a byte boundary.
     * <p>
     * Also in contrast to {@link #read(byte[], int, int)}, an
     * {@link IOException} is thrown if not all requested bytes can be read.
     * @param barr The byte array to read bytes into.
     * @param off The offset in the array to start writing read bytes at.
     * @param len The number of bytes to read.
     * @return {@code barr}.
     * @throws IndexOutOfBoundsException If the length or the offset is negative
     * or if the sum of the length and the offset is greater than the length of
     * the supplied byte array.
     * @throws IOException On I/O errors or if there were not enough bytes to
     * read from the input.
     * @see #read(byte[], int, int)
     */
    byte[] readBytes(byte[] barr, int off, int len) throws IndexOutOfBoundsException, IOException;

    /**
     * Read one byte from the input. See {@link java.io.InputStream#read()}.
     * <p>
     * The current position in the input must be at a byte boundary.
     * @return The read byte or {@code -1} if the current position is at the end
     * of the input.
     * @throws IOException On I/O errors or if the current position is not at a
     * byte boundary.
     * @see java.io.InputStream#read()
     */
    int read() throws IOException;

    /**
     * Read bytes into the supplied array. See
     * {@link java.io.InputStream#read(byte[])}.
     * <p>
     * The current position in the input must be at a byte boundary.
     * @param barr The byte array to read bytes into.
     * @return The number of bytes read.
     * @throws IOException On I/O errors or if the current position is not at a
     * byte boundary.
     * @see java.io.InputStream#read(byte[])
     */
    int read(byte[] barr) throws IOException;

    /**
     * Read bytes into a part of the supplied array. See
     * {@link java.io.InputStream#read(byte[], int, int)}.
     * <p>
     * The current position in the input must be at a byte boundary.
     * @param barr The byte array to read bytes into.
     * @param offset The position in the array where the first read byte is
     * stored.
     * @param len The number of bytes to read.
     * @return The number of bytes actually read.
     * @throws IndexOutOfBoundsException If the offset or the length is negative
     * or if the sum of the offset and the length is greater than the length of
     * the supplied byte array.
     * @throws IOException On I/O errors or if the current position is not at a
     * byte boundary.
     */
    int read(byte[] barr, int offset, int len) throws IndexOutOfBoundsException, IOException;

    /**
     * Skip bytes in the input. See {@link java.io.InputStream#skip(long)}.
     * <p>
     * The current position in the input must be at a byte boundary.
     * @param n The number of bytes to skip.
     * @return The number of bytes skipped.
     * @throws IOException On I/O errors or if the current position is not at a
     * byte boundary.
     */
    long skip(long n) throws IOException;

    /**
     * Get the number of bytes available in the input. See
     * {@link java.io.InputStream#available()}.
     * <p>
     * The current position in the input must be at a byte boundary.
     * @return The number of available bytes.
     * @throws IOException On I/O errors or if the current position is not at a
     * byte boundary.
     */
    int available() throws IOException;
}
|
162
src/main/java/org/at4j/support/io/BitOutput.java
Normal file
162
src/main/java/org/at4j/support/io/BitOutput.java
Normal file
@ -0,0 +1,162 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.io;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* This interface identifies a sink for bits.
|
||||
* <p>
|
||||
* The sink is assumed to have a position which may or may not be at a byte
|
||||
* boundary (every eight bits).
|
||||
* <p>
|
||||
* If an implementing class also extends {@link java.io.OutputStream} it can be
|
||||
* used as an output stream. This interface redefines
|
||||
* {@link java.io.OutputStream}'s write methods with the extra condition that
|
||||
* they may only be used if the current position of the sink is at a byte
|
||||
* boundary. The {@link #writeBytes(byte[], int, int)} method does not have that
|
||||
* limitation.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
* @see java.io.OutputStream
|
||||
* @see BitInput
|
||||
*/
|
||||
public interface BitOutput extends Closeable
{
    /**
     * Pad the output with zeroes up to the next byte boundary. Does nothing if
     * the current position already is at a byte boundary.
     * @throws IOException On I/O errors.
     */
    void padToByteBoundary() throws IOException;

    /**
     * Get the value of the unfinished byte, shifted so that the least
     * significant bit positions are used. The number of bit positions in use
     * is given by {@link #getNumberOfBitsInUnfinishedByte()}.
     * <p>
     * At a byte boundary, 0 is returned.
     * @return The value of the unfinished byte.
     */
    int getUnfinishedByte();

    /**
     * Get the number of bits that have been written to the last byte.
     * <p>
     * At a byte boundary, 0 is returned.
     * @return The number of bits that have been written to the last byte. This
     * is a number between 0 and 7 (inclusive).
     */
    int getNumberOfBitsInUnfinishedByte();

    /**
     * Write one bit.
     * @param val The bit ({@code true == 1}, {@code false == 0}).
     * @throws IOException On I/O errors.
     */
    void writeBit(boolean val) throws IOException;

    /**
     * Write at most eight bits.
     * @param val The value to write. The {@code no} rightmost bits of
     * {@code val} are written. It is not verified that {@code val} fits within
     * its {@code no} rightmost bits; if it does not, the written value is
     * simply truncated.
     * @param no The number of bits to write. This must be between 0 and 8
     * (inclusive).
     * @throws IndexOutOfBoundsException If {@code no} is less than 0 or greater
     * than 8.
     * @throws IOException On I/O errors.
     * @see #writeBitsLittleEndian(int, int)
     */
    void writeBits(int val, int no) throws IndexOutOfBoundsException, IOException;

    /**
     * Write at most 32 bits. The bits are written little endian with the most
     * significant bit first.
     * @param val The value to write. The {@code no} rightmost bits of
     * {@code val} are written. It is not verified that {@code val} fits within
     * its {@code no} rightmost bits; if it does not, the written value is
     * simply truncated.
     * @param no The number of bits to write. This must be between 0 and 32
     * (inclusive).
     * @throws IndexOutOfBoundsException If {@code no} is less than 0 or more
     * than 32.
     * @throws IOException On I/O errors.
     * @see #writeBits(int, int)
     */
    void writeBitsLittleEndian(int val, int no) throws IndexOutOfBoundsException, IOException;

    /**
     * Write an array of bytes to the output. In contrast to
     * {@link #write(byte[], int, int)}, the current position does not have to
     * be at a byte boundary.
     * @param barr The bytes to write.
     * @param off The offset in the byte array.
     * @param len The number of bytes to write.
     * @throws IndexOutOfBoundsException If the offset or the length is negative
     * or if the offset + length is larger than the byte array.
     * @throws IOException On I/O errors.
     * @see #write(byte[], int, int)
     */
    void writeBytes(byte[] barr, int off, int len) throws IndexOutOfBoundsException, IOException;

    /**
     * See {@link java.io.OutputStream#write(int)}.
     * <p>
     * The current position of the output must be at a byte boundary.
     * @param b The byte to write (0 - 255).
     * @throws IOException On I/O errors or if the current position is not at a
     * byte boundary.
     * @see java.io.OutputStream#write(int)
     */
    void write(int b) throws IOException;

    /**
     * See {@link java.io.OutputStream#write(byte[])}.
     * <p>
     * The current position of the output must be at a byte boundary.
     * @param barr The bytes to write.
     * @throws IOException On I/O errors or if the current position is not at a
     * byte boundary.
     * @see java.io.OutputStream#write(byte[])
     */
    void write(byte[] barr) throws IOException;

    /**
     * See {@link java.io.OutputStream#write(byte[], int, int)}.
     * <p>
     * The current position of the output must be at a byte boundary.
     * @param barr The bytes to write.
     * @param off The offset in the byte array.
     * @param len The number of bytes to write.
     * @throws IndexOutOfBoundsException If the offset or the length is negative
     * or if the offset + length is larger than the byte array.
     * @throws IOException On I/O errors or if the current position is not at a
     * byte boundary.
     * @see java.io.OutputStream#write(byte[], int, int)
     * @see #writeBytes(byte[], int, int)
     */
    void write(byte[] barr, int off, int len) throws IndexOutOfBoundsException, IOException;
}
|
@ -0,0 +1,380 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.io;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* This is an input stream that a client can use to read single or several bits
|
||||
* from an underlying {@link InputStream}. The bits are read in little-endian
|
||||
* bit order.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
public class LittleEndianBitInputStream extends InputStream implements BitInput
|
||||
{
|
||||
// 2^0
|
||||
private static final int POINTER_START_OF_BYTE = 0;
|
||||
// 2^7
|
||||
private static final int POINTER_END_OF_BYTE = 7;
|
||||
|
||||
private final InputStream m_in;
|
||||
|
||||
// The current byte
|
||||
private int m_curByte;
|
||||
// The pointer to the current bit location in the current byte.
|
||||
private int m_pointerInByte = POINTER_START_OF_BYTE;
|
||||
|
||||
private long m_numberOfBytesRead = 0;
|
||||
|
||||
/**
 * Create a bit input stream on top of the supplied stream. One byte is read
 * from {@code in} immediately.
 * @param in The stream to read from.
 * @throws NullPointerException If {@code in} is {@code null}.
 * @throws IOException On errors when reading the first byte.
 */
public LittleEndianBitInputStream(InputStream in) throws IOException
{
    if (in == null)
    {
        throw new NullPointerException();
    }

    m_in = in;
    // Load the first byte directly from the stream rather than through
    // readByte(): the read-bytes counter is deliberately not incremented here,
    // it always lags one byte behind.
    m_curByte = in.read();
}
|
||||
|
||||
private int readByte() throws IOException
|
||||
{
|
||||
int res = m_in.read();
|
||||
m_numberOfBytesRead += res != -1 ? 1 : 0;
|
||||
return res;
|
||||
}
|
||||
|
||||
private void incrementPointerPosition() throws IOException
|
||||
{
|
||||
if (m_pointerInByte == POINTER_END_OF_BYTE)
|
||||
{
|
||||
// Read a new byte
|
||||
m_curByte = readByte();
|
||||
m_pointerInByte = POINTER_START_OF_BYTE;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Increment the pointer only if we're not at EOF
|
||||
if (!isAtEof())
|
||||
{
|
||||
m_pointerInByte++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isAtEof()
|
||||
{
|
||||
return m_curByte == -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of whole bytes read this far.
|
||||
* @return The number of bytes read this far.
|
||||
*/
|
||||
public long getNumberOfBytesRead()
|
||||
{
|
||||
return m_numberOfBytesRead;
|
||||
}
|
||||
|
||||
private void assertNotAtEOF() throws IOException
|
||||
{
|
||||
if (isAtEof())
|
||||
{
|
||||
throwIOException("At EOF");
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isAtByteBoundary()
|
||||
{
|
||||
return m_pointerInByte == POINTER_START_OF_BYTE;
|
||||
}
|
||||
|
||||
private void assertAtByteBoundary() throws IOException
|
||||
{
|
||||
if (!isAtByteBoundary())
|
||||
{
|
||||
throwIOException("Not at byte boundary. Position: pos=" + m_pointerInByte);
|
||||
}
|
||||
}
|
||||
|
||||
private void throwIOException(String msg, long pos) throws IOException
|
||||
{
|
||||
throw new IOException(msg + ". Position in stream: " + pos);
|
||||
}
|
||||
|
||||
private void throwIOException(String msg) throws IOException
|
||||
{
|
||||
throw new IOException(msg + ". Position in stream: " + m_numberOfBytesRead);
|
||||
}
|
||||
|
||||
public void skipToByteBoundary() throws IOException
|
||||
{
|
||||
assertNotAtEOF();
|
||||
if (m_pointerInByte != POINTER_START_OF_BYTE)
|
||||
{
|
||||
m_pointerInByte = POINTER_START_OF_BYTE;
|
||||
m_curByte = readByte();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean readBit() throws IOException
|
||||
{
|
||||
assertNotAtEOF();
|
||||
boolean res = (m_curByte & (1 << (7 - m_pointerInByte))) > 0;
|
||||
incrementPointerPosition();
|
||||
return res;
|
||||
}
|
||||
|
||||
public int readBits(int no) throws IOException, IndexOutOfBoundsException
|
||||
{
|
||||
if (no < 0 || no > 8)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Invalid number of bits: " + no + ". Must be between 0 and 8 (inclusive)");
|
||||
}
|
||||
assertNotAtEOF();
|
||||
|
||||
if (no == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Bytes are stored little bit endian
|
||||
if (no + m_pointerInByte <= 8)
|
||||
{
|
||||
// All bits to read fit in the current byte
|
||||
int res = (m_curByte >> (8 - no - m_pointerInByte)) & ((1 << no) - 1);
|
||||
m_pointerInByte += no;
|
||||
if (m_pointerInByte > POINTER_END_OF_BYTE)
|
||||
{
|
||||
m_curByte = readByte();
|
||||
m_pointerInByte = POINTER_START_OF_BYTE;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Read remaining bits + first bits of next byte
|
||||
int noToReadInByte2 = no - (8 - m_pointerInByte);
|
||||
int res = (m_curByte & ((1 << (8 - m_pointerInByte)) - 1)) << noToReadInByte2;
|
||||
m_curByte = readByte();
|
||||
assertNotAtEOF();
|
||||
m_pointerInByte = noToReadInByte2;
|
||||
res += m_curByte >> (8 - noToReadInByte2);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
public int readBitsLittleEndian(int no) throws IOException, IndexOutOfBoundsException
|
||||
{
|
||||
if (no < 0 || no > 32)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Invalid number of bits: " + no + ". Must be between 0 and 32 (inclusive)");
|
||||
}
|
||||
|
||||
if (no == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int noReads = no / 8;
|
||||
int mod = no % 8;
|
||||
int res = 0;
|
||||
if (mod != 0)
|
||||
{
|
||||
res = readBits(mod) << (noReads * 8);
|
||||
}
|
||||
for (int i = 0; i < noReads; i++)
|
||||
{
|
||||
res += readBits(8) << ((noReads - i - 1) * 8);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public byte[] readBytes(byte[] barr, int off, int len) throws IOException, IndexOutOfBoundsException
|
||||
{
|
||||
if (off < 0)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Invalid offset " + off + ". It must be >= 0");
|
||||
}
|
||||
if (len < 0)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Invalid length " + len + ". It must be >= 0");
|
||||
}
|
||||
if (off + len > barr.length)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Invalid offset + length (" + off + " + " + len + "). It must be <= the length of the supplied array (" + barr.length + ")");
|
||||
}
|
||||
|
||||
assertNotAtEOF();
|
||||
|
||||
if (len == 0)
|
||||
{
|
||||
return barr;
|
||||
}
|
||||
|
||||
if (isAtByteBoundary())
|
||||
{
|
||||
// Special case: we are at the byte boundary. We just have to read
|
||||
// the len next bytes and return them.
|
||||
// The read method takes care of updating all internal state.
|
||||
int noRead = read(barr, off, len);
|
||||
if (noRead != len)
|
||||
{
|
||||
throwIOException("Unexpected EOF. Wanted to read " + len + " bytes. Got " + noRead, m_numberOfBytesRead - noRead);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int noRead = m_in.read(barr, off, len);
|
||||
m_numberOfBytesRead += noRead;
|
||||
if (noRead != len)
|
||||
{
|
||||
m_curByte = -1;
|
||||
m_pointerInByte = POINTER_START_OF_BYTE;
|
||||
throwIOException("Unexpected EOF. Wanted to read " + len + " bytes. Got " + noRead, m_numberOfBytesRead - noRead);
|
||||
}
|
||||
|
||||
// Shift bytes in the result array. Bytes are stored little (bit-)
|
||||
// endian.
|
||||
int lastByte = m_curByte;
|
||||
m_curByte = barr[off + len - 1] & 0xFF;
|
||||
// The distance to shift the second byte to the right.
|
||||
int rightShiftDistance = 8 - m_pointerInByte;
|
||||
for (int i = off; i < off + len; i++)
|
||||
{
|
||||
int newLastByte = barr[i];
|
||||
barr[i] = (byte) (((lastByte << m_pointerInByte) | ((barr[i] & 0xFF) >>> rightShiftDistance)) & 0xFF);
|
||||
lastByte = newLastByte;
|
||||
}
|
||||
}
|
||||
return barr;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException
|
||||
{
|
||||
assertAtByteBoundary();
|
||||
int res = m_curByte;
|
||||
if (m_curByte != -1)
|
||||
{
|
||||
m_curByte = readByte();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(byte[] barr) throws IOException
|
||||
{
|
||||
return read(barr, 0, barr.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(byte[] barr, int offset, int len) throws IndexOutOfBoundsException, IOException
|
||||
{
|
||||
if (offset < 0)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Illegal offset: " + offset);
|
||||
}
|
||||
else if (len < 0)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Illegal length: " + len);
|
||||
}
|
||||
else if ((offset + len) > barr.length)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Illegal offset + length: " + offset + " + " + len + ". Longer than the byte array: " + barr.length);
|
||||
}
|
||||
|
||||
assertAtByteBoundary();
|
||||
if (isAtEof())
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
barr[offset] = (byte) m_curByte;
|
||||
int res = 1;
|
||||
if (len > 1)
|
||||
{
|
||||
int noRead = m_in.read(barr, offset + 1, len - 1);
|
||||
if (noRead > 0)
|
||||
{
|
||||
res += noRead;
|
||||
m_numberOfBytesRead += noRead;
|
||||
}
|
||||
}
|
||||
m_curByte = readByte();
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long skip(long n) throws IOException
|
||||
{
|
||||
assertAtByteBoundary();
|
||||
if (n <= 0L)
|
||||
{
|
||||
return 0L;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isAtEof())
|
||||
{
|
||||
return 0L;
|
||||
}
|
||||
|
||||
if (n > 1L)
|
||||
{
|
||||
long noToSkip = n - 1L;
|
||||
long noSkipped = m_in.skip(noToSkip);
|
||||
m_numberOfBytesRead += noSkipped;
|
||||
if (noSkipped < noToSkip)
|
||||
{
|
||||
// At EOF
|
||||
m_curByte = -1;
|
||||
return noSkipped + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_curByte = readByte();
|
||||
return noSkipped + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_curByte = readByte();
|
||||
return 1L;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int available() throws IOException
|
||||
{
|
||||
assertAtByteBoundary();
|
||||
return m_in.available() + m_curByte != -1 ? 1 : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException
|
||||
{
|
||||
m_in.close();
|
||||
}
|
||||
}
|
@ -0,0 +1,265 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.io;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* This is an {@link OutputStream} that implements {@link BitOutput} and hence
|
||||
* can be used to write individual bits to the output. The bits are stored in
|
||||
* little-endian order.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.1
|
||||
*/
|
||||
public class LittleEndianBitOutputStream extends OutputStream implements BitOutput
|
||||
{
|
||||
// 2^0
|
||||
private static final int POINTER_START_OF_BYTE = 0;
|
||||
// 2^7
|
||||
private static final int POINTER_END_OF_BYTE = 7;
|
||||
|
||||
private final OutputStream m_out;
|
||||
|
||||
// The current byte
|
||||
private int m_curByte = 0;
|
||||
// The pointer to the current bit location in the current byte.
|
||||
private int m_pointerInByte = POINTER_START_OF_BYTE;
|
||||
|
||||
private long m_numberOfBytesWritten = 0;
|
||||
|
||||
public LittleEndianBitOutputStream(OutputStream wrapped)
|
||||
{
|
||||
// Null check
|
||||
wrapped.getClass();
|
||||
|
||||
m_out = wrapped;
|
||||
}
|
||||
|
||||
private boolean isAtByteBoundary()
|
||||
{
|
||||
return m_pointerInByte == POINTER_START_OF_BYTE;
|
||||
}
|
||||
|
||||
private void assertAtByteBoundary() throws IOException
|
||||
{
|
||||
if (!isAtByteBoundary())
|
||||
{
|
||||
throwIOException("Not at byte boundary. Position: pos=" + m_pointerInByte);
|
||||
}
|
||||
}
|
||||
|
||||
private void throwIOException(String msg) throws IOException
|
||||
{
|
||||
throw new IOException(msg + ". Position in stream: " + m_numberOfBytesWritten);
|
||||
}
|
||||
|
||||
private void writeCurByte() throws IOException
|
||||
{
|
||||
m_out.write(m_curByte);
|
||||
m_numberOfBytesWritten++;
|
||||
m_pointerInByte = POINTER_START_OF_BYTE;
|
||||
m_curByte = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the total number of whole <i>bytes</i> written by this stream so far.
|
||||
* @return The number of whole bytes written.
|
||||
*/
|
||||
public long getNumberOfBytesWritten()
|
||||
{
|
||||
return m_numberOfBytesWritten;
|
||||
}
|
||||
|
||||
public int getUnfinishedByte()
|
||||
{
|
||||
return m_pointerInByte > 0 ? m_curByte >>> (7 - (m_pointerInByte - 1)) : 0;
|
||||
}
|
||||
|
||||
public int getNumberOfBitsInUnfinishedByte()
|
||||
{
|
||||
return m_pointerInByte;
|
||||
}
|
||||
|
||||
public void padToByteBoundary() throws IOException
|
||||
{
|
||||
if (m_pointerInByte > POINTER_START_OF_BYTE)
|
||||
{
|
||||
writeCurByte();
|
||||
}
|
||||
}
|
||||
|
||||
public void writeBit(boolean val) throws IOException
|
||||
{
|
||||
if (val)
|
||||
{
|
||||
m_curByte = m_curByte | 1 << (7 - m_pointerInByte);
|
||||
}
|
||||
m_pointerInByte++;
|
||||
|
||||
if (m_pointerInByte > POINTER_END_OF_BYTE)
|
||||
{
|
||||
// Write the current byte and start a new one
|
||||
writeCurByte();
|
||||
}
|
||||
}
|
||||
|
||||
public void writeBits(int val, int no) throws IOException, IndexOutOfBoundsException
|
||||
{
|
||||
if (no < 0 || no > 8)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Invalid number of bits " + no + ". Must be between 0 and 8 (inclusive)");
|
||||
}
|
||||
|
||||
if (no == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_pointerInByte + no <= 8)
|
||||
{
|
||||
// All bits to write fit in the current byte
|
||||
m_curByte = m_curByte | ((val & ((1 << no) - 1)) << (8 - m_pointerInByte - no));
|
||||
m_pointerInByte += no;
|
||||
if (m_pointerInByte > POINTER_END_OF_BYTE)
|
||||
{
|
||||
writeCurByte();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Bits will have to be written in the next byte too
|
||||
int bitsToWriteInCurByte = 8 - m_pointerInByte;
|
||||
int bitsToWriteInNextByte = no - bitsToWriteInCurByte;
|
||||
m_curByte = m_curByte | (val >>> (no - bitsToWriteInCurByte));
|
||||
writeCurByte();
|
||||
m_curByte = (val & ((1 << bitsToWriteInNextByte) - 1)) << (8 - bitsToWriteInNextByte);
|
||||
m_pointerInByte = bitsToWriteInNextByte;
|
||||
}
|
||||
}
|
||||
|
||||
public void writeBitsLittleEndian(int val, int no) throws IndexOutOfBoundsException, IOException
|
||||
{
|
||||
if (no < 0 || no > 32)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Invalid number of bits to write " + no + ". It must be between 0 and 32 (inclusive)");
|
||||
}
|
||||
|
||||
if (no == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
int noWrites = no / 8;
|
||||
int mod = no % 8;
|
||||
if (mod != 0)
|
||||
{
|
||||
writeBits(val >>> (noWrites * 8), mod);
|
||||
}
|
||||
for (int i = 0; i < noWrites; i++)
|
||||
{
|
||||
writeBits(val >>> ((noWrites - i - 1) * 8), 8);
|
||||
}
|
||||
}
|
||||
|
||||
public void writeBytes(byte[] barr, int off, int len) throws IndexOutOfBoundsException, IOException
|
||||
{
|
||||
if (off < 0)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Invalid offset " + off + ". It must be >= 0");
|
||||
}
|
||||
if (len < 0)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Invalid length " + len + ". It must be >= 0");
|
||||
}
|
||||
if (off + len > barr.length)
|
||||
{
|
||||
throw new IndexOutOfBoundsException("Invalid offset + length (" + off + " + " + len + "). It must be <= the length of the supplied array (" + barr.length + ")");
|
||||
}
|
||||
|
||||
if (len == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (isAtByteBoundary())
|
||||
{
|
||||
// Special case
|
||||
m_out.write(barr, off, len);
|
||||
m_numberOfBytesWritten += len;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Copy the bytes to write to a new array. We cannot modify barr,
|
||||
// even if it is tempting.
|
||||
byte[] toWrite = new byte[len];
|
||||
System.arraycopy(barr, off, toWrite, 0, len);
|
||||
|
||||
int prevByte = m_curByte;
|
||||
int leftShiftDistance = 8 - m_pointerInByte;
|
||||
for (int i = 0; i < len; i++)
|
||||
{
|
||||
// Shift in bits from the previous byte and shift out bytes
|
||||
// from this byte
|
||||
int nextPrevByte = (toWrite[i] & 0xFF) << leftShiftDistance;
|
||||
toWrite[i] = (byte) ((prevByte | ((toWrite[i] & 0xFF) >>> m_pointerInByte)) & 0xFF);
|
||||
prevByte = nextPrevByte;
|
||||
}
|
||||
m_curByte = prevByte & 0xFF;
|
||||
m_out.write(toWrite);
|
||||
m_numberOfBytesWritten += len;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(int b) throws IOException
|
||||
{
|
||||
assertAtByteBoundary();
|
||||
m_out.write(b);
|
||||
m_numberOfBytesWritten++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(byte[] barr) throws IOException
|
||||
{
|
||||
write(barr, 0, barr.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(byte[] barr, int off, int len) throws IOException
|
||||
{
|
||||
assertAtByteBoundary();
|
||||
m_out.write(barr, off, len);
|
||||
m_numberOfBytesWritten += len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the output stream.
|
||||
* <p>
|
||||
* This method does not automatically pad the last written bits to a full
|
||||
* byte. If there are bits written to it the stream must be padded before
|
||||
* closing it. See {@link #padToByteBoundary()}.
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException
|
||||
{
|
||||
m_out.close();
|
||||
super.close();
|
||||
}
|
||||
}
|
25
src/main/java/org/at4j/support/io/package-info.java
Normal file
25
src/main/java/org/at4j/support/io/package-info.java
Normal file
@ -0,0 +1,25 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/**
|
||||
* Support classes that probably would have been in {@code java.io} if they
|
||||
* had been a part of Java.
|
||||
* @since 1.0
|
||||
* @author Karl Gustafsson
|
||||
*/
|
||||
package org.at4j.support.io;
|
58
src/main/java/org/at4j/support/lang/At4JException.java
Normal file
58
src/main/java/org/at4j/support/lang/At4JException.java
Normal file
@ -0,0 +1,58 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.lang;
|
||||
|
||||
/**
 * Common superclass for the exceptions thrown by this project. Because it
 * extends {@link RuntimeException} it is unchecked.
 * @author Karl Gustafsson
 * @since 1.0
 */
public class At4JException extends RuntimeException
{
    private static final long serialVersionUID = 1L;

    /**
     * Create an exception carrying only a message.
     * @param msg The message.
     */
    public At4JException(final String msg)
    {
        super(msg);
    }

    /**
     * Create an exception carrying only a cause.
     * @param t The exception to wrap.
     */
    public At4JException(final Throwable t)
    {
        super(t);
    }

    /**
     * Create an exception carrying both a message and a cause.
     * @param msg The message.
     * @param t The exception to wrap.
     */
    public At4JException(final String msg, final Throwable t)
    {
        super(msg, t);
    }
}
|
197
src/main/java/org/at4j/support/lang/SignedInteger.java
Normal file
197
src/main/java/org/at4j/support/lang/SignedInteger.java
Normal file
@ -0,0 +1,197 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.lang;
|
||||
|
||||
/**
 * An immutable object wrapper around a signed integer value (i.e. a plain
 * {@code int}), with conversions to and from four-byte arrays.
 * <p>
 * Instances are obtained from the static creation methods; the constructor is
 * private.
 * @author Karl Gustafsson
 * @since 1.1.1
 * @see UnsignedInteger
 * @see SignedLong
 */
public class SignedInteger implements Comparable<SignedInteger>
{
    private static final int BYTE_COUNT = 4;

    /**
     * The shared instance for the value {@code 0}.
     */
    public static final SignedInteger ZERO = new SignedInteger(0);

    /**
     * The shared instance for the value {@code 1}.
     */
    public static final SignedInteger ONE = new SignedInteger(1);

    private final int m_value;

    private SignedInteger(int value)
    {
        m_value = value;
    }

    /**
     * Get a signed integer object for a value. The shared {@link #ZERO} and
     * {@link #ONE} instances are returned for 0 and 1.
     * @param value The integer value.
     * @return The signed integer value.
     */
    public static SignedInteger valueOf(int value)
    {
        switch (value)
        {
            case 0:
                return ZERO;
            case 1:
                return ONE;
            default:
                return new SignedInteger(value);
        }
    }

    /**
     * Get the signed integer value.
     * @return The value, widened to a {@code long}.
     */
    public long intValue()
    {
        return m_value;
    }

    /**
     * Get the value as a four bytes long array.
     * <p>
     * Byte order note: despite the method name, index 0 of the returned array
     * holds the <i>least</i> significant byte and index 3 the most
     * significant one.
     * @return A new four bytes long array.
     * @see #fromBigEndianByteArray(byte[])
     * @see #getLittleEndianByteArray()
     */
    public byte[] getBigEndianByteArray()
    {
        final byte[] bytes = new byte[BYTE_COUNT];
        for (int i = 0; i < BYTE_COUNT; i++)
        {
            bytes[i] = (byte) (m_value >> (8 * i));
        }
        return bytes;
    }

    /**
     * Create a signed integer value from a four bytes long array in the
     * byte order produced by {@link #getBigEndianByteArray()} (index 0 is
     * the least significant byte).
     * @param barr The byte array. It must be four bytes long.
     * @return The signed integer value.
     * @throws IllegalArgumentException If the byte array is not four bytes
     * long.
     * @see #getBigEndianByteArray()
     * @see #fromLittleEndianByteArray(byte[])
     */
    public static SignedInteger fromBigEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        requireFourBytes(barr);
        int value = 0;
        for (int i = BYTE_COUNT - 1; i >= 0; i--)
        {
            value = (value << 8) | (barr[i] & 0xFF);
        }
        return valueOf(value);
    }

    /**
     * Get the value as a four bytes long array.
     * <p>
     * Byte order note: despite the method name, index 0 of the returned array
     * holds the <i>most</i> significant byte and index 3 the least
     * significant one.
     * @return A new four bytes long array.
     * @see #getBigEndianByteArray()
     * @see #fromBigEndianByteArray(byte[])
     */
    public byte[] getLittleEndianByteArray()
    {
        final byte[] bytes = new byte[BYTE_COUNT];
        for (int i = 0; i < BYTE_COUNT; i++)
        {
            bytes[i] = (byte) (m_value >> (8 * (BYTE_COUNT - 1 - i)));
        }
        return bytes;
    }

    /**
     * Create a signed integer value from a four bytes long array in the
     * byte order produced by {@link #getLittleEndianByteArray()} (index 0 is
     * the most significant byte).
     * @param barr The byte array. It must be four bytes long.
     * @return The signed integer value.
     * @throws IllegalArgumentException If the byte array is not four bytes
     * long.
     * @see #getLittleEndianByteArray()
     * @see #fromBigEndianByteArray(byte[])
     */
    public static SignedInteger fromLittleEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        requireFourBytes(barr);
        int value = 0;
        for (int i = 0; i < BYTE_COUNT; i++)
        {
            value = (value << 8) | (barr[i] & 0xFF);
        }
        return valueOf(value);
    }

    // Reject arrays that are not exactly four bytes long.
    private static void requireFourBytes(byte[] barr)
    {
        if (barr.length != BYTE_COUNT)
        {
            throw new IllegalArgumentException("Illegal size of supplied byte array: " + barr.length + ". It must be four bytes long");
        }
    }

    @Override
    public boolean equals(Object o)
    {
        // instanceof is false for null, so no separate null check is needed.
        return (o instanceof SignedInteger) && ((SignedInteger) o).m_value == m_value;
    }

    @Override
    public int hashCode()
    {
        return m_value;
    }

    public int compareTo(SignedInteger l2)
    {
        final int other = l2.m_value;
        if (m_value < other)
        {
            return -1;
        }
        return m_value == other ? 0 : 1;
    }

    @Override
    public String toString()
    {
        return "" + m_value;
    }
}
|
213
src/main/java/org/at4j/support/lang/SignedLong.java
Normal file
213
src/main/java/org/at4j/support/lang/SignedLong.java
Normal file
@ -0,0 +1,213 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.lang;
|
||||
|
||||
/**
 * An immutable object wrapper around a signed long value (i.e. a plain
 * {@code long}), with conversions to and from eight-byte arrays.
 * <p>
 * Instances are obtained from the static creation methods; the constructor is
 * private.
 * @author Karl Gustafsson
 * @since 1.0
 * @see UnsignedLong
 * @see SignedInteger
 */
public class SignedLong implements Comparable<SignedLong>
{
    private static final int BYTE_COUNT = 8;

    /**
     * The shared instance for the value {@code 0}.
     */
    public static final SignedLong ZERO = new SignedLong(0);

    /**
     * The shared instance for the value {@code 1}.
     */
    public static final SignedLong ONE = new SignedLong(1);

    private final long m_value;

    private SignedLong(long value)
    {
        m_value = value;
    }

    /**
     * Get a signed long object for a value. The shared {@link #ZERO} and
     * {@link #ONE} instances are returned for 0 and 1.
     * @param value The long value.
     * @return The signed long value.
     */
    public static SignedLong valueOf(long value)
    {
        if (value == 0)
        {
            return ZERO;
        }
        if (value == 1)
        {
            return ONE;
        }
        return new SignedLong(value);
    }

    /**
     * Get the signed long value.
     * @return The signed long value.
     */
    public long longValue()
    {
        return m_value;
    }

    /**
     * Get the value as an eight bytes long array.
     * <p>
     * Byte order note: despite the method name, index 0 of the returned array
     * holds the <i>least</i> significant byte and index 7 the most
     * significant one.
     * @return A new eight bytes long array.
     * @see #fromBigEndianByteArray(byte[])
     * @see #getLittleEndianByteArray()
     */
    public byte[] getBigEndianByteArray()
    {
        final byte[] bytes = new byte[BYTE_COUNT];
        for (int i = 0; i < BYTE_COUNT; i++)
        {
            bytes[i] = (byte) (m_value >> (8 * i));
        }
        return bytes;
    }

    /**
     * Create a signed long value from an eight bytes long array in the byte
     * order produced by {@link #getBigEndianByteArray()} (index 0 is the
     * least significant byte).
     * @param barr The byte array. It must be eight bytes long.
     * @return The signed long value.
     * @throws IllegalArgumentException If the byte array is not eight bytes
     * long.
     * @see #getBigEndianByteArray()
     * @see #fromLittleEndianByteArray(byte[])
     */
    public static SignedLong fromBigEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        requireEightBytes(barr);
        long value = 0L;
        for (int i = BYTE_COUNT - 1; i >= 0; i--)
        {
            value = (value << 8) | (barr[i] & 0xFFL);
        }
        return valueOf(value);
    }

    /**
     * Get the value as an eight bytes long array.
     * <p>
     * Byte order note: despite the method name, index 0 of the returned array
     * holds the <i>most</i> significant byte and index 7 the least
     * significant one.
     * @return A new eight bytes long array.
     * @see #getBigEndianByteArray()
     * @see #fromBigEndianByteArray(byte[])
     */
    public byte[] getLittleEndianByteArray()
    {
        final byte[] bytes = new byte[BYTE_COUNT];
        for (int i = 0; i < BYTE_COUNT; i++)
        {
            bytes[i] = (byte) (m_value >> (8 * (BYTE_COUNT - 1 - i)));
        }
        return bytes;
    }

    /**
     * Create a signed long value from an eight bytes long array in the byte
     * order produced by {@link #getLittleEndianByteArray()} (index 0 is the
     * most significant byte).
     * @param barr The byte array. It must be eight bytes long.
     * @return The signed long value.
     * @throws IllegalArgumentException If the byte array is not eight bytes
     * long.
     * @see #getLittleEndianByteArray()
     * @see #fromBigEndianByteArray(byte[])
     */
    public static SignedLong fromLittleEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        requireEightBytes(barr);
        long value = 0L;
        for (int i = 0; i < BYTE_COUNT; i++)
        {
            value = (value << 8) | (barr[i] & 0xFFL);
        }
        return valueOf(value);
    }

    // Reject arrays that are not exactly eight bytes long.
    private static void requireEightBytes(byte[] barr)
    {
        if (barr.length != BYTE_COUNT)
        {
            throw new IllegalArgumentException("Illegal size of supplied byte array: " + barr.length + ". It must be eight bytes long");
        }
    }

    @Override
    public boolean equals(Object o)
    {
        // instanceof is false for null, so no separate null check is needed.
        return (o instanceof SignedLong) && ((SignedLong) o).m_value == m_value;
    }

    @Override
    public int hashCode()
    {
        // Fold the upper 32 bits into the lower 32 bits.
        final long folded = m_value ^ (m_value >>> 32);
        return (int) folded;
    }

    public int compareTo(SignedLong l2)
    {
        final long other = l2.m_value;
        if (m_value < other)
        {
            return -1;
        }
        return m_value == other ? 0 : 1;
    }

    @Override
    public String toString()
    {
        return "" + m_value;
    }
}
|
197
src/main/java/org/at4j/support/lang/UnsignedByte.java
Normal file
197
src/main/java/org/at4j/support/lang/UnsignedByte.java
Normal file
@ -0,0 +1,197 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.lang;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
|
||||
|
||||
/**
|
||||
* This object represents an unsigned byte (eight bits) with a value between
|
||||
* {@code 0} and {@code 255} (inclusive). It is immutable.
|
||||
* <p>
|
||||
* Unsigned byte instances are created by calling any of the static {@code
|
||||
* valueOf} methods on this class.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.0
|
||||
* @see UnsignedShort
|
||||
* @see UnsignedInteger
|
||||
* @see UnsignedLong
|
||||
*/
|
||||
public final class UnsignedByte implements Serializable, Comparable<UnsignedByte>
{
    private static final long serialVersionUID = 1L;

    /**
     * The maximum value of an unsigned byte (255).
     */
    public static final short MAX_VALUE = (1 << 8) - 1;

    /**
     * The minimum value of an unsigned byte (0).
     */
    public static final short MIN_VALUE = 0;

    /**
     * The value 0.
     */
    public static final UnsignedByte ZERO = new UnsignedByte((byte) 0);

    /**
     * The value 1.
     */
    public static final UnsignedByte ONE = new UnsignedByte((byte) 1);

    // The raw bit pattern. The field name is part of the default serialized
    // form of this class, so it must not be renamed.
    private final byte m_value;

    private UnsignedByte(byte value)
    {
        m_value = value;
    }

    /**
     * Create an unsigned byte value from the supplied byte value. The supplied
     * value is treated as if it was unsigned, which means that negative
     * argument values will result in unsigned byte values between 128 and 255.
     * The shared {@link #ZERO} and {@link #ONE} instances are returned for the
     * values 0 and 1.
     * @param value The value.
     * @return The unsigned byte value.
     * @see #valueOf(short)
     * @see #valueOf(int)
     */
    public static UnsignedByte valueOf(byte value)
    {
        switch (value)
        {
            case 0:
                return ZERO;
            case 1:
                return ONE;
            default:
                return new UnsignedByte(value);
        }
    }

    /**
     * Create a new unsigned byte value from the supplied {@code short} value
     * which must be in the range {@code 0} to {@code 255} (inclusive).
     * @param value The value.
     * @return An unsigned byte value.
     * @throws IllegalArgumentException If the supplied value is not in the
     * permitted range.
     */
    public static UnsignedByte valueOf(short value) throws IllegalArgumentException
    {
        // Widening to int is lossless, so the int overload performs exactly
        // the same range check and produces exactly the same error message.
        return valueOf((int) value);
    }

    /**
     * Create a new unsigned byte value from the supplied {@code int} value
     * which must be in the range {@code 0} to {@code 255} (inclusive).
     * @param value The value.
     * @return An unsigned byte value.
     * @throws IllegalArgumentException If the supplied value is not in the
     * permitted range.
     */
    public static UnsignedByte valueOf(int value) throws IllegalArgumentException
    {
        if ((value < MIN_VALUE) || (value > MAX_VALUE))
        {
            throw new IllegalArgumentException("Illegal unsigned byte value " + value + ". It must be between " + MIN_VALUE + " and " + MAX_VALUE + " (inclusive)");
        }
        // The value is known to be 0..255 here, so the narrowing cast simply
        // keeps its eight low bits.
        return valueOf((byte) value);
    }

    /**
     * Get the unsigned byte value as an {@code int}.
     * @return The value, between {@code 0} and {@code 255} (inclusive).
     */
    public int intValue()
    {
        // Mask off the sign extension that the byte-to-int promotion adds.
        return m_value & 0xFF;
    }

    /**
     * Get the unsigned byte value as a {@code short}.
     * @return The value, between {@code 0} and {@code 255} (inclusive).
     */
    public short shortValue()
    {
        return (short) (m_value & 0xFF);
    }

    /**
     * Get the unsigned byte value as a signed byte value between {@code -128}
     * and {@code 127} (inclusive).
     * @return The value.
     */
    public byte byteValue()
    {
        return m_value;
    }

    /**
     * Is the specified bit set in the byte value?
     * @param no The index number of the bit. Bit 0 is the bit representing the
     * value 1, bit 7 is the bit representing the value 128.
     * @return {@code true} if the specified bit is set.
     * @throws IllegalArgumentException If {@code no} is not in the range
     * {@code 0 <= no <= 7} (inclusive).
     */
    public boolean isBitSet(int no) throws IllegalArgumentException
    {
        if (no < 0 || no > 7)
        {
            throw new IllegalArgumentException("Invalid bit number " + no + ". It must be between 0 and 7 (inclusive)");
        }
        return (m_value & (1 << no)) != 0;
    }

    /**
     * Two unsigned bytes are equal if they hold the same value.
     * @param o The object to compare with.
     * @return {@code true} if {@code o} is an {@code UnsignedByte} with the
     * same value as this object.
     */
    @Override
    public boolean equals(Object o)
    {
        return (o instanceof UnsignedByte) && (((UnsignedByte) o).m_value == m_value);
    }

    /**
     * @return The stored byte, widened to an {@code int}.
     */
    @Override
    public int hashCode()
    {
        return m_value;
    }

    /**
     * Compare the unsigned values of this object and the supplied object.
     * @param b2 The value to compare with.
     * @return The difference between this value and {@code b2}: negative,
     * zero or positive if this value is less than, equal to or greater than
     * {@code b2}.
     */
    @Override
    public int compareTo(UnsignedByte b2)
    {
        // Both operands are in 0..255, so the subtraction cannot overflow.
        return intValue() - b2.intValue();
    }

    /**
     * @return The unsigned value in decimal notation.
     */
    @Override
    public String toString()
    {
        return Short.toString((short) (m_value & 0xFF));
    }
}
|
243
src/main/java/org/at4j/support/lang/UnsignedInteger.java
Normal file
243
src/main/java/org/at4j/support/lang/UnsignedInteger.java
Normal file
@ -0,0 +1,243 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.lang;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* This object represents an unsigned integer (four bytes or 32 bits) with a
|
||||
* value between {@code 0} and {@code 4294967295}. It is immutable.
|
||||
* <p>
|
||||
* Unsigned integers are created by calling any of the static creation methods
|
||||
* of this class.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.0
|
||||
* @see SignedInteger
|
||||
* @see UnsignedByte
|
||||
* @see UnsignedShort
|
||||
* @see UnsignedLong
|
||||
*/
|
||||
public final class UnsignedInteger implements Serializable, Comparable<UnsignedInteger>
{
    private static final long serialVersionUID = 1L;

    /**
     * Each unsigned integer is four bytes long.
     */
    public static final int SIZE = 4;

    /**
     * The maximum value of an unsigned integer (4294967295).
     */
    public static final long MAX_VALUE = (1L << 32) - 1;

    /**
     * The minimum value of an unsigned integer (0).
     */
    public static final int MIN_VALUE = 0;

    /**
     * The value 0.
     */
    public static final UnsignedInteger ZERO = new UnsignedInteger(0);

    /**
     * The value 1.
     */
    public static final UnsignedInteger ONE = new UnsignedInteger(1);

    // The raw 32-bit pattern. The field name is part of the default serialized
    // form of this class, so it must not be renamed.
    private final int m_value;

    private UnsignedInteger(int value)
    {
        m_value = value;
    }

    /**
     * Create a new unsigned integer. The supplied integer is treated as an
     * unsigned value, which means that negative argument values will result in
     * unsigned integer values between {@code 2147483648} and {@code 4294967295}
     * (inclusive). The shared {@link #ZERO} and {@link #ONE} instances are
     * returned for the values 0 and 1.
     * @param value The signed integer value.
     * @return An unsigned integer value.
     */
    public static UnsignedInteger valueOf(int value)
    {
        switch (value)
        {
            case 0:
                return ZERO;
            case 1:
                return ONE;
            default:
                return new UnsignedInteger(value);
        }
    }

    /**
     * Create an unsigned integer from the supplied long value which must be
     * between {@code 0} and {@code 4294967295} (inclusive).
     * @param value The value.
     * @return The unsigned integer value.
     * @throws IllegalArgumentException If the supplied value is not in the
     * permitted range.
     */
    public static UnsignedInteger valueOf(long value) throws IllegalArgumentException
    {
        if ((value < MIN_VALUE) || (value > MAX_VALUE))
        {
            throw new IllegalArgumentException("Illegal unsigned integer value " + value + ". It must be between " + MIN_VALUE + " and " + MAX_VALUE + " (inclusive)");
        }
        // The value fits in 32 bits here, so the narrowing cast keeps exactly
        // those bits. No mask is needed.
        return valueOf((int) value);
    }

    /**
     * Get the unsigned integer value represented as a {@code long}.
     * @return The value, between {@code 0} and {@code 4294967295} (inclusive).
     */
    public long longValue()
    {
        // The long mask removes the sign extension of the int-to-long
        // promotion.
        return m_value & 0xFFFFFFFFL;
    }

    /**
     * Get the unsigned integer value converted to a signed integer.
     * @return The unsigned integer value converted to a signed integer.
     */
    public int intValue()
    {
        return m_value;
    }

    /**
     * Get the unsigned integer value as a four bytes long byte array.
     * <p>
     * Note that, despite the method name, the <i>least</i> significant byte is
     * stored at index 0 and the most significant byte at index 3.
     * @return The value as a four bytes long array, least significant byte
     * first.
     */
    public byte[] getBigEndianByteArray()
    {
        byte[] res = new byte[4];
        res[0] = (byte) (m_value & 0xFF);
        res[1] = (byte) ((m_value >>> 8) & 0xFF);
        res[2] = (byte) ((m_value >>> 16) & 0xFF);
        res[3] = (byte) ((m_value >>> 24) & 0xFF);
        return res;
    }

    /**
     * Create an unsigned integer value from a four bytes long byte array in
     * the byte order produced by {@link #getBigEndianByteArray()} (least
     * significant byte first).
     * @param barr The byte array. It must be four bytes long.
     * @return The unsigned integer.
     * @throws IllegalArgumentException If the supplied byte array is not four
     * bytes long.
     * @see #fromBigEndianByteArray(byte[], int)
     * @see #fromBigEndianByteArrayToLong(byte[], int)
     */
    public static UnsignedInteger fromBigEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        if (barr.length != 4)
        {
            throw new IllegalArgumentException("The supplied byte array must be four bytes long");
        }
        return fromBigEndianByteArray(barr, 0);
    }

    /**
     * Create an unsigned integer value from four bytes read from the given
     * offset position in the supplied byte array. The most significant byte is
     * the last byte read.
     * @param barr The byte array to read from.
     * @param offset The offset in the byte array where the least significant
     * (first) byte is.
     * @return An unsigned integer.
     * @throws ArrayIndexOutOfBoundsException If the supplied array is too short
     * or if the offset is negative.
     * @see #fromBigEndianByteArray(byte[])
     * @see #fromBigEndianByteArrayToLong(byte[], int)
     */
    public static UnsignedInteger fromBigEndianByteArray(byte[] barr, int offset) throws ArrayIndexOutOfBoundsException
    {
        // int arithmetic is sufficient here: only the 32-bit pattern is
        // stored, so wrapping into the sign bit is intended.
        return valueOf((barr[offset] & 0xFF) + ((barr[offset + 1] & 0xFF) << 8) + ((barr[offset + 2] & 0xFF) << 16) + ((barr[offset + 3] & 0xFF) << 24));
    }

    /**
     * Create a long value representing the unsigned integer value in the byte
     * array at the specified offset. The most significant byte is the last byte
     * read.
     * @param barr The byte array to read from.
     * @param offset The offset in the byte array where the least significant
     * (first) byte is.
     * @return A non-negative {@code long} representing the unsigned integer.
     * @throws ArrayIndexOutOfBoundsException If the supplied array is too short
     * or if the offset is negative.
     * @see #fromBigEndianByteArray(byte[])
     * @see #fromBigEndianByteArray(byte[], int)
     * @see #fromLittleEndianByteArrayToLong(byte[], int)
     * @since 1.1
     */
    public static long fromBigEndianByteArrayToLong(byte[] barr, int offset) throws ArrayIndexOutOfBoundsException
    {
        // Every byte is widened to long BEFORE it is shifted. With int
        // arithmetic a most significant byte >= 0x80 would land in the int
        // sign bit and the result would be sign-extended to a negative long.
        return (barr[offset] & 0xFFL) | ((barr[offset + 1] & 0xFFL) << 8) | ((barr[offset + 2] & 0xFFL) << 16) | ((barr[offset + 3] & 0xFFL) << 24);
    }

    /**
     * Create a long value representing the unsigned integer value in the byte
     * array at the specified offset. The most significant byte is the first
     * byte read.
     * @param barr The byte array to read from.
     * @param offset The offset in the byte array where the most significant
     * (first) byte is.
     * @return A non-negative {@code long} representing the unsigned integer.
     * @throws ArrayIndexOutOfBoundsException If the supplied array is too short
     * or if the offset is negative.
     * @see #fromBigEndianByteArrayToLong(byte[], int)
     * @since 1.1
     */
    public static long fromLittleEndianByteArrayToLong(byte[] barr, int offset) throws ArrayIndexOutOfBoundsException
    {
        // Widen to long before shifting so that a first byte >= 0x80 does not
        // turn the result negative.
        return (barr[offset + 3] & 0xFFL) | ((barr[offset + 2] & 0xFFL) << 8) | ((barr[offset + 1] & 0xFFL) << 16) | ((barr[offset] & 0xFFL) << 24);
    }

    /**
     * Two unsigned integers are equal if they hold the same value.
     * @param o The object to compare with.
     * @return {@code true} if {@code o} is an {@code UnsignedInteger} with the
     * same value as this object.
     */
    @Override
    public boolean equals(Object o)
    {
        return (o instanceof UnsignedInteger) && (((UnsignedInteger) o).m_value == m_value);
    }

    /**
     * @return The stored 32-bit pattern.
     */
    @Override
    public int hashCode()
    {
        return m_value;
    }

    /**
     * Compare the unsigned values of this object and the supplied object.
     * @param i2 The value to compare with.
     * @return {@code -1}, {@code 0} or {@code 1} if this value is less than,
     * equal to or greater than {@code i2}.
     */
    @Override
    public int compareTo(UnsignedInteger i2)
    {
        // Compare the zero-extended values as primitives; no boxing needed.
        final long mine = longValue();
        final long theirs = i2.longValue();
        return mine < theirs ? -1 : (mine == theirs ? 0 : 1);
    }

    /**
     * @return The unsigned value in decimal notation.
     */
    @Override
    public String toString()
    {
        return Long.toString(m_value & 0xFFFFFFFFL);
    }
}
|
224
src/main/java/org/at4j/support/lang/UnsignedLong.java
Normal file
224
src/main/java/org/at4j/support/lang/UnsignedLong.java
Normal file
@ -0,0 +1,224 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.lang;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.math.BigInteger;
|
||||
|
||||
/**
|
||||
* This object represents an unsigned long (eight bytes or 64 bits) with a value
|
||||
* between {@code 0} and {@code 18446744073709551615}. It is immutable.
|
||||
* <p>
|
||||
* Unsigned longs are created by calling any of the static creation methods of
|
||||
* this class.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.0
|
||||
* @see SignedLong
|
||||
* @see UnsignedByte
|
||||
* @see UnsignedShort
|
||||
* @see UnsignedInteger
|
||||
*/
|
||||
public final class UnsignedLong implements Serializable, Comparable<UnsignedLong>
{
    private static final long serialVersionUID = 1L;

    /**
     * The minimum allowed value (0).
     */
    public static final BigInteger MIN_VALUE = BigInteger.valueOf(0L);

    /**
     * The maximum allowed value (18446744073709551615).
     */
    public static final BigInteger MAX_VALUE;

    /**
     * The value zero.
     */
    public static final UnsignedLong ZERO = new UnsignedLong(0L);

    /**
     * The value one.
     */
    public static final UnsignedLong ONE = new UnsignedLong(1L);

    // 2^63: the weight of the bit that a signed long uses as its sign bit.
    private static final BigInteger HIGHEST_BIT_VALUE;

    static
    {
        BigInteger two = BigInteger.valueOf(2L);
        MAX_VALUE = two.pow(64).subtract(BigInteger.ONE);
        HIGHEST_BIT_VALUE = two.pow(63);
    }

    // The raw 64-bit pattern. The field name is part of the default serialized
    // form of this class, so it must not be renamed.
    private final long m_value;

    private UnsignedLong(long value)
    {
        m_value = value;
    }

    /**
     * Create an unsigned long. The supplied value is treated as an unsigned
     * long, which means that negative argument values will result in unsigned
     * long values between {@code 9223372036854775808} and {@code
     * 18446744073709551615} (inclusive). The shared {@link #ZERO} and
     * {@link #ONE} instances are returned for the values 0 and 1.
     * @param value The value.
     * @return An unsigned long value.
     */
    public static UnsignedLong valueOf(long value)
    {
        if (value == 0L)
        {
            return ZERO;
        }
        if (value == 1L)
        {
            return ONE;
        }
        return new UnsignedLong(value);
    }

    /**
     * Create an unsigned long value from the supplied {@link BigInteger} value
     * which must be in the range {@code 0} to {@code 18446744073709551615}
     * (inclusive)
     * @param value The value.
     * @return An unsigned long value.
     * @throws IllegalArgumentException If the supplied value is negative or if
     * it is greater than {@link #MAX_VALUE}.
     */
    public static UnsignedLong valueOf(BigInteger value) throws IllegalArgumentException
    {
        if ((value.compareTo(MIN_VALUE) < 0) || (value.compareTo(MAX_VALUE) > 0))
        {
            throw new IllegalArgumentException("Illegal unsigned long value " + value + ". It must be between 0 and " + MAX_VALUE + " (inclusive)");
        }
        // BigInteger.longValue() keeps the 64 low-order bits, which is the
        // complete value after the range check above.
        return valueOf(value.longValue());
    }

    /**
     * Get the unsigned long value as a {@link BigInteger}.
     * @return The unsigned long value as a {@link BigInteger}.
     */
    public BigInteger bigIntValue()
    {
        // Convert the 63 low bits first, then add 2^63 if the top bit is set.
        BigInteger res = BigInteger.valueOf(m_value & 0x7FFFFFFFFFFFFFFFL);
        return m_value < 0 ? res.add(HIGHEST_BIT_VALUE) : res;
    }

    /**
     * Return the value as a signed long. Values up to and including
     * {@link Long#MAX_VALUE} are returned as non-negative longs. Greater
     * values are returned as negative longs.
     * @return The value as a signed long value.
     */
    public long longValue()
    {
        return m_value;
    }

    /**
     * Get the unsigned long value as an eight bytes long byte array.
     * <p>
     * Note that, despite the method name, the <i>least</i> significant byte is
     * stored at index 0 and the most significant byte at index 7.
     * @return The value as an eight bytes long array, least significant byte
     * first.
     */
    public byte[] getBigEndianByteArray()
    {
        byte[] res = new byte[8];
        res[0] = (byte) (m_value & 0xFF);
        res[1] = (byte) ((m_value >>> 8) & 0xFF);
        res[2] = (byte) ((m_value >>> 16) & 0xFF);
        res[3] = (byte) ((m_value >>> 24) & 0xFF);
        res[4] = (byte) ((m_value >>> 32) & 0xFF);
        res[5] = (byte) ((m_value >>> 40) & 0xFF);
        res[6] = (byte) ((m_value >>> 48) & 0xFF);
        res[7] = (byte) ((m_value >>> 56) & 0xFF);
        return res;
    }

    /**
     * Create an unsigned long value from an eight bytes long byte array in the
     * byte order produced by {@link #getBigEndianByteArray()} (least
     * significant byte first).
     * @param barr The byte array. It must be eight bytes long.
     * @return The unsigned long.
     * @throws IllegalArgumentException If the supplied byte array is not eight
     * bytes long.
     * @see #fromBigEndianByteArray(byte[], int)
     */
    public static UnsignedLong fromBigEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        if (barr.length != 8)
        {
            throw new IllegalArgumentException("The supplied byte array must be eight bytes long");
        }
        return fromBigEndianByteArray(barr, 0);
    }

    /**
     * Create an unsigned long value from eight bytes read from the given offset
     * position in the supplied byte array. The most significant byte is the
     * last byte read.
     * @param barr The byte array to read from.
     * @param offset The offset in the byte array where the least significant
     * (first) byte is.
     * @return An unsigned long.
     * @throws ArrayIndexOutOfBoundsException If the supplied array is too short
     * or if the offset is negative.
     * @see #fromBigEndianByteArray(byte[])
     */
    public static UnsignedLong fromBigEndianByteArray(byte[] barr, int offset) throws ArrayIndexOutOfBoundsException
    {
        // The long masks (0xFFL) widen every byte before it is shifted, so
        // shifts of 32 bits and more work as intended.
        return valueOf((barr[offset] & 0xFFL) + ((barr[offset + 1] & 0xFFL) << 8) + ((barr[offset + 2] & 0xFFL) << 16) + ((barr[offset + 3] & 0xFFL) << 24) + ((barr[offset + 4] & 0xFFL) << 32) + ((barr[offset + 5] & 0xFFL) << 40)
                + ((barr[offset + 6] & 0xFFL) << 48) + ((barr[offset + 7] & 0xFFL) << 56));
    }

    /**
     * Two unsigned longs are equal if they hold the same value.
     * @param o The object to compare with.
     * @return {@code true} if {@code o} is an {@code UnsignedLong} with the
     * same value as this object.
     */
    @Override
    public boolean equals(Object o)
    {
        // instanceof is false for null, so no separate null check is needed.
        return (o instanceof UnsignedLong) && (m_value == ((UnsignedLong) o).m_value);
    }

    /**
     * @return The exclusive-or of the upper and lower 32 bits of the value.
     */
    @Override
    public int hashCode()
    {
        return (int) (m_value ^ (m_value >>> 32));
    }

    /**
     * Compare the unsigned values of this object and the supplied object.
     * @param l2 The value to compare with.
     * @return {@code -1}, {@code 0} or {@code 1} if this value is less than,
     * equal to or greater than {@code l2}.
     */
    @Override
    public int compareTo(UnsignedLong l2)
    {
        // Flipping the top bit maps the unsigned ordering onto the signed
        // ordering, so two primitives can be compared without allocating
        // BigInteger objects.
        final long mine = m_value ^ Long.MIN_VALUE;
        final long theirs = l2.m_value ^ Long.MIN_VALUE;
        return mine < theirs ? -1 : (mine == theirs ? 0 : 1);
    }

    /**
     * @return The unsigned value in decimal notation.
     */
    @Override
    public String toString()
    {
        return bigIntValue().toString();
    }
}
|
197
src/main/java/org/at4j/support/lang/UnsignedShort.java
Normal file
197
src/main/java/org/at4j/support/lang/UnsignedShort.java
Normal file
@ -0,0 +1,197 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.at4j.support.lang;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
|
||||
|
||||
/**
|
||||
* This object represents an unsigned short value (two bytes or 16 bits) with a
|
||||
* value between {@code 0} and {@code 65535}. It is immutable.
|
||||
* <p>
|
||||
* Unsigned shorts are created by calling any of the static creation methods of
|
||||
* this class.
|
||||
* @author Karl Gustafsson
|
||||
* @since 1.0
|
||||
* @see UnsignedByte
|
||||
* @see UnsignedInteger
|
||||
* @see UnsignedLong
|
||||
*/
|
||||
public final class UnsignedShort implements Serializable, Comparable<UnsignedShort>
{
    private static final long serialVersionUID = 1L;

    /**
     * Each unsigned short is two bytes long.
     */
    public static final int SIZE = 2;

    /**
     * The maximum value of an unsigned short (65535).
     */
    public static final int MAX_VALUE = (1 << 16) - 1;

    /**
     * The minimum value of an unsigned short (0).
     */
    public static final int MIN_VALUE = 0;

    /**
     * The value 0.
     */
    public static final UnsignedShort ZERO = new UnsignedShort((short) 0);

    /**
     * The value 1.
     */
    public static final UnsignedShort ONE = new UnsignedShort((short) 1);

    /**
     * The value 1000.
     */
    public static final UnsignedShort ONE_THOUSAND = new UnsignedShort((short) 1000);

    // The raw 16-bit pattern. The field name is part of the default serialized
    // form of this class, so it must not be renamed.
    private final short m_value;

    private UnsignedShort(short value)
    {
        m_value = value;
    }

    /**
     * Create a new unsigned short. The supplied short is treated as an unsigned
     * value, which means that negative argument values will result in unsigned
     * short values between {@code 32768} and {@code 65535} (inclusive). The
     * shared {@link #ZERO}, {@link #ONE} and {@link #ONE_THOUSAND} instances
     * are returned for the values 0, 1 and 1000.
     * @param value The signed short value.
     * @return An unsigned short value.
     */
    public static UnsignedShort valueOf(short value)
    {
        switch (value)
        {
            case 0:
                return ZERO;
            case 1:
                return ONE;
            case 1000:
                return ONE_THOUSAND;
            default:
                return new UnsignedShort(value);
        }
    }

    /**
     * Create an unsigned short from the supplied integer value which must be
     * between {@code 0} and {@code 65535} (inclusive).
     * @param value The value.
     * @return The unsigned short value.
     * @throws IllegalArgumentException If the supplied value is not in the
     * permitted range.
     */
    public static UnsignedShort valueOf(int value) throws IllegalArgumentException
    {
        if ((value < MIN_VALUE) || (value > MAX_VALUE))
        {
            throw new IllegalArgumentException("Illegal unsigned short value " + value + ". It must be between " + MIN_VALUE + " and " + MAX_VALUE + " (inclusive)");
        }
        // The value is known to be 0..65535 here, so the narrowing cast simply
        // keeps its sixteen low bits.
        return valueOf((short) value);
    }

    /**
     * Get the unsigned short value.
     * @return The value, between {@code 0} and {@code 65535} (inclusive).
     */
    public int intValue()
    {
        // Mask off the sign extension that the short-to-int promotion adds.
        return m_value & 0xFFFF;
    }

    /**
     * Get the unsigned short value as a two bytes long byte array.
     * <p>
     * Note that, despite the method name, the <i>least</i> significant byte is
     * stored at index 0 and the most significant byte at index 1.
     * @return The value as a two bytes long array, least significant byte
     * first.
     */
    public byte[] getBigEndianByteArray()
    {
        byte[] res = new byte[2];
        res[0] = (byte) (m_value & 0xFF);
        res[1] = (byte) ((m_value >>> 8) & 0xFF);
        return res;
    }

    /**
     * Create an unsigned short value from a two bytes long byte array in the
     * byte order produced by {@link #getBigEndianByteArray()} (least
     * significant byte first).
     * @param barr The byte array. It must be two bytes long.
     * @return The unsigned short.
     * @throws IllegalArgumentException If the supplied byte array is not two
     * bytes long.
     * @see #fromBigEndianByteArray(byte[], int)
     */
    public static UnsignedShort fromBigEndianByteArray(byte[] barr) throws IllegalArgumentException
    {
        if (barr.length != 2)
        {
            throw new IllegalArgumentException("The supplied byte array must be two bytes long");
        }
        return fromBigEndianByteArray(barr, 0);
    }

    /**
     * Create an unsigned short value from two bytes read from the given offset
     * position in the supplied byte array. The most significant byte is the
     * last byte read.
     * @param barr The byte array to read from.
     * @param offset The offset in the byte array where the least significant
     * (first) byte is.
     * @return An unsigned short.
     * @throws ArrayIndexOutOfBoundsException If the supplied array is too short
     * or if the offset is negative.
     * @see #fromBigEndianByteArray(byte[])
     */
    public static UnsignedShort fromBigEndianByteArray(byte[] barr, int offset) throws ArrayIndexOutOfBoundsException
    {
        // Explicit parentheses: combine the two bytes first, then mask, rather
        // than relying on '+' binding tighter than '&'.
        final int low = barr[offset] & 0xFF;
        final int high = (barr[offset + 1] & 0xFF) << 8;
        return valueOf((short) ((low + high) & 0xFFFF));
    }

    /**
     * Two unsigned shorts are equal if they hold the same value.
     * @param o The object to compare with.
     * @return {@code true} if {@code o} is an {@code UnsignedShort} with the
     * same value as this object.
     */
    @Override
    public boolean equals(Object o)
    {
        return (o instanceof UnsignedShort) && (((UnsignedShort) o).m_value == m_value);
    }

    /**
     * @return The stored short, widened to an {@code int}.
     */
    @Override
    public int hashCode()
    {
        return m_value;
    }

    /**
     * Compare the unsigned values of this object and the supplied object.
     * @param s2 The value to compare with.
     * @return The difference between this value and {@code s2}: negative,
     * zero or positive if this value is less than, equal to or greater than
     * {@code s2}.
     */
    @Override
    public int compareTo(UnsignedShort s2)
    {
        // Both operands are in 0..65535, so the subtraction cannot overflow.
        return intValue() - s2.intValue();
    }

    /**
     * @return The unsigned value in decimal notation.
     */
    @Override
    public String toString()
    {
        return Integer.toString(m_value & 0xFFFF);
    }
}
|
25
src/main/java/org/at4j/support/lang/package-info.java
Normal file
25
src/main/java/org/at4j/support/lang/package-info.java
Normal file
@ -0,0 +1,25 @@
|
||||
/* AT4J -- Archive file tools for Java -- http://www.at4j.org
|
||||
* Copyright (C) 2009 Karl Gustafsson
|
||||
*
|
||||
* This file is a part of AT4J
|
||||
*
|
||||
* AT4J is free software: you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* AT4J is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/**
|
||||
* Support classes that probably would have been in {@code java.lang} if they
|
||||
* had been a part of Java.
|
||||
* @since 1.0
|
||||
* @author Karl Gustafsson
|
||||
*/
|
||||
package org.at4j.support.lang;
|
@ -29,6 +29,6 @@
|
||||
"depends": {
|
||||
"fabricloader": ">=0.7.2",
|
||||
"fabric": "*",
|
||||
"minecraft": "1.16.*"
|
||||
"minecraft": "1.15.*"
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user