Merge PAC (using codepages) and FPC (using unicode)

Unicode is auto-detected and thus supported even with an incorrect .pac extension.
Fixes FPC, which was buggy for some encodings (at least some Vietnamese characters).
This commit is contained in:
Nicolas Gaullier 2021-06-04 18:37:40 +02:00
parent 27e34cb1cc
commit d98f0939ce
2 changed files with 84 additions and 386 deletions

View File

@ -1006,10 +1006,19 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
// File extension of this format; also used by the file-detection check (fileName.EndsWith(Extension, ...)).
public override string Extension => ".pac";
// True when text is stored as UTF-8 instead of a code page. Set while reading as soon as a
// Unicode start marker is met; the derived PacUnicode format overrides the getter.
public virtual bool IsFPC { get; set; } = false;
public const string NameOfFormat = "PAC (Screen Electronics)";
public override string Name => NameOfFormat;
// Byte sequence opening a Unicode text run: 0x1f followed by the UTF-8 BOM (0xef 0xbb 0xbf).
private static readonly byte[] MarkerStartOfUnicode = new byte[] { 0x1f, 0xef, 0xbb, 0xbf };
// Byte closing a Unicode text run (0x2e, i.e. the ASCII code of '.').
private const byte MarkerEndOfUnicode = 0x2e;
// Byte stored in place of a literal 0x2e inside a Unicode run, so it is not mistaken for the end marker.
private const byte MarkerReplaceEndOfUnicode = 0xff;
// Unknown paragraph header. Seems optional for both PAC and FPC; it is written for FPC only, for expected broader FPC compatibility, including prior versions of SubtitleEdit.
private bool doWritePACHeaderOpt => IsFPC;
// The optional header bytes themselves; their length is added to the paragraph's text length when written.
private static readonly byte[] PACHeaderOpt = new byte[] { 0x80, 0x80, 0x80 };
public bool Save(string fileName, Subtitle subtitle)
{
using (var fs = new FileStream(fileName, FileMode.Create, FileAccess.Write))
@ -1058,7 +1067,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
WriteTimeCode(fs, p.StartTime);
WriteTimeCode(fs, p.EndTime);
if (CodePage == -1)
if (CodePage == -1 && !IsFPC)
{
GetCodePage(null, 0, 0);
}
@ -1095,7 +1104,11 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
var encoding = GetEncoding(CodePage);
byte[] textBuffer;
if (CodePage == CodePageArabic)
if (IsFPC)
{
textBuffer = GetUnicodeBytes(text, alignment);
}
else if (CodePage == CodePageArabic)
{
textBuffer = GetArabicBytes(Utilities.FixEnglishTextInRightToLeftLanguage(text, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), alignment);
}
@ -1145,10 +1158,12 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
}
// write text length
var length = (UInt16)(textBuffer.Length + 4);
var length = (UInt16)(textBuffer.Length + 4 + (doWritePACHeaderOpt ? PACHeaderOpt.Length : 0));
fs.Write(BitConverter.GetBytes(length), 0, 2);
fs.WriteByte(verticalAlignment); // fs.WriteByte(0x0a); // sometimes 0x0b? - this seems to be vertical alignment - 0 to 11
if (doWritePACHeaderOpt)
fs.Write(PACHeaderOpt, 0, PACHeaderOpt.Length);
fs.WriteByte(0xfe);
fs.WriteByte(alignment); //2=centered, 1=left aligned, 0=right aligned, 09=Fount2 (large font),
//55=safe area override (too long line), 0A=Fount2 + centered, 06=centered + safe area override
@ -1270,7 +1285,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
//buffer[21] < 10 && // start from number
//buffer[22] == 0 &&
(buffer[23] >= 0x60 && buffer[23] <= 0x70) &&
fileName.EndsWith(".pac", StringComparison.OrdinalIgnoreCase))
fileName.EndsWith(Extension, StringComparison.OrdinalIgnoreCase))
{
return true;
}
@ -1336,6 +1351,16 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
private double _lastStartTotalSeconds;
private double _lastEndTotalSeconds;
/// <summary>
/// Tells whether <paramref name="seq"/> is found in <paramref name="buff"/> at offset <paramref name="pos"/>.
/// </summary>
/// <param name="buff">Buffer to look into.</param>
/// <param name="pos">Offset in <paramref name="buff"/> where the sequence is expected to start.</param>
/// <param name="seq">Byte sequence to match.</param>
/// <returns>
/// <c>true</c> when every byte of <paramref name="seq"/> matches; <c>false</c> on the first mismatch
/// or when the buffer is too short to hold the sequence at that offset.
/// </returns>
private static bool CompareBytes(byte[] buff, int pos, byte[] seq)
{
    int available = buff.Length;
    int needed = seq.Length;
    if (available < pos + needed)
    {
        // Sequence would run past the end of the buffer.
        return false;
    }

    int offset = 0;
    while (offset < needed)
    {
        if (buff[pos + offset] != seq[offset])
        {
            return false;
        }

        offset++;
    }

    return true;
}
private Paragraph GetPacParagraph(ref int index, byte[] buffer, bool usesSecondaryCodePage)
{
bool isStory = index < 15;
@ -1424,7 +1449,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
byte verticalAlignment = buffer[timeStartIndex + 11];
if (CodePage == -1)
if (CodePage == -1 && !IsFPC)
{
GetCodePage(buffer, index, endDelimiter);
}
@ -1438,6 +1463,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
index += 5;
}
bool isUnicode = false;
while (index < buffer.Length && index <= maxIndex) // buffer[index] != endDelimiter)
{
if (buffer.Length > index + 3 && buffer[index] == 0x1f && Encoding.ASCII.GetString(buffer, index + 1, 3) == "W16")
@ -1445,6 +1471,11 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
w16 = true;
index += 5;
}
else if (CompareBytes(buffer, index, MarkerStartOfUnicode))
{
isUnicode = true; IsFPC = true;
index += MarkerStartOfUnicode.Length;
}
else if (buffer.Length > index + 2 && buffer[index] == 0x1f && buffer[index + 1] == 'C' && char.IsDigit((char)buffer[index + 2]))
{
index += 3;
@ -1496,10 +1527,6 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
index++;
}
}
else if (buffer[index] == 0xFF)
{
sb.Append(' ');
}
else if (buffer[index] == 0xFE)
{
alignment = buffer[index + 1];
@ -1508,6 +1535,29 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
sb.AppendLine();
index += 2;
}
else if (isUnicode)
{
if (buffer[index] == MarkerEndOfUnicode)
isUnicode = false;
else if (buffer[index] == MarkerReplaceEndOfUnicode)
sb.Append((char)MarkerEndOfUnicode);
else
{
int len = 1;
byte b = buffer[index];
if (b >= 0xE0)
len = 3;
else if (b >= 0xC0)
len = 2;
if (buffer.Length > index + len - 1)
sb.Append(Encoding.UTF8.GetString(buffer, index, len));
index += len - 1;
}
}
else if (buffer[index] == 0xFF)
{
sb.Append(' ');
}
else if (CodePage == CodePageLatin || CodePage == CodePageLatinTurkish || CodePage == CodePageLatinCzech
|| (usesSecondaryCodePage && !isSecondaryCodePage))
{
@ -2011,6 +2061,29 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
return true;
}
/// <summary>
/// Builds the text payload of a paragraph in Unicode (UTF-8) form.
/// </summary>
/// <remarks>
/// Each line is written as: Unicode start marker, the UTF-8 bytes of the line, end marker.
/// Any UTF-8 byte equal to the end marker is stored as <c>MarkerReplaceEndOfUnicode</c> so that
/// the reader does not close the run too early. Every line but the first is preceded by
/// <c>0xfe</c>, the alignment byte and <c>3</c>.
/// NOTE(review): the Unicode branch of the reader appears to size sequences as 1, 2 or 3 bytes
/// only; characters encoded here as 4-byte UTF-8 (surrogate pairs) may not round-trip — confirm.
/// </remarks>
/// <param name="text">Paragraph text, possibly spanning several lines.</param>
/// <param name="alignment">Alignment byte repeated in front of each additional line.</param>
/// <returns>The encoded bytes for the whole paragraph text.</returns>
private static byte[] GetUnicodeBytes(string text, byte alignment)
{
    var bytes = new List<byte>();
    int lineNumber = 0;
    foreach (var line in text.SplitToLines())
    {
        if (lineNumber > 0)
        {
            // Line break: new-line marker, alignment, then the constant 3.
            bytes.AddRange(new byte[] { 0xfe, alignment, 3 });
        }

        lineNumber++;

        bytes.AddRange(MarkerStartOfUnicode);

        byte[] utf8 = Encoding.UTF8.GetBytes(line);
        for (int i = 0; i < utf8.Length; i++)
        {
            byte current = utf8[i];
            if (current == MarkerEndOfUnicode)
            {
                // Escape the byte that would otherwise be read as the end of the run.
                bytes.Add(MarkerReplaceEndOfUnicode);
            }
            else
            {
                bytes.Add(current);
            }
        }

        bytes.Add(MarkerEndOfUnicode);
    }

    return bytes.ToArray();
}
public static string GetArabicString(byte[] buffer, ref int index)
{
var arabicCharacter = GetNextArabicCharacter(buffer, ref index);

View File

@ -9,387 +9,12 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
/// <summary>
/// UniPac
/// </summary>
public class PacUnicode : SubtitleFormat
public class PacUnicode : Pac
{
// File extension of the Unicode variant of the PAC format.
public override string Extension => ".fpc";
// Display name of this format.
public override string Name => "PAC Unicode (UniPac)";
public override bool IsMine(List<string> lines, string fileName)
{
if (!string.IsNullOrEmpty(fileName) && File.Exists(fileName))
{
try
{
var fi = new FileInfo(fileName);
if (fi.Length > 100 && fi.Length < 1024000) // not too small or too big
{
byte[] buffer = FileUtil.ReadAllBytesShared(fileName);
if (buffer[00] == 1 &&
buffer[01] == 0 &&
buffer[02] == 0 &&
buffer[03] == 0 &&
buffer[04] == 0 &&
buffer[05] == 0 &&
buffer[06] == 0 &&
buffer[07] == 0 &&
buffer[08] == 0 &&
buffer[09] == 0 &&
buffer[10] == 0 &&
buffer[11] == 0 &&
buffer[12] == 0 &&
buffer[13] == 0 &&
buffer[14] == 0 &&
buffer[15] == 0 &&
buffer[16] == 0 &&
buffer[17] == 0 &&
buffer[18] == 0 &&
buffer[19] == 0 &&
buffer[20] == 0 &&
fileName.EndsWith(".fpc", StringComparison.OrdinalIgnoreCase))
{
return true;
}
}
}
catch
{
return false;
}
}
return false;
}
public override string ToText(Subtitle subtitle, string title)
{
return "Not supported!";
}
public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
{
subtitle.Paragraphs.Clear();
subtitle.Header = null;
byte[] buffer = FileUtil.ReadAllBytesShared(fileName);
int index = 0;
while (index < buffer.Length)
{
var p = GetPacParagraph(ref index, buffer);
if (p != null)
{
subtitle.Paragraphs.Add(p);
}
}
if (subtitle.Paragraphs.Count > 2 && subtitle.Paragraphs[0].StartTime.TotalMilliseconds < 0.001 && subtitle.Paragraphs[0].EndTime.TotalMilliseconds < 0.001)
{
subtitle.Paragraphs.RemoveAt(0);
}
subtitle.Renumber();
}
private static Paragraph GetPacParagraph(ref int index, byte[] buffer)
{
while (index < 15)
{
index++;
}
bool con = true;
while (con)
{
index++;
if (index + 20 >= buffer.Length)
{
return null;
}
if (buffer[index] == 0xFE && buffer[index - 1] == 0x80)
{
con = false;
}
}
int feIndex = index;
byte alignment = buffer[feIndex + 1];
byte verticalAlignment = buffer[feIndex - 1];
var p = new Paragraph();
int timeStartIndex = feIndex - 15;
p.StartTime = Pac.GetTimeCode(timeStartIndex + 1, buffer);
p.EndTime = Pac.GetTimeCode(timeStartIndex + 5, buffer);
int textLength = buffer[timeStartIndex + 9] + buffer[timeStartIndex + 10] * 256;
if (textLength > 500)
{
return null; // probably not correct index
}
int maxIndex = timeStartIndex + 10 + textLength;
var sb = new StringBuilder();
index = feIndex + 3;
int textIndex = index;
int textBegin = index;
while (textIndex < buffer.Length && textIndex <= maxIndex)
{
if (buffer[textIndex] == 0xFE)
{
if (textIndex > textBegin)
{
for (int j = textBegin; j <= textIndex - textBegin - 1; j++)
{
if (buffer[j] == 0xff)
{
buffer[j] = 0x2e; // replace end of line marker
}
}
sb.AppendLine(Encoding.UTF8.GetString(buffer, textBegin, textIndex - textBegin));
textBegin = textIndex + 7;
textIndex += 6;
}
}
else if (buffer[textIndex] == 0xFF)
{
sb.Append(' ');
}
textIndex++;
}
if (textIndex > textBegin)
{
sb.Append(Encoding.UTF8.GetString(buffer, textBegin, textIndex - textBegin - 1));
}
p.Text = sb.ToString().Trim();
if (p.Text.Length > 1 && (p.Text[0] == 31 || p.Text[1] == 65279))
{
p.Text = p.Text.Remove(0, 2);
}
for (int k = 0; k < p.Text.Length; k++)
{
if (p.Text[k] == 65533)
{
p.Text = p.Text.Remove(k, 1).Insert(k, ".");
}
}
index += textLength;
if (index + 20 >= buffer.Length)
{
return null;
}
p.Text = p.Text.Replace(Environment.NewLine + " ", Environment.NewLine);
p.Text = p.Text.Replace(Environment.NewLine + " ", Environment.NewLine);
p.Text = p.Text.Replace(Environment.NewLine + ">", Environment.NewLine);
p.Text = p.Text.Replace("\0", string.Empty);
if (verticalAlignment < 5)
{
if (alignment == 1) // left
{
p.Text = "{\\an7}" + p.Text;
}
else if (alignment == 0) // right
{
p.Text = "{\\an9}" + p.Text;
}
else
{
p.Text = "{\\an8}" + p.Text;
}
}
else if (verticalAlignment < 9)
{
if (alignment == 1) // left
{
p.Text = "{\\an4}" + p.Text;
}
else if (alignment == 0) // right
{
p.Text = "{\\an6}" + p.Text;
}
else
{
p.Text = "{\\an5}" + p.Text;
}
}
else
{
if (alignment == 1) // left
{
p.Text = "{\\an1}" + p.Text;
}
else if (alignment == 0) // right
{
p.Text = "{\\an3}" + p.Text;
}
}
// Remove all control-characters if any in p.Text.
p.Text = p.Text.RemoveControlCharactersButWhiteSpace();
p.Text = p.Text.Replace(Environment.NewLine + " ", Environment.NewLine);
p.Text = p.Text.Replace(Environment.NewLine + " ", Environment.NewLine);
p.Text = p.Text.Replace(Environment.NewLine + " ", Environment.NewLine);
// Fix italics (basic)
if (p.Text.StartsWith('<') &&
!p.Text.StartsWith("<i>", StringComparison.OrdinalIgnoreCase) &&
!p.Text.StartsWith("<b>", StringComparison.OrdinalIgnoreCase) &&
!p.Text.StartsWith("<u>", StringComparison.OrdinalIgnoreCase) &&
!p.Text.StartsWith("<font ", StringComparison.OrdinalIgnoreCase))
{
p.Text = "<i>" + p.Text.TrimStart('<').Replace(Environment.NewLine + "<", Environment.NewLine) + "</i>";
}
else if (p.Text.Contains(Environment.NewLine + "<"))
{
p.Text = p.Text.Replace(Environment.NewLine + "<", Environment.NewLine + "<i>") + "</i>";
}
return p;
}
public void Save(string fileName, Subtitle subtitle)
{
using (var fs = new FileStream(fileName, FileMode.Create, FileAccess.Write))
{
// header
fs.WriteByte(1);
for (int i = 1; i < 24; i++)
{
fs.WriteByte(0);
}
// paragraphs
var sub = new Subtitle(subtitle);
sub.Paragraphs.Insert(0, new Paragraph { Text = "-" });
int number = 0;
foreach (var p in sub.Paragraphs)
{
WriteParagraph(fs, p, number, number + 1 == sub.Paragraphs.Count);
number++;
}
// footer
fs.WriteByte(0xff);
for (int i = 0; i < 11; i++)
{
fs.WriteByte(0);
}
fs.WriteByte(0x11);
fs.WriteByte(0);
byte[] footerBuffer = Encoding.ASCII.GetBytes("dummy end of file.");
fs.Write(footerBuffer, 0, footerBuffer.Length);
}
}
private void WriteParagraph(FileStream fs, Paragraph p, int number, bool isLast)
{
Pac.WriteTimeCode(fs, p.StartTime);
Pac.WriteTimeCode(fs, p.EndTime);
byte alignment = 2; // center
var verticalAlignment = (byte)Math.Max(0, Configuration.Settings.SubtitleSettings.PacVerticalBottom + 1 - Utilities.GetNumberOfLines(p.Text));
string text = p.Text;
if (text.StartsWith("{\\an1}", StringComparison.Ordinal) || text.StartsWith("{\\an4}", StringComparison.Ordinal) || text.StartsWith("{\\an7}", StringComparison.Ordinal))
{
alignment = 1; // left
}
else if (text.StartsWith("{\\an3}", StringComparison.Ordinal) || text.StartsWith("{\\an6}", StringComparison.Ordinal) || text.StartsWith("{\\an9}", StringComparison.Ordinal))
{
alignment = 0; // right
}
if (text.StartsWith("{\\an7}", StringComparison.Ordinal) || text.StartsWith("{\\an8}", StringComparison.Ordinal) || text.StartsWith("{\\an9}", StringComparison.Ordinal))
{
verticalAlignment = (byte)Configuration.Settings.SubtitleSettings.PacVerticalTop; // top
}
else if (text.StartsWith("{\\an4}", StringComparison.Ordinal) || text.StartsWith("{\\an5}", StringComparison.Ordinal) || text.StartsWith("{\\an6}", StringComparison.Ordinal))
{
verticalAlignment = (byte)Configuration.Settings.SubtitleSettings.PacVerticalCenter; // center
}
if (text.Length >= 6 && text[0] == '{' && text[5] == '}')
{
text = text.Remove(0, 6);
}
text = Pac.MakePacItalicsAndRemoveOtherTags(text);
byte[] textBuffer = GetUf8Bytes(text, alignment);
// write text length
var length = (UInt16)(textBuffer.Length + 4 + 3);
fs.Write(BitConverter.GetBytes(length), 0, 2);
fs.WriteByte(verticalAlignment); // fs.WriteByte(0x0a); // sometimes 0x0b? - this seems to be vertical alignment - 0 to 11
fs.WriteByte(0x80);
fs.WriteByte(0x80);
fs.WriteByte(0x80);
fs.WriteByte(0xfe);
fs.WriteByte(alignment); //2=centered, 1=left aligned, 0=right aligned, 09=Fount2 (large font),
//55=safe area override (too long line), 0A=Fount2 + centered, 06=centered + safe area override
fs.WriteByte(0x03);
fs.Write(textBuffer, 0, textBuffer.Length);
if (!isLast)
{
fs.WriteByte(0);
fs.WriteByte((byte)((number + 1) % 256));
fs.WriteByte((byte)((number + 1) / 256));
fs.WriteByte(0x60);
}
}
private static byte[] GetUf8Bytes(string text, byte alignment)
{
var result = new List<byte>();
bool firstLine = true;
var lines = text.SplitToLines();
for (var i = 0; i < lines.Count; i++)
{
var line = lines[i];
if (!firstLine)
{
result.Add(0xfe);
result.Add(alignment);
result.Add(3);
result.Add(0x1F); // utf8 BOM
result.Add(0xEF);
result.Add(0xBB);
result.Add(0xBF);
}
string s = line;
for (int index = 0; index < s.Length; index++)
{
var ch = s[index];
if (ch == '.') // 0x2e
{
result.Add(0xff); // period
}
else
{
foreach (var b in Encoding.UTF8.GetBytes(ch.ToString()))
{
result.Add(b);
}
}
}
firstLine = false;
}
result.Add(0x2e);
return result.ToArray();
}
// This format always reports itself as FPC (Unicode text); only the getter is overridden.
public override bool IsFPC => true;
}
}