PAC: Support Portuguese codepage + more - thx Ingo :)

This commit is contained in:
Nikolaj Olsson 2024-08-11 18:07:11 +02:00
parent da474d3254
commit 1606f7d8d7
3 changed files with 412 additions and 259 deletions

View File

@ -2,6 +2,7 @@ using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.Interfaces;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
@ -27,7 +28,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
/// <summary>
/// Tells whether <paramref name="codePage"/> is a PAC code page id accepted by this class.
/// </summary>
/// <param name="codePage">Code page id to check.</param>
/// <returns><c>true</c> when the id lies in the inclusive range 0..12; otherwise <c>false</c>.</returns>
public static bool IsValidCodePage(int codePage)
{
    // Upper bound 12 is the id of the Latin Portuguese code page (CodePageLatinPortuguese).
    // The former "<= 11" bound must not remain in front of this check, or id 12 is rejected.
    return 0 <= codePage && codePage <= 12;
}
public const int CodePageLatin = 0;
public const int CodePageGreek = 1;
@ -41,6 +42,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
public const int CodePageKorean = 9;
public const int CodePageJapanese = 10;
public const int CodePageLatinTurkish = 11;
public const int CodePageLatinPortuguese = 12;
public const int EncodingChineseSimplified = 936;
public const int EncodingChineseTraditional = 950;
@ -72,6 +74,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0x558a, new SpecialCharacter("Ū")},
{ 0x758a, new SpecialCharacter("ū")},
{ 0x09, new SpecialCharacter(" ")},
{ 0x23, new SpecialCharacter("£")},
{ 0x7c, new SpecialCharacter("æ")},
{ 0x7d, new SpecialCharacter("ø")},
@ -394,6 +397,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0xEC55, new SpecialCharacter("Ű")},
{ 0xEC6F, new SpecialCharacter("ő")},
{ 0xEC75, new SpecialCharacter("ű")},
{ 0x1c00, new SpecialCharacter(" ")},
{ 0xC0, new SpecialCharacter("[")},
{ 0xC1, new SpecialCharacter("]")},
};
@ -426,6 +430,12 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0xEA75, new SpecialCharacter("ŭ")},
};
// Character overrides for the Latin Portuguese code page, keyed by PAC byte code.
// Handed to GetLatinBytes / GetLatinString as the override table when the code page
// is CodePageLatinPortuguese.
private static readonly Dictionary<int, SpecialCharacter> LatinPortugueseOverrides = new Dictionary<int, SpecialCharacter>
{
{ 0x5f, new SpecialCharacter("-")},
// NOTE(review): the replacement for 0x2d displays as an empty string here; it may hold a
// non-printing character (e.g. a soft hyphen) - confirm against the repository file.
{ 0x2d, new SpecialCharacter("")},
};
private static readonly Dictionary<int, SpecialCharacter> HebrewCodes = new Dictionary<int, SpecialCharacter>
{
{ 0x80, new SpecialCharacter("ְ")},
@ -441,6 +451,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0x8b, new SpecialCharacter("ֻ")},
{ 0x8c, new SpecialCharacter("ּ")},
{ 0x8d, new SpecialCharacter("ֽ")},
{ 0x91, new SpecialCharacter("")},
{ 0x92, new SpecialCharacter("֗")},
{ 0xa0, new SpecialCharacter("א")},
{ 0xa1, new SpecialCharacter("ב")},
@ -492,6 +503,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0xdd, new SpecialCharacter("דּ")},
{ 0xde, new SpecialCharacter("גּ")},
{ 0xdf, new SpecialCharacter("בּ")},
{ 0x2b, new SpecialCharacter(".")},
{ 0x2e, new SpecialCharacter(".")},
{ 0x2c, new SpecialCharacter(",")}
};
@ -505,6 +517,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0xe181, new SpecialCharacter("إ")},
{ 0xe281, new SpecialCharacter("آ")},
{ 0xe781, new SpecialCharacter("اً")},
{ 0xe500, new SpecialCharacter(" َ")},
{ 0x80, new SpecialCharacter("ـ")},
{ 0x81, new SpecialCharacter("ا")},
{ 0x82, new SpecialCharacter("ب")},
@ -548,6 +561,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0x25, new SpecialCharacter("٪")},
{ 0x2c, new SpecialCharacter("،")},
{ 0x3b, new SpecialCharacter("؛")},
{ 0xed, new SpecialCharacter("\u064A")},
{ 0xe7, new SpecialCharacter("\u064B", true)},
{ 0xea, new SpecialCharacter("\u064C", true)},
{ 0xe8, new SpecialCharacter("\u064D", true)},
@ -563,6 +577,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
private static readonly Dictionary<int, SpecialCharacter> CyrillicCodes = new Dictionary<int, SpecialCharacter>
{
{ 0x09, new SpecialCharacter(" ")},
{ 0x20, new SpecialCharacter(" ")},
{ 0x21, new SpecialCharacter("!")},
{ 0x22, new SpecialCharacter("Э")},
@ -578,11 +593,14 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0x2c, new SpecialCharacter("б")},
{ 0x2d, new SpecialCharacter("-")},
{ 0x2e, new SpecialCharacter("ю")},
{ 0x2f, new SpecialCharacter("/")},
{ 0x3a, new SpecialCharacter("Ж")},
{ 0x3b, new SpecialCharacter("ж")},
{ 0x3c, new SpecialCharacter("<")},
{ 0x3d, new SpecialCharacter("=")},
{ 0x3e, new SpecialCharacter(">")},
{ 0x3f, new SpecialCharacter("?")},
{ 0x40, new SpecialCharacter("@")},
{ 0x41, new SpecialCharacter("Ф")},
{ 0x42, new SpecialCharacter("И")},
{ 0x43, new SpecialCharacter("С")},
@ -610,6 +628,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0x59, new SpecialCharacter("Н")},
{ 0x5a, new SpecialCharacter("Я")},
{ 0x5b, new SpecialCharacter("х")},
{ 0x5c, new SpecialCharacter("\\")},
{ 0x5d, new SpecialCharacter("ъ")},
{ 0x5e, new SpecialCharacter(",")},
{ 0x5f, new SpecialCharacter("-")},
@ -629,6 +648,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0x6e, new SpecialCharacter("т")},
{ 0x6f, new SpecialCharacter("щ")},
{ 0x70, new SpecialCharacter("з")},
{ 0x71, new SpecialCharacter("q")},
{ 0x72, new SpecialCharacter("к")},
{ 0x73, new SpecialCharacter("ы")},
{ 0x74, new SpecialCharacter("е")},
@ -640,6 +660,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0x7a, new SpecialCharacter("я")},
{ 0x7b, new SpecialCharacter("Х")},
{ 0x7d, new SpecialCharacter("Ъ")},
{ 0x7e, new SpecialCharacter("~")},
{ 0x80, new SpecialCharacter("Б")},
{ 0x81, new SpecialCharacter("Ю")},
{ 0x82, new SpecialCharacter("Ђ")},
@ -654,38 +675,43 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0x8d, new SpecialCharacter("ґ")},
{ 0x8f, new SpecialCharacter("Џ")},
{ 0x90, new SpecialCharacter("№")},
{ 0x91, new SpecialCharacter("")},
{ 0x92, new SpecialCharacter("ђ")},
{ 0x93, new SpecialCharacter("“")},
{ 0x94, new SpecialCharacter("є")},
{ 0x95, new SpecialCharacter("ѕ")},
{ 0x96, new SpecialCharacter("і")},
{ 0x97, new SpecialCharacter("—")},
{ 0x98, new SpecialCharacter("ј")},
{ 0x99, new SpecialCharacter("љ")},
{ 0x9a, new SpecialCharacter("њ")},
{ 0x9b, new SpecialCharacter("ћ")},
{ 0x9d, new SpecialCharacter("§")},
{ 0x9f, new SpecialCharacter("џ")},
{ 0xa0, new SpecialCharacter(" ")},
{ 0xa2, new SpecialCharacter("%")},
{ 0xa4, new SpecialCharacter("&")},
{ 0xac, new SpecialCharacter("C")},
{ 0xab, new SpecialCharacter("«")},
{ 0xac, new SpecialCharacter("¬")},
{ 0xad, new SpecialCharacter("D")},
{ 0xae, new SpecialCharacter("E")},
{ 0xae, new SpecialCharacter("®")},
{ 0xaf, new SpecialCharacter("F")},
{ 0xb0, new SpecialCharacter("G")},
{ 0xb0, new SpecialCharacter("°")},
{ 0xb1, new SpecialCharacter("H")},
{ 0xb2, new SpecialCharacter("'")},
{ 0xb3, new SpecialCharacter("\"")},
{ 0xb4, new SpecialCharacter("I")},
{ 0xb4, new SpecialCharacter("ґ")},
{ 0xb5, new SpecialCharacter("J")},
{ 0xb6, new SpecialCharacter("K")},
{ 0xb7, new SpecialCharacter("L")},
{ 0xb8, new SpecialCharacter("M")},
{ 0xb9, new SpecialCharacter("N")},
{ 0xb6, new SpecialCharacter("")},
{ 0xb7, new SpecialCharacter("·")},
{ 0xb8, new SpecialCharacter("ё")},
{ 0xb9, new SpecialCharacter("")},
{ 0xba, new SpecialCharacter("P")},
{ 0xbb, new SpecialCharacter("Q")},
{ 0xbc, new SpecialCharacter("R")},
{ 0xbd, new SpecialCharacter("S")},
{ 0xbe, new SpecialCharacter("T")},
{ 0xbf, new SpecialCharacter("U")},
{ 0xbb, new SpecialCharacter("»")},
{ 0xbc, new SpecialCharacter("ј")},
{ 0xbd, new SpecialCharacter("Ѕ")},
{ 0xbe, new SpecialCharacter("ѕ")},
{ 0xbf, new SpecialCharacter("ї")},
{ 0xc0, new SpecialCharacter("V")},
{ 0xc1, new SpecialCharacter("*")},
{ 0xc2, new SpecialCharacter("W")},
@ -736,6 +762,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0xe255, new SpecialCharacter("Ѓ")},
{ 0xe272, new SpecialCharacter("ќ")},
{ 0xe275, new SpecialCharacter("ѓ")},
{ 0xe2c9, new SpecialCharacter("è")},
{ 0xe342, new SpecialCharacter("Ѝ")},
{ 0xe354, new SpecialCharacter("Ѐ")},
{ 0xe362, new SpecialCharacter("ѝ")},
@ -979,7 +1006,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0x2A, new SpecialCharacter("*") },
{ 0x2B, new SpecialCharacter("+") },
{ 0x2C, new SpecialCharacter(",") },
{ 0x2D, new SpecialCharacter("-") },
{ 0x2D, new SpecialCharacter("") },
{ 0x2E, new SpecialCharacter(".") },
{ 0x2F, new SpecialCharacter("/") },
{ 0x3A, new SpecialCharacter(":") },
@ -1081,6 +1108,8 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0xA3, new SpecialCharacter("X") },
{ 0xA4, new SpecialCharacter("Y") },
{ 0xA5, new SpecialCharacter("Z") },
{ 0xA7, new SpecialCharacter("§") },
{ 0xA8, new SpecialCharacter("¨") },
{ 0xA9, new SpecialCharacter("°") },
{ 0xAB, new SpecialCharacter("½") },
{ 0xAC, new SpecialCharacter("a") },
@ -1111,6 +1140,13 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0xC5, new SpecialCharacter("z") },
{ 0x202A, new SpecialCharacter("®") },
{ 0xE22041, new SpecialCharacter("Ά") },
{ 0xE22045, new SpecialCharacter("Έ") },
{ 0xE22047, new SpecialCharacter("Ή") },
{ 0xE22049, new SpecialCharacter("Ί") },
{ 0xE2204F, new SpecialCharacter("Ό") },
{ 0xE22055, new SpecialCharacter("Ύ") },
{ 0xE22059, new SpecialCharacter("Ώ") },
{ 0xE241, new SpecialCharacter("Ά") },
{ 0xE242, new SpecialCharacter("Β́") },
{ 0xE243, new SpecialCharacter("Γ́") },
@ -1155,16 +1191,22 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{ 0xE278, new SpecialCharacter("ψ́") },
{ 0xE279, new SpecialCharacter("ώ") },
{ 0xE27B, new SpecialCharacter("ί") },
{ 0xe220, new SpecialCharacter("´")},
{ 0xE2, new SpecialCharacter("β") },
{ 0xE320, new SpecialCharacter("`") },
{ 0xE3, new SpecialCharacter("γ") },
{ 0xE399, new SpecialCharacter("Ǹ") },
{ 0xE39A, new SpecialCharacter("Ò") },
{ 0xE3A0, new SpecialCharacter("Ù") },
{ 0xE3A2, new SpecialCharacter("Ẁ") },
{ 0xE3A4, new SpecialCharacter("Ỳ") },
{ 0xE3B0, new SpecialCharacter("è") },
{ 0xE3B9, new SpecialCharacter("ǹ") },
{ 0xE3C2, new SpecialCharacter("ẁ") },
{ 0xE3C4, new SpecialCharacter("ỳ") },
{ 0xE549, new SpecialCharacter("Ϊ") },
{ 0xE4, new SpecialCharacter("δ") },
{ 0xE5, new SpecialCharacter("ε") },
{ 0xE555, new SpecialCharacter("Ϋ") },
{ 0xE561, new SpecialCharacter("α̈") },
{ 0xE562, new SpecialCharacter("β̈") },
@ -1337,6 +1379,10 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{
textBuffer = GetLatinBytes(encoding, text, alignment, LatinTurkishOverrides);
}
else if (CodePage == CodePageLatinPortuguese)
{
textBuffer = GetLatinBytes(encoding, text, alignment, LatinPortugueseOverrides);
}
else if (CodePage == CodePageChineseTraditional)
{
textBuffer = GetW16Bytes(text, alignment, EncodingChineseTraditional);
@ -1438,11 +1484,11 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
var seconds = Math.Max(0, timeCode.Seconds);
var milliseconds = Math.Max(0, timeCode.Milliseconds);
string highPart = $"{hours:00}{minutes:00}";
byte frames = (byte)MillisecondsToFramesMaxFrameRate(milliseconds);
string lowPart = $"{seconds:00}{frames:00}";
var highPart = $"{hours:00}{minutes:00}";
var frames = (byte)MillisecondsToFramesMaxFrameRate(milliseconds);
var lowPart = $"{seconds:00}{frames:00}";
int high = int.Parse(highPart);
var high = int.Parse(highPart);
if (high < 256)
{
fs.WriteByte((byte)high);
@ -1454,7 +1500,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
fs.WriteByte((byte)(high / 256));
}
int low = int.Parse(lowPart);
var low = int.Parse(lowPart);
if (low < 256)
{
fs.WriteByte((byte)low);
@ -1469,50 +1515,53 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
/// <summary>
/// Checks whether the file at <paramref name="fileName"/> looks like a PAC file.
/// The file must exist, carry the format's extension, be larger than 65 bytes and smaller
/// than 1,024,000 bytes, and start with a 0x01 byte followed by twenty 0x00 bytes.
/// </summary>
/// <param name="lines">Text lines of the file; not used by this binary format check.</param>
/// <param name="fileName">Path of the file to inspect.</param>
/// <returns><c>true</c> when all checks pass; <c>false</c> otherwise, including on any read error.</returns>
public override bool IsMine(List<string> lines, string fileName)
{
    if (string.IsNullOrEmpty(fileName) || !File.Exists(fileName))
    {
        return false;
    }

    try
    {
        // Cheapest check first: a wrong extension can never match, so the file is not read at all.
        if (!fileName.EndsWith(Extension, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        var fi = new FileInfo(fileName);
        if (fi.Length <= 65 || fi.Length >= 1024000) // too small or too big
        {
            return false;
        }

        byte[] buffer = FileUtil.ReadAllBytesShared(fileName);

        // These bytes seem to mark PAC files... TODO: Verify!
        if (buffer[0] != 1)
        {
            return false;
        }

        for (var i = 1; i <= 20; i++)
        {
            if (buffer[i] != 0)
            {
                return false;
            }
        }

        // Deliberately not checked (kept from the earlier disabled conditions):
        //   buffer[21] < 10 (start from number), buffer[22] == 0, buffer[23] in 0x60..0x70.
        return true;
    }
    catch
    {
        // Format detection is best-effort: an unreadable file is simply "not mine".
        return false;
    }
}
@ -1531,11 +1580,11 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{
subtitle.Paragraphs.Clear();
subtitle.Header = null;
var usesSecondaryCodePage = UsesSecondaryCodePage(buffer);
Analyze(buffer, out bool secondaryCodePageIsMain, out bool hasStory, out string language);
var index = 0;
while (index < buffer.Length)
{
var p = GetPacParagraph(ref index, buffer, usesSecondaryCodePage, subtitle.Paragraphs.Count);
var p = GetPacParagraph(ref index, buffer, secondaryCodePageIsMain, hasStory, subtitle.Paragraphs.Count);
if (p != null)
{
subtitle.Paragraphs.Add(p);
@ -1545,16 +1594,48 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
subtitle.Renumber();
}
private static bool UsesSecondaryCodePage(byte[] buffer)
private void Analyze(byte[] buffer, out bool secondaryCodePageIsMain, out bool hasStory, out string language)
{
bool? firstIsSecondary = null;
hasStory = false;
language = null;
var total = 0;
var secondaryUse = 0;
var firstIsSecondary = false;
for (var i = 15; i < buffer.Length - 1; i++)
{
if (buffer[i] == 0xFE && (buffer[i - 15] == 0x60 || buffer[i - 15] == 0x61 || buffer[i - 12] == 0x60 || buffer[i - 12] == 0x61))
{
while (i + 3 < buffer.Length && buffer[i + 3] == 0xFE)
{
i += 3;
}
var secondary = (buffer[i + 1] & 0x08) != 0;
firstIsSecondary = firstIsSecondary ?? secondary;
if (total == 0)
{
firstIsSecondary = secondary;
for (var j = i + 2; j < buffer.Length - 6 && (buffer[j] != 0x00 || buffer[j - 1] == 0xFE || buffer[j - 2] == 0xFE); j++)
{
if (buffer[j] == 0xFE)
{
secondary = (buffer[j + 1] & 0x08) != 0;
}
var lineEnd = Array.FindIndex(buffer, j, b => b == 0xfe || b == 0x00);
if (Encoding.ASCII.GetString(buffer, j, 6).ToUpperInvariant() == "STORY:")
{
j += 6;
firstIsSecondary = secondary;
hasStory = true;
}
else if (Encoding.ASCII.GetString(buffer, j, 5).ToUpperInvariant() == "LANG:" && lineEnd > j + 5)
{
language = Encoding.ASCII.GetString(buffer, j + 5, lineEnd - (j + 5)).Trim().ToUpperInvariant();
}
}
}
total++;
if (secondary)
{
secondaryUse++;
@ -1562,7 +1643,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
}
}
return secondaryUse > (firstIsSecondary.GetValueOrDefault() ? 1 : 0);
secondaryCodePageIsMain = secondaryUse > (hasStory && firstIsSecondary ? (total - 1) : 0);
}
private double _lastStartTotalSeconds;
@ -1575,7 +1656,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
return false;
}
for (int i = 0; i < seq.Length; i++)
for (var i = 0; i < seq.Length; i++)
{
if (buff[pos + i] != seq[i])
{
@ -1586,7 +1667,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
return true;
}
private Paragraph GetPacParagraph(ref int index, byte[] buffer, bool usesSecondaryCodePage, int paragraphIndex)
private Paragraph GetPacParagraph(ref int index, byte[] buffer, bool secondaryCodePageIsMain, bool hasStory, int paragraphIndex)
{
var isStory = index < 15;
if (isStory)
@ -1605,13 +1686,13 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
if (buffer[index] == 0xFE)
{
var minus15 = buffer[index - 15];
if (minus15 == 0x60 || minus15 == 0x61 || minus15 == 0x62)
if (minus15 >= 0x60 && minus15 <= 0x67)
{
break;
}
var minus12 = buffer[index - 12];
if (minus12 == 0x60 || minus12 == 0x61 || minus12 == 0x62)
if (minus12 >= 0x60 && minus12 <= 0x67)
{
break;
}
@ -1636,7 +1717,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
p.StartTime = GetTimeCode(timeStartIndex + 1, buffer);
p.EndTime = GetTimeCode(timeStartIndex + 5, buffer);
}
else if (buffer[timeStartIndex] == 0x61 || buffer[timeStartIndex] == 0x62)
else if (buffer[timeStartIndex] >= 0x61 && buffer[timeStartIndex] <= 0x67)
{
p.StartTime = GetTimeCode(timeStartIndex + 1, buffer);
p.EndTime = GetTimeCode(timeStartIndex + 5, buffer);
@ -1648,7 +1729,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
return null;
}
}
else if (buffer[timeStartIndex + 3] == 0x61 || buffer[timeStartIndex + 3] == 0x62)
else if (buffer[timeStartIndex + 3] >= 0x61 && buffer[timeStartIndex + 3] <= 0x67)
{
timeStartIndex += 3;
p.StartTime = GetTimeCode(timeStartIndex + 1, buffer);
@ -1684,224 +1765,262 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
GetCodePage(buffer, index, endDelimiter);
}
var overrides = CodePage == CodePageLatinTurkish ? LatinTurkishOverrides : null;
var overrides = (CodePage == CodePageLatinTurkish) ? LatinTurkishOverrides : (CodePage == CodePageLatinPortuguese) ? LatinPortugueseOverrides : null;
var sb = new StringBuilder();
index = feIndex + 3;
var w16 = buffer[index] == 0x1f && Encoding.ASCII.GetString(buffer, index + 1, 3) == "W16";
if (w16)
try
{
index += 5;
}
var isUnicode = false;
while (index < buffer.Length && index <= maxIndex) // buffer[index] != endDelimiter)
{
if (buffer.Length > index + 3 && buffer[index] == 0x1f && Encoding.ASCII.GetString(buffer, index + 1, 3) == "W16")
{
w16 = true;
index += 5;
}
else if (CompareBytes(buffer, index, MarkerStartOfUnicode))
{
isUnicode = true; IsFpc = true;
index += MarkerStartOfUnicode.Length;
}
else if (buffer.Length > index + 2 && buffer[index] == 0x1f && buffer[index + 1] == 'C' && char.IsDigit((char)buffer[index + 2]))
{
index += 3;
continue;
}
index = feIndex + 3;
var w16 = buffer[index] == 0x1f && Encoding.ASCII.GetString(buffer, index + 1, 3) == "W16";
if (w16)
{
if (buffer[index] == 0xFE)
index += 5;
}
var isUnicode = false;
while (index < buffer.Length && index <= maxIndex) // buffer[index] != endDelimiter)
{
if (buffer.Length > index + 3 && buffer[index] == 0x1f && Encoding.ASCII.GetString(buffer, index + 1, 3) == "W16")
{
w16 = true;
index += 5;
}
else if (CompareBytes(buffer, index, MarkerStartOfUnicode))
{
isUnicode = true; IsFpc = true;
index += MarkerStartOfUnicode.Length;
}
else if (buffer.Length > index + 2 && buffer[index] == 0x1f && buffer[index + 1] == 'C')
{
if (char.IsDigit((char)buffer[index + 2]))
{
index += 3;
continue;
}
else
{
// Skip comments
while (buffer.Length > index && buffer[index] != 0) index++;
break;
}
}
if (w16)
{
if (buffer[index] == 0xFE)
{
alignment = buffer[index + 1];
isSecondaryCodePage = (alignment & 0x08) != 0;
alignment &= 0x07;
sb.AppendLine();
w16 = buffer[index + 3] == 0x1f && Encoding.ASCII.GetString(buffer, index + 4, 3) == "W16";
if (w16)
{
index += 5;
}
index += 2;
}
else
{
if (buffer[index] == 0)
{
sb.Append(Encoding.ASCII.GetString(buffer, index + 1, 1));
}
else if (buffer.Length > index + 1)
{
if (CodePage == CodePageChineseSimplified)
{
sb.Append(Encoding.GetEncoding(EncodingChineseSimplified).GetString(buffer, index, 2));
}
else if (CodePage == CodePageKorean)
{
sb.Append(Encoding.GetEncoding(EncodingKorean).GetString(buffer, index, 2));
}
else if (CodePage == CodePageJapanese)
{
sb.Append(Encoding.GetEncoding(EncodingJapanese).GetString(buffer, index, 2));
}
else
{
sb.Append(Encoding.GetEncoding(EncodingChineseTraditional).GetString(buffer, index, 2));
}
}
index++;
}
}
else if (buffer[index] == 0xFE)
{
alignment = buffer[index + 1];
isSecondaryCodePage = (alignment & 0x08) != 0;
alignment &= 0x07;
sb.AppendLine();
w16 = buffer[index + 3] == 0x1f && Encoding.ASCII.GetString(buffer, index + 4, 3) == "W16";
if (w16)
{
index += 5;
}
index += 2;
}
else
else if (isUnicode)
{
if (buffer[index] == 0)
if (buffer[index] == MarkerEndOfUnicode)
{
sb.Append(Encoding.ASCII.GetString(buffer, index + 1, 1));
isUnicode = false;
}
else if (buffer.Length > index + 1)
else if (buffer[index] == MarkerReplaceEndOfUnicode)
{
if (CodePage == CodePageChineseSimplified)
{
sb.Append(Encoding.GetEncoding(EncodingChineseSimplified).GetString(buffer, index, 2));
}
else if (CodePage == CodePageKorean)
{
sb.Append(Encoding.GetEncoding(EncodingKorean).GetString(buffer, index, 2));
}
else if (CodePage == CodePageJapanese)
{
sb.Append(Encoding.GetEncoding(EncodingJapanese).GetString(buffer, index, 2));
}
else
{
sb.Append(Encoding.GetEncoding(EncodingChineseTraditional).GetString(buffer, index, 2));
}
sb.Append((char)MarkerEndOfUnicode);
}
else
{
var len = 1;
var b = buffer[index];
if (b >= 0xE0)
{
len = 3;
}
else if (b >= 0xC0)
{
len = 2;
}
index++;
if (buffer.Length > index + len - 1)
{
sb.Append(Encoding.UTF8.GetString(buffer, index, len));
}
index += len - 1;
}
}
}
else if (buffer[index] == 0xFE)
{
alignment = buffer[index + 1];
isSecondaryCodePage = (alignment & 0x08) != 0;
alignment &= 0x07;
sb.AppendLine();
index += 2;
}
else if (isUnicode)
{
if (buffer[index] == MarkerEndOfUnicode)
else if (buffer[index] == 0xFF)
{
isUnicode = false;
sb.Append(' ');
}
else if (buffer[index] == MarkerReplaceEndOfUnicode)
else if ((buffer[index] > 0x00 && buffer[index] < 0x08) || new byte[] { 0x00, 0x0b, 0x0d, 0x17, 0x1d }.Contains(buffer[index]))
{
sb.Append((char)MarkerEndOfUnicode);
// Ignore color switching codes for now.
// 0x01 Red, 0x02 Green, 0x03 Yellow, 0x04 Blue, 0x05 Magenta, 0x06 Cyan, 0x07 White
// 0x00 == ???
// 0x0b == ???
// 0x0d == ???
// 0x1D == Color before is background/box color, default is white foreground, black background
}
else if (CodePage == CodePageLatin || CodePage == CodePageLatinTurkish || CodePage == CodePageLatinCzech || CodePage == CodePageLatinPortuguese
|| (secondaryCodePageIsMain && !isSecondaryCodePage)
|| (hasStory && isStory))
{
sb.Append(GetLatinString(GetEncoding(CodePage), buffer, ref index, overrides));
}
else if (CodePage == CodePageArabic)
{
sb.Append(GetArabicString(buffer, ref index));
}
else if (CodePage == CodePageHebrew)
{
sb.Append(GetHebrewString(buffer, ref index));
}
else if (CodePage == CodePageCyrillic)
{
sb.Append(GetCyrillicString(buffer, ref index));
}
else if (CodePage == CodePageGreek)
{
sb.Append(GetGreekString(buffer, ref index, secondaryCodePageIsMain && isSecondaryCodePage));
}
else if (CodePage == CodePageThai)
{
sb.Append(GetThaiString(buffer, ref index));
//sb.Append(GetEncoding(CodePage).GetString(buffer, index, 1).Replace("€", "ต"));
}
else
{
var len = 1;
var b = buffer[index];
if (b >= 0xE0)
{
len = 3;
}
else if (b >= 0xC0)
{
len = 2;
}
sb.Append(GetEncoding(CodePage).GetString(buffer, index, 1));
}
if (buffer.Length > index + len - 1)
{
sb.Append(Encoding.UTF8.GetString(buffer, index, len));
}
index++;
}
index += len - 1;
if (index + 20 >= buffer.Length)
{
return null;
}
p.Text = sb.ToString();
p.Text = p.Text.Replace("\0", string.Empty);
p.Text = FixItalics(p.Text);
if (!(secondaryCodePageIsMain && !isSecondaryCodePage) && !(hasStory && isStory))
{
if (CodePage == CodePageArabic)
{
p.Text = Utilities.FixEnglishTextInRightToLeftLanguage(p.Text, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
}
else if (CodePage == CodePageHebrew)
{
p.Text = Utilities.FixEnglishTextInRightToLeftLanguage(p.Text, "0123456789abcdefghijklmnopqrstuvwxyz");
}
}
else if (buffer[index] == 0xFF)
if (verticalAlignment < 5)
{
sb.Append(' ');
if (alignment == 1) // left
{
p.Text = "{\\an7}" + p.Text;
}
else if (alignment == 0) // right
{
p.Text = "{\\an9}" + p.Text;
}
else
{
p.Text = "{\\an8}" + p.Text;
}
p.MarginV = verticalAlignment < 2 ? null : (verticalAlignment * 100 / 12.0).ToString(CultureInfo.InvariantCulture) + "%";
}
else if (CodePage == CodePageLatin || CodePage == CodePageLatinTurkish || CodePage == CodePageLatinCzech
|| (usesSecondaryCodePage && !isSecondaryCodePage))
else if (verticalAlignment < 9)
{
sb.Append(GetLatinString(GetEncoding(CodePage), buffer, ref index, overrides));
}
else if (CodePage == CodePageArabic)
{
sb.Append(GetArabicString(buffer, ref index));
}
else if (CodePage == CodePageHebrew)
{
sb.Append(GetHebrewString(buffer, ref index));
}
else if (CodePage == CodePageCyrillic)
{
sb.Append(GetCyrillicString(buffer, ref index));
}
else if (CodePage == CodePageGreek)
{
sb.Append(GetGreekString(buffer, ref index));
}
else if (CodePage == CodePageThai)
{
sb.Append(GetThaiString(buffer, ref index));
//sb.Append(GetEncoding(CodePage).GetString(buffer, index, 1).Replace("€", "ต"));
if (alignment == 1) // left
{
p.Text = "{\\an4}" + p.Text;
}
else if (alignment == 0) // right
{
p.Text = "{\\an6}" + p.Text;
}
else
{
p.Text = "{\\an5}" + p.Text;
}
var linesAfter = 12 - verticalAlignment - Math.Max(1, Utilities.GetNumberOfLines(p.Text));
p.MarginV = linesAfter <= 0 ? null : (linesAfter * 100 / 12.0).ToString(CultureInfo.InvariantCulture) + "%";
}
else
{
sb.Append(GetEncoding(CodePage).GetString(buffer, index, 1));
if (alignment == 1) // left
{
p.Text = "{\\an1}" + p.Text;
}
else if (alignment == 0) // right
{
p.Text = "{\\an3}" + p.Text;
}
var linesAfter = 12 - verticalAlignment - Math.Max(1, Utilities.GetNumberOfLines(p.Text));
p.MarginV = linesAfter <= 0 ? null : (linesAfter * 100 / 12.0).ToString(CultureInfo.InvariantCulture) + "%";
}
index++;
}
// Remove position tags
var indexOfPositioningCodes = p.Text.IndexOf("\u002e\u001f", StringComparison.Ordinal);
if (indexOfPositioningCodes > 0)
{
p.Text = p.Text.Substring(0, indexOfPositioningCodes + 1);
}
if (index + 20 >= buffer.Length)
{
return null;
p.Text = p.Text.RemoveControlCharactersButWhiteSpace().TrimEnd();
return p;
}
p.Text = sb.ToString();
p.Text = p.Text.Replace("\0", string.Empty);
p.Text = FixItalics(p.Text);
if (CodePage == CodePageArabic)
catch (InvalidOperationException ex)
{
p.Text = Utilities.FixEnglishTextInRightToLeftLanguage(p.Text, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
throw new InvalidOperationException($"Error in paragraph {p.StartTime} after '{sb}': {ex.Message}", ex);
}
else if (CodePage == CodePageHebrew)
{
p.Text = Utilities.FixEnglishTextInRightToLeftLanguage(p.Text, "0123456789abcdefghijklmnopqrstuvwxyz");
}
if (verticalAlignment < 5)
{
if (alignment == 1) // left
{
p.Text = "{\\an7}" + p.Text;
}
else if (alignment == 0) // right
{
p.Text = "{\\an9}" + p.Text;
}
else
{
p.Text = "{\\an8}" + p.Text;
}
}
else if (verticalAlignment < 9)
{
if (alignment == 1) // left
{
p.Text = "{\\an4}" + p.Text;
}
else if (alignment == 0) // right
{
p.Text = "{\\an6}" + p.Text;
}
else
{
p.Text = "{\\an5}" + p.Text;
}
}
else
{
if (alignment == 1) // left
{
p.Text = "{\\an1}" + p.Text;
}
else if (alignment == 0) // right
{
p.Text = "{\\an3}" + p.Text;
}
}
// Remove position tags
var indexOfPositioningCodes = p.Text.IndexOf("\u002e\u001f", StringComparison.Ordinal);
if (indexOfPositioningCodes > 0)
{
p.Text = p.Text.Substring(0, indexOfPositioningCodes + 1);
}
p.Text = p.Text.RemoveControlCharactersButWhiteSpace().TrimEnd();
return p;
}
/// <summary>
@ -1992,6 +2111,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
switch (codePage)
{
case CodePageLatin:
case CodePageLatinPortuguese:
return Encoding.GetEncoding("iso-8859-1");
case CodePageGreek:
return Encoding.GetEncoding("iso-8859-7");
@ -2019,9 +2139,10 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{
var dictionary = new Dictionary<int, string>
{
{ CodePageLatin, "en-da-no-sv-es-it-fr-pt-de-nl-pl-sq-hr-sr-ro-id" },
{ CodePageLatin, "en-da-no-sv-es-it-fr-de-nl-pl-sq-hr-sr-ro-id" },
{ CodePageGreek, "el" },
{ CodePageLatinCzech, "cs-sk" },
{ CodePageLatinCzech, "cs-sk-cz" },
{ CodePageLatinPortuguese, "pt" },
{ CodePageLatinTurkish, "tr" },
{ CodePageCyrillic, "bg-ru-uk-mk" },
{ CodePageHebrew, "he" },
@ -2441,7 +2562,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
}
}
if (arabicCharacter == null && b >= 0x20 && b < 0x70)
if (arabicCharacter == null && b >= 0x20 && b < 0x80)
{
return new SpecialCharacter(Encoding.ASCII.GetString(buffer, index, 1));
}
@ -2452,7 +2573,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
public static string GetHebrewString(byte[] buffer, ref int index)
{
var b = buffer[index];
if (b >= 0x20 && b < 0x70 && b != 44)
if (b >= 0x20 && b < 0x80 && b != 44)
{
return Encoding.ASCII.GetString(buffer, index, 1);
}
@ -2557,7 +2678,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
return string.Empty;
}
public static string GetGreekString(byte[] buffer, ref int index)
public static string GetGreekString(byte[] buffer, ref int index, bool isSecondary)
{
var b = buffer[index];
@ -2566,9 +2687,9 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
return Encoding.ASCII.GetString(buffer, index, 1);
}
if (GreekCodes.ContainsKey(b))
if (isSecondary && (buffer[index] == 0x2d || buffer[index] == 0x5f))
{
return GreekCodes[b].Character;
return GetLatinString(Encoding.ASCII, buffer, ref index, new Dictionary<int, SpecialCharacter>());
}
if (buffer.Length > index + 2)
@ -2588,6 +2709,11 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
}
}
if (GreekCodes.ContainsKey(b))
{
return GreekCodes[b].Character;
}
if (ThrowOnError)
{
throw new InvalidOperationException($"Unknown byte ({b}) in subtitle file @ binary offset {index}.");
@ -2630,6 +2756,12 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
return string.Empty;
}
/// <summary>
/// Decodes the character at <paramref name="index"/> in <paramref name="buffer"/> through the
/// Latin decoder, using the Latin Portuguese encoding and the Portuguese override table.
/// </summary>
/// <param name="buffer">Raw PAC bytes.</param>
/// <param name="index">Position to decode; passed by reference to the Latin decoder.</param>
/// <returns>The string produced by <c>GetLatinString</c>.</returns>
public static string GetPortugueseString(byte[] buffer, ref int index)
{
    return GetLatinString(GetEncoding(CodePageLatinPortuguese), buffer, ref index, LatinPortugueseOverrides);
}
internal static TimeCode GetTimeCode(int timeCodeIndex, byte[] buffer)
{
if (timeCodeIndex > 0)

View File

@ -38,9 +38,18 @@
//
// comboBoxCodePage
//
this.comboBoxCodePage.BackColor = System.Drawing.SystemColors.Window;
this.comboBoxCodePage.BackColorDisabled = System.Drawing.Color.FromArgb(((int)(((byte)(240)))), ((int)(((byte)(240)))), ((int)(((byte)(240)))));
this.comboBoxCodePage.BorderColor = System.Drawing.Color.FromArgb(((int)(((byte)(171)))), ((int)(((byte)(173)))), ((int)(((byte)(179)))));
this.comboBoxCodePage.BorderColorDisabled = System.Drawing.Color.FromArgb(((int)(((byte)(120)))), ((int)(((byte)(120)))), ((int)(((byte)(120)))));
this.comboBoxCodePage.ButtonForeColor = System.Drawing.SystemColors.ControlText;
this.comboBoxCodePage.ButtonForeColorDown = System.Drawing.Color.Orange;
this.comboBoxCodePage.ButtonForeColorOver = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215)))));
this.comboBoxCodePage.DropDownHeight = 400;
this.comboBoxCodePage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxCodePage.DropDownWidth = 203;
this.comboBoxCodePage.FormattingEnabled = true;
this.comboBoxCodePage.Items.AddRange(new object[] {
this.comboBoxCodePage.Items.AddRange(new string[] {
"Latin",
"Greek",
"Latin Czech",
@ -51,11 +60,17 @@
"Chinese Traditional (Big5)",
"Chinese Simplified (gb2312)",
"Korean",
"Japanese"});
"Japanese",
"Portuguese"});
this.comboBoxCodePage.Location = new System.Drawing.Point(12, 37);
this.comboBoxCodePage.MaxLength = 32767;
this.comboBoxCodePage.Name = "comboBoxCodePage";
this.comboBoxCodePage.SelectedIndex = -1;
this.comboBoxCodePage.SelectedItem = null;
this.comboBoxCodePage.SelectedText = "";
this.comboBoxCodePage.Size = new System.Drawing.Size(203, 21);
this.comboBoxCodePage.TabIndex = 0;
this.comboBoxCodePage.UsePopupWindow = false;
this.comboBoxCodePage.SelectedIndexChanged += new System.EventHandler(this.comboBoxCodePage_SelectedIndexChanged);
//
// labelCodePage
@ -81,6 +96,7 @@
this.textBoxPreview.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.textBoxPreview.FocusedColor = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215)))));
this.textBoxPreview.Location = new System.Drawing.Point(12, 89);
this.textBoxPreview.Multiline = true;
this.textBoxPreview.Name = "textBoxPreview";

View File

@ -4,6 +4,7 @@ using Nikse.SubtitleEdit.Logic;
using System;
using System.Text;
using System.Windows.Forms;
using static System.Net.Mime.MediaTypeNames;
namespace Nikse.SubtitleEdit.Forms
{
@ -117,7 +118,11 @@ namespace Nikse.SubtitleEdit.Forms
}
else if (CodePageIndex == Pac.CodePageGreek)
{
sb.Append(Pac.GetGreekString(_previewBuffer, ref index));
sb.Append(Pac.GetGreekString(_previewBuffer, ref index, false));
}
else if (CodePageIndex == Pac.CodePageLatinPortuguese)
{
sb.Append(Pac.GetPortugueseString(_previewBuffer, ref index));
}
else
{