Merge pull request #4007 from NickZ/encoding-linux

Fix encoding detection on non-Windows platforms
This commit is contained in:
Nikolaj Olsson 2020-02-23 11:31:11 +01:00 committed by GitHub
commit ae09ddc9c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 14 additions and 55 deletions

View File

@ -104,7 +104,7 @@ ECHO Merging assemblies with ILRepack...
FOR /D %%A IN (packages\ILRepack.*) DO (SET "ILREPACKDIR=%%A")
ECHO.
"%ILREPACKDIR%\tools\ILRepack.exe" /parallel /internalize /targetplatform:v4 /out:"bin\Release\SubtitleEdit.exe" "bin\Release\SubtitleEdit.exe"^
"bin\Release\libse.dll" "bin\Release\zlib.net.dll" "bin\Release\NHunspell.dll" "DLLs\Interop.QuartzTypeLib.dll"
"bin\Release\libse.dll" "bin\Release\zlib.net.dll" "bin\Release\NHunspell.dll" "bin\Release\UtfUnknown.dll" "DLLs\Interop.QuartzTypeLib.dll"
IF %ERRORLEVEL% NEQ 0 GOTO EndWithError
POPD

View File

@ -1,6 +1,7 @@
// Ripped from http://www.codeproject.com/KB/recipes/DetectEncoding.aspx
using Nikse.SubtitleEdit.Core.DetectEncoding.Multilang;
using UtfUnknown;
using System;
using System.Collections.Generic;
using System.IO;
@ -385,59 +386,21 @@ namespace Nikse.SubtitleEdit.Core.DetectEncoding
{
return new[] { Encoding.ASCII };
}
// expand the string to be at least 256 bytes
if (input.Length < 256)
{
byte[] newInput = new byte[256];
int steps = 256 / input.Length;
for (int i = 0; i < steps; i++)
{
Array.Copy(input, 0, newInput, input.Length * i, input.Length);
}
int rest = 256 % input.Length;
if (rest > 0)
{
Array.Copy(input, 0, newInput, steps * input.Length, rest);
}
input = newInput;
}
// Use UTF.Unknown to detect from input byte string
DetectionResult detectionResult = CharsetDetector.DetectFromBytes(input);
List<Encoding> result = new List<Encoding>();
// get the IMultiLanguage2 interface
IMultiLanguage2 multilang2 = new CMultiLanguageClass();
try
// add in order (results are ordered from most likely to least likely)
foreach (var detected in detectionResult.Details)
{
DetectEncodingInfo[] detectedEncdings = new DetectEncodingInfo[maxEncodings];
int scores = detectedEncdings.Length;
int srcLen = input.Length;
// setup options (none)
const MLDETECTCP options = MLDETECTCP.MLDETECTCP_NONE;
// finally... call to DetectInputCodepage
multilang2.DetectInputCodepage(options, 0,
ref input[0], ref srcLen, ref detectedEncdings[0], ref scores);
// get result
if (scores > 0)
result.Add(detected.Encoding);
if (result.Count == maxEncodings)
{
for (int i = 0; i < scores; i++)
{
// add the result
result.Add(Encoding.GetEncoding((int)detectedEncdings[i].nCodePage));
}
break;
}
}
finally
{
Marshal.FinalReleaseComObject(multilang2);
}
// nothing found
return result.ToArray();
}

View File

@ -1,4 +1,4 @@
using System;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
@ -1053,11 +1053,6 @@ namespace Nikse.SubtitleEdit.Core
public static Encoding DetectAnsiEncoding(byte[] buffer)
{
if (Utilities.IsRunningOnMono())
{
return Encoding.Default;
}
try
{
var greekEncoding = Encoding.GetEncoding(1253); // Greek

View File

@ -38,10 +38,11 @@
<ItemGroup Condition=" '$(TargetFramework)' == 'netstandard2.1' ">
<PackageReference Include="System.Drawing.Common" Version="4.7.0" />
<PackageReference Include="Microsoft.Win32.Registry" Version="4.7.0"/>
<PackageReference Include="Microsoft.Win32.Registry" Version="4.7.0" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="UTF.Unknown" Version="2.3.0" />
<PackageReference Include="zlib.net-mutliplatform" Version="1.0.4" />
</ItemGroup>