diff --git a/src/Logic/DetectEncoding/EncodingTools.cs b/src/Logic/DetectEncoding/EncodingTools.cs
new file mode 100644
index 000000000..54fb96fe1
--- /dev/null
+++ b/src/Logic/DetectEncoding/EncodingTools.cs
@@ -0,0 +1,529 @@
+// Ripped from http://www.codeproject.com/KB/recipes/DetectEncoding.aspx
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+using System.Runtime.InteropServices;
+using System.IO;
+
+namespace Nikse.SubtitleEdit.Logic.DetectEncoding
+{
+ public static class EncodingTools
+ {
+ // Code pages considered when choosing an encoding for a stream: ASCII, the
+ // default Windows code page (only when it is single-byte), UTF-16, and every
+ // other installed encoding that emits a preamble (byte order mark).
+ public static int[] PreferedEncodingsForStream;
+
+ // Code pages considered for MIME use, in order of preference: ASCII, the
+ // default Windows code page (only when it is single-byte), 50220 (JIS), the
+ // installed code pages numbered 1258 or lower (single-byte ones first), and UTF-16.
+ public static int[] PreferedEncodings;
+
+ // Every installed code page: ASCII, the default Windows code page and 50220 (JIS)
+ // first, then the remaining single-byte code pages, then the multibyte ones.
+ public static int[] AllEncodings;
+
+
+
+ /// <summary>
+ /// Static constructor that fills the three default code page lists
+ /// (stream, MIME and "all") in order of preference.
+ /// </summary>
+ static EncodingTools()
+ {
+     List<int> streamEncodings = new List<int>();
+     List<int> allEncodings = new List<int>();
+     List<int> mimeEncodings = new List<int>();
+
+     // enumerate the installed encodings once; the list is walked several times below
+     EncodingInfo[] installed = Encoding.GetEncodings();
+
+     // ASCII is the simplest encoding, so it goes first in every list
+     streamEncodings.Add(Encoding.ASCII.CodePage);
+     mimeEncodings.Add(Encoding.ASCII.CodePage);
+     allEncodings.Add(Encoding.ASCII.CodePage);
+
+     // the default code page is always second in the "all" list
+     AddIfMissing(allEncodings, Encoding.Default.CodePage);
+
+     // it is second in the stream and MIME lists only when it is single-byte
+     if (Encoding.Default.IsSingleByte)
+     {
+         AddIfMissing(streamEncodings, Encoding.Default.CodePage);
+         AddIfMissing(mimeEncodings, Encoding.Default.CodePage);
+     }
+
+     // prefer JIS over JIS-SHIFT (JIS is detected better than JIS-SHIFT)
+     // this one does include cyrillic (strange but true)
+     AddIfMissing(allEncodings, 50220);
+     AddIfMissing(mimeEncodings, 50220);
+
+     // always allow unicode flavours for streams (they all have a preamble)
+     AddIfMissing(streamEncodings, Encoding.Unicode.CodePage);
+     foreach (EncodingInfo enc in installed)
+     {
+         if (streamEncodings.Contains(enc.CodePage))
+             continue;
+
+         if (Encoding.GetEncoding(enc.CodePage).GetPreamble().Length > 0)
+             streamEncodings.Add(enc.CodePage);
+     }
+
+     // stream is done here
+     PreferedEncodingsForStream = streamEncodings.ToArray();
+
+     // two passes over the installed encodings: all single-byte encodings first,
+     // then the rest (multibyte)
+     foreach (bool wantSingleByte in new bool[] { true, false })
+     {
+         foreach (EncodingInfo enc in installed)
+         {
+             if (enc.GetEncoding().IsSingleByte != wantSingleByte)
+                 continue;
+
+             AddIfMissing(allEncodings, enc.CodePage);
+
+             // only code pages numbered 1258 or lower go into the MIME list;
+             // skipping entries already present keeps the list free of duplicates
+             if (enc.CodePage <= 1258)
+                 AddIfMissing(mimeEncodings, enc.CodePage);
+         }
+     }
+
+     // make sure UTF-16 is part of the MIME list
+     AddIfMissing(mimeEncodings, Encoding.Unicode.CodePage);
+
+     PreferedEncodings = mimeEncodings.ToArray();
+     AllEncodings = allEncodings.ToArray();
+ }
+
+ /// <summary>
+ /// Appends a code page to a list unless the list already contains it.
+ /// </summary>
+ /// <param name="codePages">the list to extend</param>
+ /// <param name="codePage">the code page identifier to append</param>
+ private static void AddIfMissing(List<int> codePages, int codePage)
+ {
+     if (!codePages.Contains(codePage))
+         codePages.Add(codePage);
+ }
+
+
+ /// <summary>
+ /// Checks whether the specified string consists solely of ASCII characters.
+ /// </summary>
+ /// <param name="data">the text to inspect; may be null</param>
+ /// <returns>true if data is null, empty, or contains no character above 127; otherwise false</returns>
+ public static bool IsAscii(string data)
+ {
+     // null and empty strings count as ASCII
+     if (string.IsNullOrEmpty(data))
+         return true;
+
+     for (int index = 0; index < data.Length; index++)
+     {
+         // the first character outside the 7-bit range settles the answer
+         if (data[index] >= 128)
+             return false;
+     }
+
+     return true;
+ }
+
+ /// <summary>
+ /// Gets the best Encoding for usage in mime encodings.
+ /// Forwards to the two-argument overload using the PreferedEncodings code page list.
+ /// </summary>
+ /// <param name="input">text to detect</param>
+ /// <returns>the suggested encoding</returns>
+ public static Encoding GetMostEfficientEncoding(string input)
+ {
+ return GetMostEfficientEncoding(input, PreferedEncodings);
+ }
+
+ /// <summary>
+ /// Gets the best Encoding for usage in a stream.
+ /// Forwards to GetMostEfficientEncoding(string, int[]) using the PreferedEncodingsForStream code page list.
+ /// </summary>
+ /// <param name="input">text to detect</param>
+ /// <returns>the suggested encoding</returns>
+ public static Encoding GetMostEfficientEncodingForStream(string input)
+ {
+ return GetMostEfficientEncoding(input, PreferedEncodingsForStream);
+ }
+
+ /// <summary>
+ /// Gets the best fitting encoding from a list of possible encodings.
+ /// </summary>
+ /// <param name="input">text to detect</param>
+ /// <param name="preferedEncodings">an array of codepages</param>
+ /// <returns>the suggested encoding</returns>
+ public static Encoding GetMostEfficientEncoding(string input, int[] preferedEncodings)
+ {
+     Encoding detected = DetectOutgoingEncoding(input, preferedEncodings, true);
+
+     // anything other than UTF-16 is returned exactly as detected
+     if (detected.CodePage != Encoding.Unicode.CodePage)
+         return detected;
+
+     // UTF-16 was suggested: pick the Unicode flavour that needs the fewest bytes.
+     // Candidates are tried in this order and a later one only wins when strictly smaller.
+     Encoding[] unicodeFlavours = { Encoding.UTF7, Encoding.UTF8, Encoding.Unicode };
+     Encoding smallest = unicodeFlavours[0];
+     int smallestSize = smallest.GetByteCount(input);
+
+     for (int i = 1; i < unicodeFlavours.Length; i++)
+     {
+         int size = unicodeFlavours[i].GetByteCount(input);
+         if (size < smallestSize)
+         {
+             smallest = unicodeFlavours[i];
+             smallestSize = size;
+         }
+     }
+
+     return smallest;
+ }
+
+ /// <summary>
+ /// Detects an encoding for the given text, choosing from the PreferedEncodings
+ /// code page list with the preserve-order option enabled.
+ /// </summary>
+ /// <param name="input">text to detect</param>
+ /// <returns>the encoding returned by the three-argument overload</returns>
+ public static Encoding DetectOutgoingEncoding(string input)
+ {
+ return DetectOutgoingEncoding(input, PreferedEncodings, true);
+ }
+
+ /// <summary>
+ /// Detects an encoding for the given text, choosing from the PreferedEncodingsForStream
+ /// code page list with the preserve-order option enabled.
+ /// </summary>
+ /// <param name="input">text to detect</param>
+ /// <returns>the encoding returned by DetectOutgoingEncoding(string, int[], bool)</returns>
+ public static Encoding DetectOutgoingStreamEncoding(string input)
+ {
+ return DetectOutgoingEncoding(input, PreferedEncodingsForStream, true);
+ }
+
+ /// <summary>
+ /// Detects the encodings for the given text, choosing from the PreferedEncodings
+ /// code page list with the preserve-order option enabled.
+ /// </summary>
+ /// <param name="input">text to detect</param>
+ /// <returns>the encodings returned by the three-argument overload</returns>
+ public static Encoding[] DetectOutgoingEncodings(string input)
+ {
+ return DetectOutgoingEncodings(input, PreferedEncodings, true);
+ }
+
+ /// <summary>
+ /// Detects the encodings for the given text, choosing from the PreferedEncodingsForStream
+ /// code page list with the preserve-order option enabled.
+ /// </summary>
+ /// <param name="input">text to detect</param>
+ /// <returns>the encodings returned by DetectOutgoingEncodings(string, int[], bool)</returns>
+ public static Encoding[] DetectOutgoingStreamEncodings(string input)
+ {
+ return DetectOutgoingEncodings(input, PreferedEncodingsForStream, true);
+ }
+
+ /// <summary>
+ /// Calls IMultiLanguage3.DetectOutboundCodePage for the given text and
+ /// returns the first code page it reports.
+ /// </summary>
+ /// <param name="input">text to detect; must not be null</param>
+ /// <param name="preferedEncodings">code pages to choose from, or null to pass no preferred list</param>
+ /// <param name="preserveOrder">true to add the MLDETECTF_PRESERVE_ORDER option</param>
+ /// <returns>the first detected encoding, or ASCII when input is empty or no code page was reported</returns>
+ /// <exception cref="ArgumentNullException">input is null</exception>
+ private static Encoding DetectOutgoingEncoding(string input, int[] preferedEncodings, bool preserveOrder)
+ {
+     if (input == null)
+         throw new ArgumentNullException("input");
+
+     // empty strings can always be encoded as ASCII
+     if (input.Length == 0)
+         return Encoding.ASCII;
+
+     Encoding result = Encoding.ASCII;
+
+     // get the IMultiLanguage3 interface ("new" either yields an object or throws,
+     // so there is nothing to null-check afterwards)
+     MultiLanguage.IMultiLanguage3 multilang3 = new MultiLanguage.CMultiLanguageClass();
+
+     // both buffers are allocated inside the try block so that a failure of the
+     // second allocation cannot leak the first one
+     IntPtr pPrefEncs = IntPtr.Zero;
+     IntPtr pDetectedEncs = IntPtr.Zero;
+     try
+     {
+         int[] resultCodePages = new int[preferedEncodings != null ? preferedEncodings.Length : Encoding.GetEncodings().Length];
+         uint detectedCodepages = (uint)resultCodePages.Length;
+         ushort specialChar = (ushort)'?';
+
+         // copy the preferred list (if any) into unmanaged memory
+         if (preferedEncodings != null)
+         {
+             pPrefEncs = Marshal.AllocCoTaskMem(sizeof(uint) * preferedEncodings.Length);
+             Marshal.Copy(preferedEncodings, 0, pPrefEncs, preferedEncodings.Length);
+         }
+
+         // zero-initialised unmanaged buffer that receives the detected code pages
+         pDetectedEncs = Marshal.AllocCoTaskMem(sizeof(uint) * resultCodePages.Length);
+         Marshal.Copy(resultCodePages, 0, pDetectedEncs, resultCodePages.Length);
+
+         MultiLanguage.MLCPF options = MultiLanguage.MLCPF.MLDETECTF_VALID_NLS;
+         if (preserveOrder)
+             options |= MultiLanguage.MLCPF.MLDETECTF_PRESERVE_ORDER;
+
+         if (preferedEncodings != null)
+             options |= MultiLanguage.MLCPF.MLDETECTF_PREFERRED_ONLY;
+
+         multilang3.DetectOutboundCodePage(options,
+             input, (uint)input.Length,
+             pPrefEncs, (uint)(preferedEncodings == null ? 0 : preferedEncodings.Length),
+             pDetectedEncs, ref detectedCodepages,
+             ref specialChar);
+
+         // get result: only the first reported code page is used
+         if (detectedCodepages > 0)
+         {
+             int[] theResult = new int[detectedCodepages];
+             Marshal.Copy(pDetectedEncs, theResult, 0, theResult.Length);
+             result = Encoding.GetEncoding(theResult[0]);
+         }
+     }
+     finally
+     {
+         try
+         {
+             if (pPrefEncs != IntPtr.Zero)
+                 Marshal.FreeCoTaskMem(pPrefEncs);
+             if (pDetectedEncs != IntPtr.Zero)
+                 Marshal.FreeCoTaskMem(pDetectedEncs);
+         }
+         finally
+         {
+             Marshal.FinalReleaseComObject(multilang3);
+         }
+     }
+     return result;
+ }
+
+ /// <summary>
+ /// Calls IMultiLanguage3.DetectOutboundCodePage for the given text and
+ /// returns every code page it reports as an Encoding.
+ /// </summary>
+ /// <param name="input">text to detect; must not be null</param>
+ /// <param name="preferedEncodings">code pages to choose from, or null to pass no preferred list</param>
+ /// <param name="preserveOrder">true to add the MLDETECTF_PRESERVE_ORDER option</param>
+ /// <returns>the detected encodings; a single ASCII entry when input is empty; an empty array when nothing was reported</returns>
+ /// <exception cref="ArgumentNullException">input is null</exception>
+ public static Encoding[] DetectOutgoingEncodings(string input, int[] preferedEncodings, bool preserveOrder)
+ {
+     if (input == null)
+         throw new ArgumentNullException("input");
+
+     // empty strings can always be encoded as ASCII
+     if (input.Length == 0)
+         return new Encoding[] { Encoding.ASCII };
+
+     List<Encoding> result = new List<Encoding>();
+
+     // get the IMultiLanguage3 interface ("new" either yields an object or throws,
+     // so there is nothing to null-check afterwards)
+     MultiLanguage.IMultiLanguage3 multilang3 = new MultiLanguage.CMultiLanguageClass();
+
+     // both buffers are allocated inside the try block so that a failure of the
+     // second allocation cannot leak the first one
+     IntPtr pPrefEncs = IntPtr.Zero;
+     IntPtr pDetectedEncs = IntPtr.Zero;
+     try
+     {
+         // without a preferred list, leave room for every installed code page
+         // (same sizing rule as the single-result DetectOutgoingEncoding)
+         int[] resultCodePages = new int[preferedEncodings != null ? preferedEncodings.Length : Encoding.GetEncodings().Length];
+         uint detectedCodepages = (uint)resultCodePages.Length;
+         ushort specialChar = (ushort)'?';
+
+         // the preferred list is optional; the result buffer is always required
+         if (preferedEncodings != null)
+         {
+             pPrefEncs = Marshal.AllocCoTaskMem(sizeof(uint) * preferedEncodings.Length);
+             Marshal.Copy(preferedEncodings, 0, pPrefEncs, preferedEncodings.Length);
+         }
+
+         pDetectedEncs = Marshal.AllocCoTaskMem(sizeof(uint) * resultCodePages.Length);
+         Marshal.Copy(resultCodePages, 0, pDetectedEncs, resultCodePages.Length);
+
+         MultiLanguage.MLCPF options = MultiLanguage.MLCPF.MLDETECTF_VALID_NLS;
+         if (preserveOrder)
+             options |= MultiLanguage.MLCPF.MLDETECTF_PRESERVE_ORDER;
+
+         // restricting to the preferred list only makes sense when one was supplied
+         if (preferedEncodings != null)
+             options |= MultiLanguage.MLCPF.MLDETECTF_PREFERRED_ONLY;
+
+         // finally... call to DetectOutboundCodePage
+         multilang3.DetectOutboundCodePage(options,
+             input, (uint)input.Length,
+             pPrefEncs, (uint)(preferedEncodings == null ? 0 : preferedEncodings.Length),
+             pDetectedEncs, ref detectedCodepages,
+             ref specialChar);
+
+         // get result
+         if (detectedCodepages > 0)
+         {
+             int[] theResult = new int[detectedCodepages];
+             Marshal.Copy(pDetectedEncs, theResult, 0, theResult.Length);
+
+             // get the encodings for the codepages
+             for (int i = 0; i < theResult.Length; i++)
+                 result.Add(Encoding.GetEncoding(theResult[i]));
+         }
+     }
+     finally
+     {
+         try
+         {
+             if (pPrefEncs != IntPtr.Zero)
+                 Marshal.FreeCoTaskMem(pPrefEncs);
+             if (pDetectedEncs != IntPtr.Zero)
+                 Marshal.FreeCoTaskMem(pDetectedEncs);
+         }
+         finally
+         {
+             Marshal.FinalReleaseComObject(multilang3);
+         }
+     }
+
+     // empty when nothing was found
+     return result.ToArray();
+ }
+
+
+ /// <summary>
+ /// Detects the most probable codepage from a byte array.
+ /// </summary>
+ /// <param name="input">array containing the raw data</param>
+ /// <returns>the detected encoding, or the default encoding if nothing was detected or a COMException was thrown</returns>
+ public static Encoding DetectInputCodepage(byte[] input)
+ {
+     Encoding[] candidates;
+     try
+     {
+         // a single candidate is all that is needed
+         candidates = DetectInputCodepages(input, 1);
+     }
+     catch (COMException)
+     {
+         // return default codepage on error
+         return Encoding.Default;
+     }
+
+     return candidates.Length > 0 ? candidates[0] : Encoding.Default;
+ }
+
+ /// <summary>
+ /// Returns up to maxEncodings codepages that are assumed to be appropriate
+ /// for the given raw data, as reported by IMultiLanguage2.DetectInputCodepage.
+ /// </summary>
+ /// <param name="input">array containing the raw data; must not be null</param>
+ /// <param name="maxEncodings">maximum number of encodings to detect; must be at least 1</param>
+ /// <returns>an array of Encoding with assumed encodings; a single ASCII entry when input is empty</returns>
+ /// <exception cref="ArgumentOutOfRangeException">maxEncodings is less than 1</exception>
+ /// <exception cref="ArgumentNullException">input is null</exception>
+ public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings)
+ {
+     // the parameter name comes first and the message second in this constructor
+     if (maxEncodings < 1)
+         throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returend");
+
+     if (input == null)
+         throw new ArgumentNullException("input");
+
+     // empty strings can always be encoded as ASCII
+     if (input.Length == 0)
+         return new Encoding[] { Encoding.ASCII };
+
+     // expand short input to 256 bytes by repeating it; the caller's array is left untouched
+     if (input.Length < 256)
+     {
+         byte[] newInput = new byte[256];
+         for (int i = 0; i < newInput.Length; i++)
+             newInput[i] = input[i % input.Length];
+         input = newInput;
+     }
+
+     List<Encoding> result = new List<Encoding>();
+
+     // get the IMultiLanguage2 interface ("new" either yields an object or throws,
+     // so there is nothing to null-check afterwards)
+     MultiLanguage.IMultiLanguage2 multilang2 = new MultiLanguage.CMultiLanguageClass();
+     try
+     {
+         MultiLanguage.DetectEncodingInfo[] detectedEncdings = new MultiLanguage.DetectEncodingInfo[maxEncodings];
+
+         // in: capacity of the result array; out: number of entries filled in
+         int scores = detectedEncdings.Length;
+         int srcLen = input.Length;
+
+         // setup options (none)
+         MultiLanguage.MLDETECTCP options = MultiLanguage.MLDETECTCP.MLDETECTCP_NONE;
+
+         // finally... call to DetectInputCodepage
+         multilang2.DetectInputCodepage(options, 0,
+             ref input[0], ref srcLen, ref detectedEncdings[0], ref scores);
+
+         // get result
+         for (int i = 0; i < scores; i++)
+             result.Add(Encoding.GetEncoding((int)detectedEncdings[i].nCodePage));
+     }
+     finally
+     {
+         Marshal.FinalReleaseComObject(multilang2);
+     }
+
+     // empty when nothing was found
+     return result.ToArray();
+ }
+
+
+ /// <summary>
+ /// Opens a text file and returns the content
+ /// decoded with the most probable encoding.
+ /// </summary>
+ /// <param name="path">path to the source file</param>
+ /// <returns>the text content of the file</returns>
+ /// <exception cref="ArgumentNullException">path is null</exception>
+ public static string ReadTextFile(string path)
+ {
+     if (path == null)
+         throw new ArgumentNullException("path");
+
+     // actually load the file content; File.ReadAllBytes opens the file for
+     // reading only, reads every byte and closes the file again
+     byte[] rawData = File.ReadAllBytes(path);
+
+     Encoding enc = DetectInputCodepage(rawData);
+     return enc.GetString(rawData);
+ }
+
+ /// <summary>
+ /// Returns a stream reader for the given
+ /// text file with the best encoding applied.
+ /// </summary>
+ /// <param name="path">path to the file</param>
+ /// <returns>a StreamReader for the file; disposing it closes the file</returns>
+ /// <exception cref="ArgumentNullException">path is null</exception>
+ public static StreamReader OpenTextFile(string path)
+ {
+     if (path == null)
+         throw new ArgumentNullException("path");
+
+     // read access is all a reader needs; this also works for read-only files
+     Stream stream = File.OpenRead(path);
+     try
+     {
+         return OpenTextStream(stream);
+     }
+     catch
+     {
+         // no reader took ownership of the stream, so close it here
+         stream.Close();
+         throw;
+     }
+ }
+
+ /// <summary>
+ /// Creates a stream reader from a stream and detects
+ /// the encoding from the first bytes in the stream.
+ /// </summary>
+ /// <param name="stream">a seekable stream to wrap</param>
+ /// <returns>the newly created StreamReader, positioned at the start of the stream</returns>
+ /// <exception cref="ArgumentNullException">stream is null</exception>
+ /// <exception cref="ArgumentException">stream does not support seeking</exception>
+ public static StreamReader OpenTextStream(Stream stream)
+ {
+     // check stream parameter
+     if (stream == null)
+         throw new ArgumentNullException("stream");
+     if (!stream.CanSeek)
+         throw new ArgumentException("the stream must support seek operations", "stream");
+
+     // seek to stream start
+     stream.Seek(0, SeekOrigin.Begin);
+
+     // buffer for preamble and up to 512 bytes of sample text for detection
+     byte[] buf = new byte[System.Math.Min(stream.Length, 512)];
+
+     // Stream.Read may deliver fewer bytes than requested, so keep reading
+     // until the sample is complete or the stream ends
+     int filled = 0;
+     while (filled < buf.Length)
+     {
+         int read = stream.Read(buf, filled, buf.Length - filled);
+         if (read <= 0)
+             break;
+         filled += read;
+     }
+
+     // drop the unread tail so that zero bytes never take part in the detection
+     if (filled < buf.Length)
+         Array.Resize(ref buf, filled);
+
+     Encoding detectedEncoding = DetectInputCodepage(buf);
+
+     // seek back to stream start
+     stream.Seek(0, SeekOrigin.Begin);
+
+     return new StreamReader(stream, detectedEncoding);
+ }
+
+ }
+
+
+}