diff --git a/src/libse/Common/FileUtil.cs b/src/libse/Common/FileUtil.cs index f9fa0760c..81296a813 100644 --- a/src/libse/Common/FileUtil.cs +++ b/src/libse/Common/FileUtil.cs @@ -13,7 +13,7 @@ using System.Text.RegularExpressions; namespace Nikse.SubtitleEdit.Core.Common { /// - /// File related utilities. + /// Provides utility methods for file operations and file type identification. /// public static class FileUtil { @@ -21,7 +21,7 @@ namespace Nikse.SubtitleEdit.Core.Common /// Opens a binary file in read/write shared mode, reads the contents of the file into a /// byte array, and then closes the file. /// - /// The file to open for reading. + /// The file to open for reading. /// A byte array containing the contents of the file. public static byte[] ReadAllBytesShared(string path) { @@ -51,6 +51,12 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Opens a binary file in read/write shared mode, reads the specified number of bytes from the file into a byte array, and then closes the file. + /// + /// The file to open for reading. + /// The number of bytes to read from the file. + /// A byte array containing the specified number of bytes read from the file. public static byte[] ReadBytesShared(string path, int bytesToRead) { using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -80,6 +86,13 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Opens a text file in read/write shared mode, reads all lines of the file into a list of strings, + /// and then closes the file. + /// + /// The file to open for reading. + /// The encoding used to decode the text from the file. + /// A list of strings containing all lines of the file. 
public static List ReadAllLinesShared(string path, Encoding encoding) { var bytes = ReadAllBytesShared(path); @@ -96,6 +109,13 @@ namespace Nikse.SubtitleEdit.Core.Common return encoding.GetString(bytes).SplitToLines(); } + /// + /// Opens a text file in read/write shared mode, reads the contents of the file into a + /// string, and then closes the file. + /// + /// The file to open for reading. + /// The encoding applied to the contents of the file. + /// A string containing the contents of the file. public static string ReadAllTextShared(string path, Encoding encoding) { var bytes = ReadAllBytesShared(path); @@ -112,6 +132,11 @@ namespace Nikse.SubtitleEdit.Core.Common return encoding.GetString(bytes); } + /// + /// Determines whether the specified file is a ZIP archive based on its header bytes. + /// + /// The name of the file to check. + /// True if the file is a ZIP archive; otherwise, false. public static bool IsZip(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -130,6 +155,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Checks if the specified file is a 7-Zip file by reading its signature bytes. + /// + /// The path to the file to check. + /// True if the file is a 7-Zip file; otherwise, false. public static bool Is7Zip(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -150,6 +180,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Checks if the given file is an MP3 file by examining its file headers and extension. + /// + /// The path of the file to check. + /// True if the file is an MP3 file; otherwise, false. 
public static bool IsMp3(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -167,6 +202,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Checks if the specified file is a WAV audio file by examining its header and file extension. + /// + /// The name of the file to check. + /// True if the file is a WAV audio file; otherwise, false. public static bool IsWav(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -192,6 +232,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Determines if the specified file is a RAR archive by checking its magic number. + /// + /// The name of the file to check. + /// True if the file is a RAR archive; otherwise, false. public static bool IsRar(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -210,6 +255,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Checks if the specified file is a PNG image by reading its header bytes. + /// + /// The name of the file to check. + /// True if the file is a PNG image; otherwise, false. public static bool IsPng(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -232,6 +282,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Checks if the specified file is an SRR (Sample Rate Reduction) file by reading its initial bytes. + /// + /// The path to the file to check. + /// True if the file is an SRR file, otherwise false. public static bool IsSrr(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -249,6 +304,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Checks if the file is a JPG by reading the header of the file. + /// + /// The path to the file to check. + /// True if the file is a JPG, otherwise false. 
public static bool IsJpg(string fileName) { // jpeg header - always starts with FFD8 (Start Of Image marker) + FF + a unknown byte (most often E0 or E1 though) @@ -267,6 +327,12 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Determines if the specified file is a Torrent file by reading its initial bytes and + /// checking for the presence of a specific Torrent file signature. + /// + /// The name of the file to verify. + /// True if the file is a Torrent file; otherwise, false. public static bool IsTorrentFile(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -277,6 +343,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Checks if a given file is a Blu-ray subtitle (.sup) file by reading its first two bytes. + /// + /// The file to check for Blu-ray subtitle format. + /// True if the file is a Blu-ray subtitle file, otherwise false. public static bool IsBluRaySup(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -288,6 +359,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Determines if a file is a transport stream by inspecting its contents. + /// + /// The name of the file to check. + /// True if the file is identified as a transport stream; otherwise, false. public static bool IsTransportStream(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -312,6 +388,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Determines whether the specified file is a M2 transport stream. + /// + /// The name of the file to check. + /// true if the file is a M2 transport stream; otherwise, false. 
public static bool IsM2TransportStream(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -320,6 +401,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Determines whether the specified file is an MPEG-2 Private Stream 2 file. + /// + /// The file to check. + /// True if the specified file is an MPEG-2 Private Stream 2 file, otherwise false. public static bool IsMpeg2PrivateStream2(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -330,6 +416,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Determines whether the specified file is a VobSub subtitle file. + /// + /// The path of the file to check. + /// True if the file is a VobSub subtitle file; otherwise, false. public static bool IsVobSub(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -341,6 +432,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Determines whether the specified file is a Manzanita file by reading the first 17 bytes. + /// + /// The name of the file to check. + /// True if the file is a Manzanita file; otherwise, false. public static bool IsManzanita(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -352,6 +448,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Determines if the specified file is in the SP DVD SUP file format. + /// + /// The path of the file to check. + /// True if the file is an SP DVD SUP file; otherwise, false. public static bool IsSpDvdSup(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -423,6 +524,11 @@ namespace Nikse.SubtitleEdit.Core.Common return false; } + /// + /// Determines whether a file is in Rich Text Format (RTF). + /// + /// The path to the file to check. 
+ /// true if the file is in RTF format; otherwise, false. public static bool IsRtf(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -440,6 +546,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Checks if a file contains a UTF-8 byte order mark (BOM). + /// + /// The name of the file to check. + /// True if the file starts with a UTF-8 BOM; otherwise, false. public static bool HasUtf8Bom(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -450,6 +561,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Determines if a specified subtitle file consists entirely of binary zeroes. + /// + /// The path to the subtitle file to be checked. + /// True if the file consists entirely of binary zeroes; otherwise, false. public static bool IsSubtitleFileAllBinaryZeroes(string fileName) { using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -476,6 +592,11 @@ namespace Nikse.SubtitleEdit.Core.Common return true; } + /// + /// Determines whether the specified path represents a file. + /// + /// The path to check. + /// True if the specified path is a file; otherwise, false. public static bool IsFile(string path) { if (!Path.IsPathRooted(path)) @@ -486,6 +607,11 @@ namespace Nikse.SubtitleEdit.Core.Common return (File.GetAttributes(path) & FileAttributes.Directory) != FileAttributes.Directory; } + /// + /// Determines if the specified path refers to an existing directory. + /// + /// The path to check. + /// true if the path refers to an existing directory; otherwise, false. 
public static bool IsDirectory(string path) { if (!Path.IsPathRooted(path)) @@ -496,6 +622,11 @@ namespace Nikse.SubtitleEdit.Core.Common return (File.GetAttributes(path) & FileAttributes.Directory) == FileAttributes.Directory; } + /// + /// Determines whether the specified file is plain text based on its content and length. + /// + /// The path to the file to check. + /// True if the file is determined to be plain text; otherwise, false. public static bool IsPlainText(string fileName) { var fileInfo = new FileInfo(fileName); @@ -565,6 +696,11 @@ namespace Nikse.SubtitleEdit.Core.Common return numberCount < numberThreshold && letterCount > letterThreshold; } + /// + /// Attempts to read video information from the Matroska header of a specified file. + /// + /// The path to the Matroska file. + /// A VideoInfo object containing video properties if successful; otherwise, an object with Success set to false. public static VideoInfo TryReadVideoInfoViaMatroskaHeader(string fileName) { var info = new VideoInfo { Success = false }; @@ -587,6 +723,11 @@ namespace Nikse.SubtitleEdit.Core.Common return info; } + /// + /// Attempts to read video information from an AVI file header. + /// + /// The path to the AVI file to be read. + /// A object containing details about the video. If reading the information fails, the Success property of the returned object will be false. public static VideoInfo TryReadVideoInfoViaAviHeader(string fileName) { var info = new VideoInfo { Success = false }; @@ -619,6 +760,11 @@ namespace Nikse.SubtitleEdit.Core.Common return info; } + /// + /// Attempts to read video information from an MP4 file. + /// + /// The path to the MP4 file. + /// A VideoInfo object containing the video information, with the Success property indicating whether reading was successful. 
public static VideoInfo TryReadVideoInfoViaMp4(string fileName) { var info = new VideoInfo { Success = false }; @@ -646,11 +792,23 @@ namespace Nikse.SubtitleEdit.Core.Common return info; } + /// + /// Generates a unique temporary file name with the specified extension. + /// + /// The extension for the temporary file name. + /// A string containing the full path of the temporary file. public static string GetTempFileName(string extension) { return Path.GetTempPath() + Guid.NewGuid() + extension; } + /// + /// Writes the specified string to a file, using the specified encoding. If the encoding + /// is UTF-8 without BOM, it writes the content without a BOM. + /// + /// The file to write to. + /// The string to write to the file. + /// The encoding to use for writing the text. public static void WriteAllText(string fileName, string contents, TextEncoding encoding) { if (encoding.DisplayName == TextEncoding.Utf8WithoutBom) @@ -667,6 +825,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Writes specified text to a file with UTF-8 encoding, checking the global setting for the preferred encoding method (with or without BOM). + /// + /// The path and name of the file to write to. + /// The text content to be written to the file. public static void WriteAllTextWithDefaultUtf8(string fileName, string contents) { if (Configuration.Settings.General.DefaultEncoding == TextEncoding.Utf8WithoutBom) @@ -683,6 +846,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Determines whether the specified file is a valid Matroska file. + /// + /// The name of the file to check. + /// True if the file is a valid Matroska file; otherwise, false. public static bool IsMatroskaFile(string fileName) { using (var validator = new MatroskaFile(fileName)) @@ -691,6 +859,11 @@ namespace Nikse.SubtitleEdit.Core.Common } } + /// + /// Checks if a file is locked by attempting to open it with exclusive read access. + /// + /// The name of the file to check. 
+ /// True if the file is locked, otherwise false. public static bool IsFileLocked(string fileName) { try @@ -709,6 +882,12 @@ namespace Nikse.SubtitleEdit.Core.Common return false; } + /// + /// Searches for a subtitle file that matches the given video file in specified directories. + /// + /// The base path to search for subtitle files. + /// The video file name for which a matching subtitle is being sought. + /// The full path of the found subtitle file, or an empty string if no matching subtitle is found. public static string TryLocateSubtitleFile(string path, string videoFileName) { // search in these subdirectories: \Subs;\Sub;\Subtitles; diff --git a/src/libse/Common/HtmlUtil.cs b/src/libse/Common/HtmlUtil.cs index 180a1f341..3086e702e 100644 --- a/src/libse/Common/HtmlUtil.cs +++ b/src/libse/Common/HtmlUtil.cs @@ -14,10 +14,29 @@ namespace Nikse.SubtitleEdit.Core.Common /// public static class HtmlUtil { + /// + /// Represents the HTML tag used for italic text formatting. + /// public static string TagItalic => "i"; + + /// + /// Represents the HTML tag used for bold text formatting. + /// public static string TagBold => "b"; + + /// + /// Represents the HTML tag used for underlined text formatting. + /// public static string TagUnderline => "u"; + + /// + /// Represents the HTML tag used for specifying font attributes. + /// public static string TagFont => "font"; + + /// + /// Represents the tag name consisting of the Cyrillic small letter Byelorussian-Ukrainian i ('і', U+0456). + /// public static string TagCyrillicI => "\u0456"; // Cyrillic Small Letter Byelorussian-Ukrainian i (http://graphemica.com/%D1%96) private static readonly Regex TagOpenRegex = new Regex(@"<\s*(?:/\s*)?(\w+)[^>]*>", RegexOptions.Compiled); @@ -367,6 +386,12 @@ namespace Nikse.SubtitleEdit.Core.Common return encoded.ToString(); } + /// + /// Removes all HTML tags from the input string, optionally including SSA tags. + /// + /// The input string that may contain HTML tags. 
+ /// A boolean value indicating whether SSA tags should also be removed. + /// A new string with all HTML tags removed, and optionally SSA tags removed. public static string RemoveHtmlTags(string input, bool alsoSsaTags = false) { if (input == null || input.Length < 3) @@ -523,6 +548,11 @@ namespace Nikse.SubtitleEdit.Core.Common return new string(array, 0, arrayIndex); } + /// + /// Determines whether the specified text is a URL. + /// + /// The text to evaluate. + /// True if the text is considered a URL, otherwise false. public static bool IsUrl(string text) { if (string.IsNullOrWhiteSpace(text) || text.Length < 6 || text.IndexOf('.') < 0 || text.IndexOf(' ') >= 0) @@ -546,6 +576,11 @@ namespace Nikse.SubtitleEdit.Core.Common return false; } + /// + /// Determines if the provided text starts with a URL-like string. + /// + /// The text to examine. + /// True if the text starts with a URL-like string; otherwise, false. public static bool StartsWithUrl(string text) { if (string.IsNullOrWhiteSpace(text)) @@ -564,6 +599,11 @@ namespace Nikse.SubtitleEdit.Core.Common private static readonly string[] UppercaseTags = { "", "", "", "", "", "", "" }; + /// + /// Converts all uppercase HTML tags in the input string to lowercase. + /// + /// The input string containing HTML tags to be converted. + /// A new string with all uppercase HTML tags converted to lowercase. public static string FixUpperTags(string input) { if (string.IsNullOrEmpty(input) || input.IndexOf('<') < 0) @@ -588,6 +628,11 @@ namespace Nikse.SubtitleEdit.Core.Common return text; } + /// + /// Determines if the provided text contains formattable content not enclosed within HTML-like tags. + /// + /// The input text to be checked. + /// True if the text contains any formattable content (letters or digits) outside of HTML-like tags; otherwise, false. 
public static bool IsTextFormattable(in string text) { if (string.IsNullOrEmpty(text)) @@ -641,7 +686,12 @@ namespace Nikse.SubtitleEdit.Core.Common "< / i >", "< /i>", "", "< /i >", "", "", "< / i>", "", "< / I >", "< /I>", "", "< /I >", "", "", "< / I>", "" }; - + + /// + /// Fix invalid or improperly formatted italic tags in the input HTML string. + /// + /// The input HTML string to process. + /// A string with corrected italic tags. public static string FixInvalidItalicTags(string input) { var text = input; @@ -1000,6 +1050,14 @@ namespace Nikse.SubtitleEdit.Core.Common return preTags + text; } + /// + /// Toggles the specified HTML or SSA/ASS tag on or off in the provided text. + /// + /// The input string to apply the tag toggle. + /// The HTML or SSA/ASS tag to be toggled. + /// Specifies whether the whole line should be toggled or just part of it. + /// Indicates whether the text contains SSA/ASS tags. + /// A new string with the specified tag toggled. public static string ToggleTag(string input, string tag, bool wholeLine, bool assa) { var text = input; @@ -1060,6 +1118,14 @@ namespace Nikse.SubtitleEdit.Core.Common return text; } + /// + /// Determines if the specified tag is present in the input HTML or SSA/ASS string. + /// + /// The input string to search for the specified tag. + /// The HTML or SSA/ASS tag to check for. + /// Specifies whether the search should consider the whole line. + /// Indicates if the input string is in SSA/ASS format. + /// True if the tag is present in the input; otherwise, false. public static bool IsTagOn(string input, string tag, bool wholeLine, bool assa) { var text = input; @@ -1079,6 +1145,14 @@ namespace Nikse.SubtitleEdit.Core.Common text.IndexOf("", StringComparison.OrdinalIgnoreCase) >= 0; } + /// + /// Applies the specified HTML or ASSA tag to the input string. + /// + /// The input string to which the tag will be applied. + /// The tag to apply to the input string. 
+ /// If true, the tag is applied to the entire line; otherwise, the tag is applied to a portion of the line. + /// If true, the tag is treated as an ASSA tag; otherwise, it is treated as an HTML tag. + /// A new string with the specified tag applied. public static string TagOn(string input, string tag, bool wholeLine, bool assa) { var text = input; @@ -1125,6 +1199,14 @@ namespace Nikse.SubtitleEdit.Core.Common return text; } + /// + /// Remove the specified HTML tag from the input string. + /// + /// The input string containing HTML tags. + /// The HTML tag to be removed. + /// Indicates whether the operation applies to the whole line. + /// Indicates whether ASSA (Advanced SubStation Alpha) tags are used. + /// A new string with the specified tag removed. public static string TagOff(string input, string tag, bool wholeLine, bool assa) { var text = input; @@ -1174,6 +1256,12 @@ namespace Nikse.SubtitleEdit.Core.Common return text; } + /// + /// Converts a string representation of a color to a Color object. The string can be in various formats such as + /// "rgb(r, g, b)", "rgba(r, g, b, a)", or a hex color string like "#RRGGBB" or "#RRGGBBAA". + /// + /// The string representation of the color. + /// A Color object corresponding to the input string. If the string cannot be parsed, the default color is white. + /// Remove color tags from the input string, adjusting for potentially surrounding font tags. + /// + /// The string from which to remove color tags. + /// A new string with color tags removed. public static string RemoveColorTags(string input) { var r = new Regex("[ ]*(COLOR|color|Color)=[\"']*[#\\dA-Za-z]*[\"']*[ ]*"); @@ -1325,6 +1418,11 @@ namespace Nikse.SubtitleEdit.Core.Common return s; } + /// + /// Removes ASS and SSA alignment tags from the given string. + /// + /// The input string from which to remove the alignment tags. + /// A new string without ASS and SSA alignment tags. 
public static string RemoveAssAlignmentTags(string s) { return s.Replace("{\\an1}", string.Empty) // ASS tags alone @@ -1378,6 +1476,11 @@ namespace Nikse.SubtitleEdit.Core.Common .Replace("{\\a9}", string.Empty); } + /// + /// Remove color tags specific to Advanced SubStation Alpha (ASSA) format from the input string. + /// + /// The input string potentially containing ASSA color tags. + /// A new string with all ASSA color tags removed. public static string RemoveAssaColor(string input) { var text = input; @@ -1388,7 +1491,12 @@ namespace Nikse.SubtitleEdit.Core.Common text = Regex.Replace(text, "\\\\1c&[abcdefghABCDEFGH\\d]*&", string.Empty); return text; } - + + /// + /// Gets the closing HTML tag for the specified opening tag. + /// + /// The opening HTML tag to find the closing pair for. + /// The closing HTML tag corresponding to the specified opening tag. public static string GetClosingPair(string tag) { switch (tag) @@ -1399,11 +1507,26 @@ namespace Nikse.SubtitleEdit.Core.Common } return tag.StartsWith("" : string.Empty; } - + + /// + /// Get the corresponding closing character for a given opening character. + /// + /// The opening character to find the closing pair for. + /// The corresponding closing character. public static char GetClosingPair(char ch) => ch == '<' ? '>' : '}'; + /// + /// Determines if the provided HTML tag is an opening tag. + /// + /// The HTML tag to check. + /// True if the tag is an opening tag, otherwise false. tag.Length > 1 && tag[1] != '/'; + /// + /// Determines whether the specified character is a start tag symbol for HTML or similar markup. + /// + /// The character to check. + /// True if the character is a start tag symbol; otherwise, false. public static bool IsStartTagSymbol(char ch) => ch == '<' || ch == '{'; } }