From 2ebc3fa1203bf88c7dfc5da2e022d3f40ab04898 Mon Sep 17 00:00:00 2001 From: niksedk Date: Sat, 22 Oct 2022 11:45:41 +0200 Subject: [PATCH 1/4] Initial new GetParagraphs --- src/libse/ContainerFormats/Mp4/Boxes/Mdhd.cs | 463 +----------------- src/libse/ContainerFormats/Mp4/Boxes/Mvhd.cs | 3 +- src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs | 310 +++++++++--- src/libse/ContainerFormats/Mp4/ChunkText.cs | 8 + .../ContainerFormats/Mp4/SampleToChunkMap.cs | 9 + 5 files changed, 278 insertions(+), 515 deletions(-) create mode 100644 src/libse/ContainerFormats/Mp4/ChunkText.cs create mode 100644 src/libse/ContainerFormats/Mp4/SampleToChunkMap.cs diff --git a/src/libse/ContainerFormats/Mp4/Boxes/Mdhd.cs b/src/libse/ContainerFormats/Mp4/Boxes/Mdhd.cs index ef85cd8de..b3cdbf414 100644 --- a/src/libse/ContainerFormats/Mp4/Boxes/Mdhd.cs +++ b/src/libse/ContainerFormats/Mp4/Boxes/Mdhd.cs @@ -1,14 +1,15 @@ -using System.Globalization; +using Nikse.SubtitleEdit.Core.Common; +using System.Globalization; using System.IO; +using System.Linq; namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes { /// - /// Media Header Box + /// Media Header Box. /// public class Mdhd : Box { - public readonly ulong CreationTime; public readonly ulong ModificationTime; public readonly ulong TimeScale; @@ -18,8 +19,13 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes public Mdhd(Stream fs, ulong size) { Buffer = new byte[size - 4]; - fs.Read(Buffer, 0, Buffer.Length); - int languageIndex = 20; + var bytesRead = fs.Read(Buffer, 0, Buffer.Length); + if (bytesRead < Buffer.Length) + { + return; + } + + var languageIndex = 20; int version = Buffer[0]; if (version == 0) { @@ -38,450 +44,21 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes } // language code = skip first byte, 5 bytes + 5 bytes + 5 bytes (add 0x60 to get ascii value) - int languageByte = ((Buffer[languageIndex] << 1) >> 3) + 0x60; - int languageByte2 = ((Buffer[languageIndex] & 0x3) << 3) + (Buffer[languageIndex + 1] >> 5) + 0x60; - int languageByte3 = (Buffer[languageIndex + 1] & 0x1f) + 0x60; - char x = (char)languageByte; - char x2 = (char)languageByte2; - char x3 = (char)languageByte3; - Iso639ThreeLetterCode = x.ToString(CultureInfo.InvariantCulture) + x2.ToString(CultureInfo.InvariantCulture) + x3.ToString(CultureInfo.InvariantCulture); + var languageByte1 = ((Buffer[languageIndex] << 1) >> 3) + 0x60; + var languageByte2 = ((Buffer[languageIndex] & 0x3) << 3) + (Buffer[languageIndex + 1] >> 5) + 0x60; + var languageByte3 = (Buffer[languageIndex + 1] & 0x1f) + 0x60; + var x1 = (char)languageByte1; + var x2 = (char)languageByte2; + var x3 = (char)languageByte3; + Iso639ThreeLetterCode = x1.ToString(CultureInfo.InvariantCulture) + x2.ToString(CultureInfo.InvariantCulture) + x3.ToString(CultureInfo.InvariantCulture); } public string LanguageString { get { - switch (Iso639ThreeLetterCode) - { - case ("abk"): return "Abkhazian"; - case ("ace"): return "Achinese"; - case ("ach"): return "Acoli"; - case ("ada"): return "Adangme"; - case ("aar"): return "Afar"; - case ("afh"): return "Afrihili"; - case ("afr"): return "Afrikaans"; - case ("afa"): return "Afro-Asiatic (Other)"; - case ("aka"): return "Akan"; - case ("akk"): return "Akkadian"; - case ("alb"): - case ("sqi"): return "Albanian"; - case ("ale"): return "Aleut"; - case ("alg"): return "Algonquian languages"; - case ("tut"): return "Altaic (Other)"; - case ("amh"): return "Amharic"; - case ("apa"): return "Apache languages"; - case ("arc"): return "Aramaic"; - case ("ara"): - case ("arg"): return "Arabic"; - case ("arp"): return "Arapaho"; - case ("arn"): return "Araucanian"; - case ("arw"): return "Arawak"; - case ("arm"): - case ("hye"): return "Armenian"; - case ("art"): return "Artificial (Other)"; - case ("asm"): return "Assamese"; - case ("ava"): return "Avaric"; - case ("ath"): return "Athapascan languages"; - case ("ave"): return "Avestan"; - case ("awa"): return "Awadhi"; - case ("aym"): return "Aymara"; - case ("aze"): return "Azerbaijani"; - case ("nah"): return "Aztec"; - case ("ban"): return "Balinese"; - case ("bat"): return "Baltic (Other)"; - case ("bal"): return "Baluchi"; - case ("bam"): return "Bambara"; - case ("bai"): return "Bamileke languages"; - case ("bad"): return "Banda"; - case ("bnt"): return "Bantu (Other)"; - case ("bas"): return "Basa"; - case ("bak"): return "Bashkir"; - case ("baq"): - case ("eus"): return "Basque"; - case ("bej"): return "Beja"; - case ("bem"): return "Bemba"; - case ("ben"): return "Bengali"; - case ("ber"): return "Berber (Other)"; - case ("bho"): return "Bhojpuri"; - case ("bih"): return "Bihari"; - case ("bik"): return "Bikol"; - case ("bin"): return "Bini"; - case ("bis"): return "Bislama"; - case ("bra"): return "Braj"; - case ("bre"): return "Breton"; - case ("bug"): return "Buginese"; - case ("bul"): return "Bulgarian"; - case ("bua"): return "Buriat"; - case ("bur"): - case ("mya"): return "Burmese"; - case ("bel"): return "Byelorussian"; - case ("cad"): return "Caddo"; - case ("car"): return "Carib"; - case ("cat"): return "Catalan"; - case ("cau"): return "Caucasian (Other)"; - case ("ceb"): return "Cebuano"; - case ("cel"): return "Celtic (Other)"; - case ("cai"): return "Central American Indian (Other)"; - case ("chg"): return "Chagatai"; - case ("cha"): return "Chamorro"; - case ("che"): return "Chechen"; - case ("chr"): return "Cherokee"; - case ("chy"): return "Cheyenne"; - case ("chb"): return "Chibcha"; - case ("chi"): - case ("zho"): return "Chinese"; - case ("chn"): return "Chinook jargon"; - case ("cho"): return "Choctaw"; - case ("chu"): return "Church Slavic"; - case ("chv"): return "Chuvash"; - case ("cop"): return "Coptic"; - case ("cor"): return "Cornish"; - case ("cos"): return "Corsican"; - case ("cre"): return "Cree"; - case ("mus"): return "Creek"; - case ("crp"): return "Creoles and Pidgins (Other)"; - case ("cpe"): return "Creoles and Pidgins, English-based (Other)"; - case ("cpf"): return "Creoles and Pidgins, French-based (Other)"; - case ("cpp"): return "Creoles and Pidgins, Portuguese-based (Other)"; - case ("cus"): return "Cushitic (Other)"; - case (" "): return "Croatian"; - case ("ces"): - case ("cze"): return "Czech"; - case ("dak"): return "Dakota"; - case ("dan"): return "Danish"; - case ("del"): return "Delaware"; - case ("din"): return "Dinka"; - case ("div"): return "Divehi"; - case ("doi"): return "Dogri"; - case ("dra"): return "Dravidian (Other)"; - case ("dua"): return "Duala"; - case ("dut"): - case ("nla"): return "Dutch"; - case ("dum"): return "Dutch, Middle (ca. 1050-1350)"; - case ("dyu"): return "Dyula"; - case ("dzo"): return "Dzongkha"; - case ("efi"): return "Efik"; - case ("egy"): return "Egyptian (Ancient)"; - case ("eka"): return "Ekajuk"; - case ("elx"): return "Elamite"; - case ("eng"): return "English"; - case ("enm"): return "English, Middle (ca. 1100-1500)"; - case ("ang"): return "English, Old (ca. 450-1100)"; - case ("esk"): return "Eskimo (Other)"; - case ("epo"): return "Esperanto"; - case ("est"): return "Estonian"; - case ("ewe"): return "Ewe"; - case ("ewo"): return "Ewondo"; - case ("fan"): return "Fang"; - case ("fat"): return "Fanti"; - case ("fao"): return "Faroese"; - case ("fij"): return "Fijian"; - case ("fin"): return "Finnish"; - case ("fiu"): return "Finno-Ugrian (Other)"; - case ("fon"): return "Fon"; - case ("fra"): - case ("fre"): return "French"; - case ("frm"): return "French, Middle (ca. 1400-1600)"; - case ("fro"): return "French, Old (842- ca. 1400)"; - case ("fry"): return "Frisian"; - case ("ful"): return "Fulah"; - case ("gaa"): return "Ga"; - case ("gae"): - case ("gdh"): return "Gaelic (Scots)"; - case ("glg"): return "Gallegan"; - case ("lug"): return "Ganda"; - case ("gay"): return "Gayo"; - case ("gez"): return "Geez"; - case ("geo"): - case ("kat"): return "Georgian"; - case ("deu"): - case ("ger"): return "German"; - case ("gmh"): return "German, Middle High (ca. 1050-1500)"; - case ("goh"): return "German, Old High (ca. 750-1050)"; - case ("gem"): return "Germanic (Other)"; - case ("gil"): return "Gilbertese"; - case ("gon"): return "Gondi"; - case ("got"): return "Gothic"; - case ("grb"): return "Grebo"; - case ("grc"): return "Greek, Ancient (to 1453)"; - case ("ell"): - case ("gre"): return "Greek, Modern (1453-)"; - case ("kal"): return "Greenlandic"; - case ("grn"): return "Guarani"; - case ("guj"): return "Gujarati"; - case ("hai"): return "Haida"; - case ("hau"): return "Hausa"; - case ("haw"): return "Hawaiian"; - case ("heb"): return "Hebrew"; - case ("her"): return "Herero"; - case ("hil"): return "Hiligaynon"; - case ("him"): return "Himachali"; - case ("hin"): return "Hindi"; - case ("hmo"): return "Hiri Motu"; - case ("hun"): return "Hungarian"; - case ("hup"): return "Hupa"; - case ("iba"): return "Iban"; - case ("ice"): return "Icelandic"; - case ("ibo"): return "Igbo"; - case ("ijo"): return "Ijo"; - case ("ilo"): return "Iloko"; - case ("inc"): return "Indic (Other)"; - case ("ine"): return "Indo-European (Other)"; - case ("ind"): return "Indonesian"; - case ("ina"): return "Interlingua (International Auxiliary language Association)"; - // case ("ine"): return "Interlingue"; - case ("iku"): return "Inuktitut"; - case ("ipk"): return "Inupiak"; - case ("ira"): return "Iranian (Other)"; - case ("gai"): - case ("iri"): return "Irish"; - case ("sga"): return "Irish, Old (to 900)"; - case ("mga"): return "Irish, Middle (900 - 1200)"; - case ("iro"): return "Iroquoian languages"; - case ("ita"): return "Italian"; - case ("jpn"): return "Japanese"; - case ("jav"): - case ("jaw"): return "Javanese"; - case ("jrb"): return "Judeo-Arabic"; - case ("jpr"): return "Judeo-Persian"; - case ("kab"): return "Kabyle"; - case ("kac"): return "Kachin"; - case ("kam"): return "Kamba"; - case ("kan"): return "Kannada"; - case ("kau"): return "Kanuri"; - case ("kaa"): return "Kara-Kalpak"; - case ("kar"): return "Karen"; - case ("kas"): return "Kashmiri"; - case ("kaw"): return "Kawi"; - case ("kaz"): return "Kazakh"; - case ("kha"): return "Khasi"; - case ("khm"): return "Khmer"; - case ("khi"): return "Khoisan (Other)"; - case ("kho"): return "Khotanese"; - case ("kik"): return "Kikuyu"; - case ("kin"): return "Kinyarwanda"; - case ("kir"): return "Kirghiz"; - case ("kom"): return "Komi"; - case ("kon"): return "Kongo"; - case ("kok"): return "Konkani"; - case ("kor"): return "Korean"; - case ("kpe"): return "Kpelle"; - case ("kro"): return "Kru"; - case ("kua"): return "Kuanyama"; - case ("kum"): return "Kumyk"; - case ("kur"): return "Kurdish"; - case ("kru"): return "Kurukh"; - case ("kus"): return "Kusaie"; - case ("kut"): return "Kutenai"; - case ("lad"): return "Ladino"; - case ("lah"): return "Lahnda"; - case ("lam"): return "Lamba"; - case ("oci"): return "Langue d'Oc (post 1500)"; - case ("lao"): return "Lao"; - case ("lat"): return "Latin"; - case ("lav"): return "Latvian"; - case ("ltz"): return "Letzeburgesch"; - case ("lez"): return "Lezghian"; - case ("lin"): return "Lingala"; - case ("lit"): return "Lithuanian"; - case ("loz"): return "Lozi"; - case ("lub"): return "Luba-Katanga"; - case ("lui"): return "Luiseno"; - case ("lun"): return "Lunda"; - case ("luo"): return "Luo (Kenya and Tanzania)"; - case ("mac"): return "Macedonian"; - case ("mad"): return "Madurese"; - case ("mag"): return "Magahi"; - case ("mai"): return "Maithili"; - case ("mak"): return "Makasar"; - case ("mlg"): return "Malagasy"; - case ("may"): - case ("msa"): return "Malay"; - case ("mal"): return "Malayalam"; - case ("mlt"): return "Maltese"; - case ("man"): return "Mandingo"; - case ("mni"): return "Manipuri"; - case ("mno"): return "Manobo languages"; - case ("max"): return "Manx"; - case ("mao"): - case ("mri"): return "Maori"; - case ("mar"): return "Marathi"; - case ("chm"): return "Mari"; - case ("mah"): return "Marshall"; - case ("mwr"): return "Marwari"; - case ("mas"): return "Masai"; - case ("myn"): return "Mayan languages"; - case ("men"): return "Mende"; - case ("mic"): return "Micmac"; - case ("min"): return "Minangkabau"; - case ("mis"): return "Miscellaneous (Other)"; - case ("moh"): return "Mohawk"; - case ("mol"): return "Moldavian"; - case ("mkh"): return "Mon-Kmer (Other)"; - case ("lol"): return "Mongo"; - case ("mon"): return "Mongolian"; - case ("mos"): return "Mossi"; - case ("mul"): return "Multiple languages"; - case ("mun"): return "Munda languages"; - case ("nau"): return "Nauru"; - case ("nav"): return "Navajo"; - case ("nde"): return "Ndebele, North"; - case ("nbl"): return "Ndebele, South"; - case ("ndo"): return "Ndongo"; - case ("nep"): return "Nepali"; - case ("new"): return "Newari"; - case ("nic"): return "Niger-Kordofanian (Other)"; - case ("ssa"): return "Nilo-Saharan (Other)"; - case ("niu"): return "Niuean"; - case ("non"): return "Norse, Old"; - case ("nai"): return "North American Indian (Other)"; - case ("nor"): return "Norwegian"; - case ("nob"): return "Norwegian (Bokmål)"; - case ("nno"): return "Norwegian (Nynorsk)"; - case ("nub"): return "Nubian languages"; - case ("nym"): return "Nyamwezi"; - case ("nya"): return "Nyanja"; - case ("nyn"): return "Nyankole"; - case ("nyo"): return "Nyoro"; - case ("nzi"): return "Nzima"; - case ("oji"): return "Ojibwa"; - case ("ori"): return "Oriya"; - case ("orm"): return "Oromo"; - case ("osa"): return "Osage"; - case ("oss"): return "Ossetic"; - case ("oto"): return "Otomian languages"; - case ("pal"): return "Pahlavi"; - case ("pau"): return "Palauan"; - case ("pli"): return "Pali"; - case ("pam"): return "Pampanga"; - case ("pag"): return "Pangasinan"; - case ("pan"): return "Panjabi"; - case ("pap"): return "Papiamento"; - case ("paa"): return "Papuan-Australian (Other)"; - case ("fas"): - case ("per"): return "Persian"; - case ("peo"): return "Persian, Old (ca 600 - 400 B.C.)"; - case ("phn"): return "Phoenician"; - case ("pol"): return "Polish"; - case ("pon"): return "Ponape"; - case ("por"): return "Portuguese"; - case ("pra"): return "Prakrit languages"; - case ("pro"): return "Provencal, Old (to 1500)"; - case ("pus"): return "Pushto"; - case ("que"): return "Quechua"; - case ("roh"): return "Rhaeto-Romance"; - case ("raj"): return "Rajasthani"; - case ("rar"): return "Rarotongan"; - case ("roa"): return "Romance (Other)"; - case ("ron"): - case ("rum"): return "Romanian"; - case ("rom"): return "Romany"; - case ("run"): return "Rundi"; - case ("rus"): return "Russian"; - case ("sal"): return "Salishan languages"; - case ("sam"): return "Samaritan Aramaic"; - case ("smi"): return "Sami languages"; - case ("smo"): return "Samoan"; - case ("sad"): return "Sandawe"; - case ("sag"): return "Sango"; - case ("san"): return "Sanskrit"; - case ("srd"): return "Sardinian"; - case ("sco"): return "Scots"; - case ("sel"): return "Selkup"; - case ("sem"): return "Semitic (Other)"; - case ("srp"): return "Serbian"; - case ("scr"): return "Serbo-Croatian"; - case ("srr"): return "Serer"; - case ("shn"): return "Shan"; - case ("sna"): return "Shona"; - case ("sid"): return "Sidamo"; - case ("bla"): return "Siksika"; - case ("snd"): return "Sindhi"; - case ("sin"): return "Singhalese"; - case ("sit"): return "Sino-Tibetan (Other)"; - case ("sio"): return "Siouan languages"; - case ("sla"): return "Slavic (Other)"; - //case ("ssw"): return "Siswant"; - case ("slk"): return "Slovak"; - case ("slv"): return "Slovenian"; - case ("sog"): return "Sogdian"; - case ("som"): return "Somali"; - case ("son"): return "Songhai"; - case ("wen"): return "Sorbian languages"; - case ("nso"): return "Sotho, Northern"; - case ("sot"): return "Sotho, Southern"; - case ("sai"): return "South American Indian (Other)"; - case ("esl"): - case ("spa"): return "Spanish"; - case ("suk"): return "Sukuma"; - case ("sux"): return "Sumerian"; - case ("sun"): return "Sudanese"; - case ("sus"): return "Susu"; - case ("swa"): return "Swahili"; - case ("ssw"): return "Swazi"; - case ("sve"): return "Swedish"; - case ("swe"): return "Swedish"; - case ("syr"): return "Syriac"; - case ("tgl"): return "Tagalog"; - case ("tah"): return "Tahitian"; - case ("tgk"): return "Tajik"; - case ("tmh"): return "Tamashek"; - case ("tam"): return "Tamil"; - case ("tat"): return "Tatar"; - case ("tel"): return "Telugu"; - case ("ter"): return "Tereno"; - case ("tha"): return "Thai"; - case ("bod"): - case ("tib"): return "Tibetan"; - case ("tig"): return "Tigre"; - case ("tir"): return "Tigrinya"; - case ("tem"): return "Timne"; - case ("tiv"): return "Tivi"; - case ("tli"): return "Tlingit"; - case ("tog"): return "Tonga (Nyasa)"; - case ("ton"): return "Tonga (Tonga Islands)"; - case ("tru"): return "Truk"; - case ("tsi"): return "Tsimshian"; - case ("tso"): return "Tsonga"; - case ("tsn"): return "Tswana"; - case ("tum"): return "Tumbuka"; - case ("tur"): return "Turkish"; - case ("ota"): return "Turkish, Ottoman (1500 - 1928)"; - case ("tuk"): return "Turkmen"; - case ("tyv"): return "Tuvinian"; - case ("twi"): return "Twi"; - case ("uga"): return "Ugaritic"; - case ("uig"): return "Uighur"; - case ("ukr"): return "Ukrainian"; - case ("umb"): return "Umbundu"; - case ("und"): return "Undetermined"; - case ("urd"): return "Urdu"; - case ("uzb"): return "Uzbek"; - case ("vai"): return "Vai"; - case ("ven"): return "Venda"; - case ("vie"): return "Vietnamese"; - case ("vol"): return "Volapük"; - case ("vot"): return "Votic"; - case ("wak"): return "Wakashan languages"; - case ("wal"): return "Walamo"; - case ("war"): return "Waray"; - case ("was"): return "Washo"; - case ("cym"): - case ("wel"): return "Welsh"; - case ("wol"): return "Wolof"; - case ("xho"): return "Xhosa"; - case ("sah"): return "Yakut"; - case ("yao"): return "Yao"; - case ("yap"): return "Yap"; - case ("yid"): return "Yiddish"; - case ("yor"): return "Yoruba"; - case ("zap"): return "Zapotec"; - case ("zen"): return "Zenaga"; - case ("zha"): return "Zhuang"; - case ("zul"): return "Zulu"; - case ("zun"): return "Zuni"; - } - return "Any"; + var language = Iso639Dash2LanguageCode.List.FirstOrDefault(p => p.ThreeLetterCode == Iso639ThreeLetterCode); + return language == null ? "Any" : language.EnglishName; } } } diff --git a/src/libse/ContainerFormats/Mp4/Boxes/Mvhd.cs b/src/libse/ContainerFormats/Mp4/Boxes/Mvhd.cs index 6b5f5f468..4ef3e68e7 100644 --- a/src/libse/ContainerFormats/Mp4/Boxes/Mvhd.cs +++ b/src/libse/ContainerFormats/Mp4/Boxes/Mvhd.cs @@ -15,11 +15,12 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes public Mvhd(Stream fs) { Buffer = new byte[20]; - int bytesRead = fs.Read(Buffer, 0, Buffer.Length); + var bytesRead = fs.Read(Buffer, 0, Buffer.Length); if (bytesRead < Buffer.Length) { return; } + int version = Buffer[0]; if (version == 0) { diff --git a/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs b/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs index 09a67979e..706076b2c 100644 --- a/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs +++ b/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs @@ -4,19 +4,22 @@ using Nikse.SubtitleEdit.Core.VobSub; using System; using System.Collections.Generic; using System.IO; +using System.Linq; +using System.Reflection; using System.Text; namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes { public class Stbl : Box { - public List Texts; public List SubPictures; public ulong StszSampleCount; public ulong TimeScale { get; set; } private readonly Mdia _mdia; public List SampleSizes; public List Ssts { get; set; } + public List Stsc { get; set; } + public List Texts; public Stbl(Stream fs, ulong maximumLength, ulong timeScale, string handlerType, Mdia mdia) { @@ -24,8 +27,9 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes _mdia = mdia; Position = (ulong)fs.Position; Ssts = new List(); + Stsc = new List(); SampleSizes = new List(); - Texts = new List(); + Texts = new List(); SubPictures = new List(); while (fs.Position < (long)maximumLength) { @@ -36,20 +40,28 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes if (Name == "stco") // 32-bit - chunk offset { - Buffer = new byte[Size - 4]; - fs.Read(Buffer, 0, Buffer.Length); - int version = Buffer[0]; - var totalEntries = GetUInt(4); - uint lastOffset = 0; - for (var i = 0; i < totalEntries; i++) + if (handlerType != "vide" && handlerType != "soun") { - var offset = GetUInt(8 + i * 4); - if (lastOffset + 5 < offset) + Buffer = new byte[Size - 4]; + fs.Read(Buffer, 0, Buffer.Length); + int version = Buffer[0]; + var totalEntries = GetUInt(4); + uint lastOffset = 0; + for (var i = 0; i < totalEntries; i++) { - ReadText(fs, offset, handlerType, i); - } + var offset = GetUInt(8 + i * 4); + if (lastOffset + 5 < offset) + { + var text = ReadText(fs, offset, handlerType, i); + Texts.Add(text); + } + else + { + Texts.Add(new ChunkText { Size = 2, Text = string.Empty }); + } - lastOffset = offset; + lastOffset = offset; + } } } else if (Name == "co64") // 64-bit @@ -64,7 +76,12 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes var offset = GetUInt64(8 + i * 8); if (lastOffset + 8 < offset) { - ReadText(fs, offset, handlerType, i); + var text = ReadText(fs, offset, handlerType, i); + Texts.Add(text); + } + else + { + Texts.Add(new ChunkText { Size = 2, Text = string.Empty }); } lastOffset = offset; @@ -95,24 +112,13 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes fs.Read(Buffer, 0, Buffer.Length); int version = Buffer[0]; var numberOfSampleTimes = GetUInt(4); - if (_mdia.IsClosedCaption) + for (var i = 0; i < numberOfSampleTimes; i++) { - for (var i = 0; i < numberOfSampleTimes; i++) - { - var sampleCount = GetUInt(8 + i * 8); - var sampleDelta = GetUInt(12 + i * 8); - Ssts.Add(new SampleTimeInfo { SampleCount = sampleCount, SampleDelta = sampleDelta }); - } - } - else - { - for (var i = 0; i < numberOfSampleTimes; i++) - { - var sampleCount = GetUInt(8 + i * 8); - var sampleDelta = GetUInt(12 + i * 8); - Ssts.Add(new SampleTimeInfo { SampleCount = sampleCount, SampleDelta = sampleDelta }); - } + var sampleCount = GetUInt(8 + i * 8); + var sampleDelta = GetUInt(12 + i * 8); + Ssts.Add(new SampleTimeInfo { SampleCount = sampleCount, SampleDelta = sampleDelta }); } + } else if (Name == "stsc") // sample table sample to chunk map { @@ -127,6 +133,7 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes var firstChunk = GetUInt(8 + i * 12); var samplesPerChunk = GetUInt(12 + i * 12); var sampleDescriptionIndex = GetUInt(16 + i * 12); + Stsc.Add(new SampleToChunkMap { FirstChunk = firstChunk, SamplesPerChunk = samplesPerChunk, SampleDescriptionIndex = sampleDescriptionIndex }); } } } @@ -135,11 +142,16 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes } } - private void ReadText(Stream fs, ulong offset, string handlerType, int index) + private ChunkText ReadText(Stream fs, ulong offset, string handlerType, int index) { if (handlerType == "vide") { - return; + return null; + } + + if (handlerType == "soun") + { + return null; } fs.Seek((long)offset, SeekOrigin.Begin); @@ -161,7 +173,7 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes { if (handlerType == "text" && index + 1 < SampleSizes.Count && SampleSizes[index + 1] <= 2) { - return; + return new ChunkText { Size = 2, Text = string.Empty }; } if (textSize == 0) @@ -179,9 +191,9 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes if (_mdia.IsClosedCaption) { var sb = new StringBuilder(); - for (int j = 8; j < data.Length - 3; j++) + for (var j = 8; j < data.Length - 3; j++) { - string h = data[j].ToString("X2").ToLowerInvariant(); + var h = data[j].ToString("X2").ToLowerInvariant(); if (h.Length < 2) { h = "0" + h; @@ -218,57 +230,213 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes } } - Texts.Add(text.Replace(Environment.NewLine, "\n").Replace("\n", Environment.NewLine)); + text = text.Replace(Environment.NewLine, "\n").Replace("\n", Environment.NewLine); + return new ChunkText { Size = textSize, Text = text }; } } + + return new ChunkText { Size = 2, Text = null }; + } + + public class ExpandedSample + { + public uint SampleDelta { get; set; } + public uint SampleSize { get; set; } + + public static List From(List stss, List stsz) + { + var result = new List(); + for (var index = 0; index < stss.Count; index++) + { + var timeToSample = stss[index]; + + for (var i = 0; i < timeToSample.SampleCount; i++) + { + result.Add(new ExpandedSample + { + SampleDelta = timeToSample.SampleDelta, + SampleSize = index < stsz.Count ? stsz[index] : 0, + }); + } + } + + return result; + } } public List GetParagraphs() { var paragraphs = new List(); + var timeSamples = ExpandedSample.From(Ssts, SampleSizes); double totalTime = 0; - var allTimes = new List(); - - // expand time codes - foreach (var timeInfo in Ssts) - { - for (var i = 0; i < timeInfo.SampleCount; i++) - { - totalTime += timeInfo.SampleDelta / (double)TimeScale; - allTimes.Add(totalTime); - } - } - - var index = 0; var textIndex = 0; - while (index < allTimes.Count - 1) + for (var index = 0; index < timeSamples.Count; index++) { - if (index > 0 && index + 1 < SampleSizes.Count && SampleSizes[index + 1] == 2) + var timeSample = timeSamples[index]; + var before = totalTime; + totalTime += timeSample.SampleDelta / (double)TimeScale; + if (textIndex < Texts.Count) { - index++; + var text = Texts[textIndex]; + if (timeSample.SampleSize <= 2) + { + if (text.Size <= 2 && string.IsNullOrEmpty(text.Text)) + { + textIndex++; + } + } + else + { + if (text.Size > 2 && !string.IsNullOrEmpty(text.Text)) + { + paragraphs.Add(new Paragraph(text.Text, before * 1000.0, totalTime * 1000.0)); + } + textIndex++; + } } - - var timeStart = allTimes[index]; - var timeEnd = timeStart + 2; - if (index + 1 < allTimes.Count) - { - timeEnd = allTimes[index + 1]; - } - - if (_mdia.IsVobSubSubtitle && SubPictures.Count > textIndex) - { - paragraphs.Add(new Paragraph(string.Empty, timeStart * 1000.0, timeEnd * 1000.0)); - } - else if (Texts.Count > textIndex) - { - paragraphs.Add(new Paragraph(Texts[textIndex], timeStart * 1000.0, timeEnd * 1000.0)); - } - - index++; - textIndex++; } return paragraphs; } + + public List GetParagraphs2() + { + // expand time codes + var ssts = new List(); + foreach (var timeInfo in Ssts) + { + for (var i = 0; i < timeInfo.SampleCount; i++) + { + ssts.Add(new SampleTimeInfo { SampleCount = 1, SampleDelta = timeInfo.SampleDelta }); + } + } + Ssts = ssts; + + var paragraphs = new List(); + var textIndex = 0; + double totalTime = 0; + uint samplesPerChunk = 1; + var index = 0; + + var firstText = Texts.FirstOrDefault(); + var firstSampleSize = SampleSizes.FirstOrDefault(); + if (firstText != null && firstSampleSize != null && firstText.Size > firstSampleSize && firstSampleSize == 2 && !string.IsNullOrEmpty(firstText.Text)) + { + var timeInfo = Ssts[index]; + totalTime += timeInfo.SampleDelta / (double)TimeScale; + index++; + } + + while (index < Ssts.Count) + { + var timeInfo = Ssts[index]; + + var samplesPerChunkHit = Stsc.FirstOrDefault(p => p.FirstChunk == index + 1); + if (samplesPerChunkHit != null) + { + samplesPerChunk = samplesPerChunkHit.SamplesPerChunk; + if (samplesPerChunk == 0) + { + SeLogger.Error("MP4 has unexpected samples per chunk with zero"); + samplesPerChunk = 1; + } + } + + var before = totalTime; + totalTime += timeInfo.SampleDelta / (double)TimeScale; + + var newSamplesPerChunk = samplesPerChunk; + for (var i = 1; i < samplesPerChunk; i++) // extra + { + index++; + + samplesPerChunkHit = Stsc.FirstOrDefault(p => p.FirstChunk == index + 1); + if (samplesPerChunkHit != null) + { + newSamplesPerChunk = samplesPerChunkHit.SamplesPerChunk; + if (samplesPerChunk == 0) + { + SeLogger.Error("MP4 has unexpected samples per chunk with zero"); + newSamplesPerChunk = 1; + } + } + + if (index < Ssts.Count) + { + timeInfo = Ssts[index]; + totalTime += timeInfo.SampleDelta / (double)TimeScale; + } + } + samplesPerChunk = newSamplesPerChunk; + + if (textIndex < Texts.Count) + { + var text = Texts[textIndex]; + + if (text.Text != null && text.Text.Contains("How are we today", StringComparison.OrdinalIgnoreCase)) + { + } + + if (!string.IsNullOrEmpty(text.Text)) + { + paragraphs.Add(new Paragraph(text.Text, before * 1000.0, totalTime * 1000.0)); + } + + textIndex++; + } + + index++; + } + + return paragraphs; + + //var paragraphs = new List(); + //double totalTime = 0; + //var allTimes = new List(); + + //// expand time codes + //foreach (var timeInfo in Ssts) + //{ + // for (var i = 0; i < timeInfo.SampleCount; i++) + // { + // totalTime += timeInfo.SampleDelta / (double)TimeScale; + // allTimes.Add(totalTime); + // } + //} + + //var index = 0; + //var textIndex = 0; + //while (index < allTimes.Count - 1) + //{ + // if (index > 0 && index + 1 < SampleSizes.Count && SampleSizes[index + 1] == 2) + // { + // index++; + // } + + // var timeStart = allTimes[index]; + // var timeEnd = timeStart + 2; + // if (index + 1 < allTimes.Count) + // { + // timeEnd = allTimes[index + 1]; + // } + + // if (_mdia.IsVobSubSubtitle && SubPictures.Count > textIndex) + // { + // paragraphs.Add(new Paragraph(string.Empty, timeStart * 1000.0, timeEnd * 1000.0)); + // } + // else if (Texts.Count > textIndex) + // { + // var text = Texts[textIndex].Text; + // if (text == null) + // text = string.Empty; + // paragraphs.Add(new Paragraph(text, timeStart * 1000.0, timeEnd * 1000.0)); + // } + + // index++; + // textIndex++; + //} + + //return paragraphs; + } } } diff --git a/src/libse/ContainerFormats/Mp4/ChunkText.cs b/src/libse/ContainerFormats/Mp4/ChunkText.cs new file mode 100644 index 000000000..a9520b81a --- /dev/null +++ b/src/libse/ContainerFormats/Mp4/ChunkText.cs @@ -0,0 +1,8 @@ +namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4 +{ + public class ChunkText + { + public uint Size { get; set; } + public string Text { get; set; } + } +} diff --git a/src/libse/ContainerFormats/Mp4/SampleToChunkMap.cs b/src/libse/ContainerFormats/Mp4/SampleToChunkMap.cs new file mode 100644 index 000000000..b0edd37ad --- /dev/null +++ b/src/libse/ContainerFormats/Mp4/SampleToChunkMap.cs @@ -0,0 +1,9 @@ +namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4 +{ + public class SampleToChunkMap + { + public uint FirstChunk { get; set; } + public uint SamplesPerChunk { get; set; } + public uint SampleDescriptionIndex { get; set; } + } +} From 153ddf5a82dc80e0cd05e077ba35ad92efe4fb87 Mon Sep 17 00:00:00 2001 From: niksedk Date: Sat, 22 Oct 2022 17:17:47 +0200 Subject: [PATCH 2/4] more mp4 testing --- src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs | 105 +++++++++++-------- 1 file changed, 61 insertions(+), 44 deletions(-) diff --git a/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs b/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs index 706076b2c..5830f7401 100644 --- a/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs +++ b/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs @@ -57,7 +57,7 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes } else { - Texts.Add(new ChunkText { Size = 2, Text = string.Empty }); + Texts.Add(new ChunkText { Size = 2, Text = null }); } lastOffset = offset; @@ -81,7 +81,7 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes } else { - Texts.Add(new ChunkText { Size = 2, Text = string.Empty }); + Texts.Add(new ChunkText { Size = 2, Text = null }); } lastOffset = offset; @@ -264,7 +264,7 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes } } - public List GetParagraphs() + public List GetParagraphsNew() { var paragraphs = new List(); var timeSamples = ExpandedSample.From(Ssts, SampleSizes); @@ -278,6 +278,11 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes if (textIndex < Texts.Count) { var text = Texts[textIndex]; + //if (text.Text == "In the tunnel.") + //{ + + //} + if (timeSample.SampleSize <= 2) { if (text.Size <= 2 && string.IsNullOrEmpty(text.Text)) @@ -291,6 +296,7 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes { paragraphs.Add(new Paragraph(text.Text, before * 1000.0, totalTime * 1000.0)); } + textIndex++; } } @@ -389,54 +395,65 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes } return paragraphs; + } - //var paragraphs = new List(); - //double totalTime = 0; - //var allTimes = new List(); + public List GetParagraphs() + { + var paragraphs = new List(); + double totalTime = 0; + var allTimes = new List(); - //// expand time codes - //foreach (var timeInfo in Ssts) - //{ - // for (var i = 0; i < timeInfo.SampleCount; i++) - // { - // totalTime += timeInfo.SampleDelta / (double)TimeScale; - // allTimes.Add(totalTime); - // } - //} + // expand time codes + foreach (var timeInfo in Ssts) + { + for (var i = 0; i < timeInfo.SampleCount; i++) + { + totalTime += timeInfo.SampleDelta / (double)TimeScale; + allTimes.Add(totalTime); + } + } - //var index = 0; - //var textIndex = 0; - //while (index < allTimes.Count - 1) - //{ - // if (index > 0 && index + 1 < SampleSizes.Count && SampleSizes[index + 1] == 2) - // { - // index++; - // } + var index = 0; + var textIndex = 0; + while (index < allTimes.Count - 1) + { + if (index > 0 && index + 1 < SampleSizes.Count && SampleSizes[index + 1] == 2) + { + index++; + } - // var timeStart = allTimes[index]; - // var timeEnd = timeStart + 2; - // if (index + 1 < allTimes.Count) - // { - // timeEnd = allTimes[index + 1]; - // } + var timeStart = allTimes[index]; + var timeEnd = timeStart + 2; + if (index + 1 < allTimes.Count) + { + timeEnd = allTimes[index + 1]; + } - // if (_mdia.IsVobSubSubtitle && SubPictures.Count > textIndex) - // { - // paragraphs.Add(new Paragraph(string.Empty, timeStart * 1000.0, timeEnd * 1000.0)); - // } - // else if (Texts.Count > textIndex) - // { - // var text = Texts[textIndex].Text; - // if (text == null) - // text = string.Empty; - // paragraphs.Add(new Paragraph(text, timeStart * 1000.0, timeEnd * 1000.0)); - // } + if (_mdia.IsVobSubSubtitle && SubPictures.Count > textIndex) + { + paragraphs.Add(new Paragraph(string.Empty, timeStart * 1000.0, timeEnd * 1000.0)); + } + else if (Texts.Count > textIndex) + { + var text = Texts[textIndex]; - // index++; - // textIndex++; - //} + if (text.Size <= 2 && text.Text == null && textIndex +1 < Texts.Count) + { + textIndex++; + text = Texts[textIndex]; + } - //return paragraphs; + if (!string.IsNullOrEmpty(text.Text)) + { + paragraphs.Add(new Paragraph(text.Text, timeStart * 1000.0, timeEnd * 1000.0)); + } + } + + index++; + textIndex++; + } + + return paragraphs; } } } From e4e3d1b49dc9d43c11bf5d4065a88dc057fdfd32 Mon Sep 17 00:00:00 2001 From: niksedk Date: Sat, 22 Oct 2022 19:14:55 +0200 Subject: [PATCH 3/4] Get last text from mp4 sbtl --- src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs b/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs index 5830f7401..8ea47fd51 100644 --- a/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs +++ b/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs @@ -437,7 +437,7 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes { var text = Texts[textIndex]; - if (text.Size <= 2 && text.Text == null && textIndex +1 < Texts.Count) + if (text.Size <= 2 && text.Text == null && textIndex + 1 < Texts.Count) { textIndex++; text = Texts[textIndex]; @@ -453,6 +453,15 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes textIndex++; } + if (index < allTimes.Count && textIndex < Texts.Count && index > 0) + { + var text = Texts[textIndex]; + if (!string.IsNullOrEmpty(text.Text)) + { + paragraphs.Add(new Paragraph(text.Text, allTimes[index - 1] * 1000.0, allTimes[index] * 1000.0)); + } + } + return paragraphs; } } From a31efe13218300dc0a4b484d0b5e9e9825972958 Mon Sep 17 00:00:00 2001 From: niksedk Date: Sat, 22 Oct 2022 20:44:46 +0200 Subject: [PATCH 4/4] Try to improve reading of first time code --- src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs | 39 +++++++++++--------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs b/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs index 8ea47fd51..29bb722b7 100644 --- a/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs +++ b/src/libse/ContainerFormats/Mp4/Boxes/Stbl.cs @@ -400,38 +400,42 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes public List GetParagraphs() { var paragraphs = new List(); - double totalTime = 0; - var allTimes = new List(); // expand time codes + var ssts = new List(); foreach (var timeInfo in Ssts) { for (var i = 0; i < timeInfo.SampleCount; i++) { - totalTime += timeInfo.SampleDelta / (double)TimeScale; - allTimes.Add(totalTime); + ssts.Add(new SampleTimeInfo { SampleCount = 1, SampleDelta = timeInfo.SampleDelta }); } } + Ssts = ssts; var index = 0; var textIndex = 0; - while (index < allTimes.Count - 1) + double totalTime = 0; + if (SampleSizes[0] == 2 && !string.IsNullOrEmpty(Texts[0].Text)) { - if (index > 0 && index + 1 < SampleSizes.Count && SampleSizes[index + 1] == 2) + totalTime += Ssts[index].SampleDelta / (double)TimeScale; + index++; + } + + while (index < Ssts.Count) + { + var before = totalTime; + totalTime += Ssts[index].SampleDelta / (double)TimeScale; + + if (index > 0 && index + 1 < SampleSizes.Count && SampleSizes[index] == 2) { index++; - } - - var timeStart = allTimes[index]; - var timeEnd = timeStart + 2; - if (index + 1 < allTimes.Count) - { - timeEnd = allTimes[index + 1]; + before = totalTime; + totalTime += Ssts[index].SampleDelta / (double)TimeScale; } if (_mdia.IsVobSubSubtitle && SubPictures.Count > textIndex) { - paragraphs.Add(new Paragraph(string.Empty, timeStart * 1000.0, timeEnd * 1000.0)); + paragraphs.Add(new Paragraph(string.Empty, before * 1000.0, totalTime * 1000.0)); } else if (Texts.Count > textIndex) { @@ -445,7 +449,7 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes if (!string.IsNullOrEmpty(text.Text)) { - paragraphs.Add(new Paragraph(text.Text, timeStart * 1000.0, timeEnd * 1000.0)); + paragraphs.Add(new Paragraph(text.Text, before * 1000.0, totalTime * 1000.0)); } } @@ -453,12 +457,13 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes textIndex++; } - if (index < allTimes.Count && textIndex < Texts.Count && index > 0) + if (index <= Ssts.Count && textIndex < Texts.Count && index > 0) { var text = Texts[textIndex]; if (!string.IsNullOrEmpty(text.Text)) { - paragraphs.Add(new Paragraph(text.Text, allTimes[index - 1] * 1000.0, allTimes[index] * 1000.0)); + var before = totalTime - Ssts[index - 1].SampleDelta / (double)TimeScale; + paragraphs.Add(new Paragraph(text.Text, before * 1000.0, totalTime * 1000.0)); } }