Merge pull request #6347 from SubtitleEdit/feature/try-to-improve-mp4-stbl-import

Feature/try to improve mp4 stbl import
This commit is contained in:
Nikolaj Olsson 2022-10-22 14:48:11 -04:00 committed by GitHub
commit c30d77f902
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 290 additions and 496 deletions

View File

@ -1,14 +1,15 @@
using System.Globalization;
using Nikse.SubtitleEdit.Core.Common;
using System.Globalization;
using System.IO;
using System.Linq;
namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
{
/// <summary>
/// Media Header Box
/// Media Header Box.
/// </summary>
public class Mdhd : Box
{
public readonly ulong CreationTime;
public readonly ulong ModificationTime;
public readonly ulong TimeScale;
@ -18,8 +19,13 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
public Mdhd(Stream fs, ulong size)
{
Buffer = new byte[size - 4];
fs.Read(Buffer, 0, Buffer.Length);
int languageIndex = 20;
var bytesRead = fs.Read(Buffer, 0, Buffer.Length);
if (bytesRead < Buffer.Length)
{
return;
}
var languageIndex = 20;
int version = Buffer[0];
if (version == 0)
{
@ -38,450 +44,21 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
}
// language code = skip first bit, then 5 bits + 5 bits + 5 bits packed into two bytes (add 0x60 to each value to get the ascii character)
int languageByte = ((Buffer[languageIndex] << 1) >> 3) + 0x60;
int languageByte2 = ((Buffer[languageIndex] & 0x3) << 3) + (Buffer[languageIndex + 1] >> 5) + 0x60;
int languageByte3 = (Buffer[languageIndex + 1] & 0x1f) + 0x60;
char x = (char)languageByte;
char x2 = (char)languageByte2;
char x3 = (char)languageByte3;
Iso639ThreeLetterCode = x.ToString(CultureInfo.InvariantCulture) + x2.ToString(CultureInfo.InvariantCulture) + x3.ToString(CultureInfo.InvariantCulture);
var languageByte1 = ((Buffer[languageIndex] << 1) >> 3) + 0x60;
var languageByte2 = ((Buffer[languageIndex] & 0x3) << 3) + (Buffer[languageIndex + 1] >> 5) + 0x60;
var languageByte3 = (Buffer[languageIndex + 1] & 0x1f) + 0x60;
var x1 = (char)languageByte1;
var x2 = (char)languageByte2;
var x3 = (char)languageByte3;
Iso639ThreeLetterCode = x1.ToString(CultureInfo.InvariantCulture) + x2.ToString(CultureInfo.InvariantCulture) + x3.ToString(CultureInfo.InvariantCulture);
}
public string LanguageString
{
get
{
switch (Iso639ThreeLetterCode)
{
case ("abk"): return "Abkhazian";
case ("ace"): return "Achinese";
case ("ach"): return "Acoli";
case ("ada"): return "Adangme";
case ("aar"): return "Afar";
case ("afh"): return "Afrihili";
case ("afr"): return "Afrikaans";
case ("afa"): return "Afro-Asiatic (Other)";
case ("aka"): return "Akan";
case ("akk"): return "Akkadian";
case ("alb"):
case ("sqi"): return "Albanian";
case ("ale"): return "Aleut";
case ("alg"): return "Algonquian languages";
case ("tut"): return "Altaic (Other)";
case ("amh"): return "Amharic";
case ("apa"): return "Apache languages";
case ("arc"): return "Aramaic";
case ("ara"):
case ("arg"): return "Arabic";
case ("arp"): return "Arapaho";
case ("arn"): return "Araucanian";
case ("arw"): return "Arawak";
case ("arm"):
case ("hye"): return "Armenian";
case ("art"): return "Artificial (Other)";
case ("asm"): return "Assamese";
case ("ava"): return "Avaric";
case ("ath"): return "Athapascan languages";
case ("ave"): return "Avestan";
case ("awa"): return "Awadhi";
case ("aym"): return "Aymara";
case ("aze"): return "Azerbaijani";
case ("nah"): return "Aztec";
case ("ban"): return "Balinese";
case ("bat"): return "Baltic (Other)";
case ("bal"): return "Baluchi";
case ("bam"): return "Bambara";
case ("bai"): return "Bamileke languages";
case ("bad"): return "Banda";
case ("bnt"): return "Bantu (Other)";
case ("bas"): return "Basa";
case ("bak"): return "Bashkir";
case ("baq"):
case ("eus"): return "Basque";
case ("bej"): return "Beja";
case ("bem"): return "Bemba";
case ("ben"): return "Bengali";
case ("ber"): return "Berber (Other)";
case ("bho"): return "Bhojpuri";
case ("bih"): return "Bihari";
case ("bik"): return "Bikol";
case ("bin"): return "Bini";
case ("bis"): return "Bislama";
case ("bra"): return "Braj";
case ("bre"): return "Breton";
case ("bug"): return "Buginese";
case ("bul"): return "Bulgarian";
case ("bua"): return "Buriat";
case ("bur"):
case ("mya"): return "Burmese";
case ("bel"): return "Byelorussian";
case ("cad"): return "Caddo";
case ("car"): return "Carib";
case ("cat"): return "Catalan";
case ("cau"): return "Caucasian (Other)";
case ("ceb"): return "Cebuano";
case ("cel"): return "Celtic (Other)";
case ("cai"): return "Central American Indian (Other)";
case ("chg"): return "Chagatai";
case ("cha"): return "Chamorro";
case ("che"): return "Chechen";
case ("chr"): return "Cherokee";
case ("chy"): return "Cheyenne";
case ("chb"): return "Chibcha";
case ("chi"):
case ("zho"): return "Chinese";
case ("chn"): return "Chinook jargon";
case ("cho"): return "Choctaw";
case ("chu"): return "Church Slavic";
case ("chv"): return "Chuvash";
case ("cop"): return "Coptic";
case ("cor"): return "Cornish";
case ("cos"): return "Corsican";
case ("cre"): return "Cree";
case ("mus"): return "Creek";
case ("crp"): return "Creoles and Pidgins (Other)";
case ("cpe"): return "Creoles and Pidgins, English-based (Other)";
case ("cpf"): return "Creoles and Pidgins, French-based (Other)";
case ("cpp"): return "Creoles and Pidgins, Portuguese-based (Other)";
case ("cus"): return "Cushitic (Other)";
case (" "): return "Croatian";
case ("ces"):
case ("cze"): return "Czech";
case ("dak"): return "Dakota";
case ("dan"): return "Danish";
case ("del"): return "Delaware";
case ("din"): return "Dinka";
case ("div"): return "Divehi";
case ("doi"): return "Dogri";
case ("dra"): return "Dravidian (Other)";
case ("dua"): return "Duala";
case ("dut"):
case ("nla"): return "Dutch";
case ("dum"): return "Dutch, Middle (ca. 1050-1350)";
case ("dyu"): return "Dyula";
case ("dzo"): return "Dzongkha";
case ("efi"): return "Efik";
case ("egy"): return "Egyptian (Ancient)";
case ("eka"): return "Ekajuk";
case ("elx"): return "Elamite";
case ("eng"): return "English";
case ("enm"): return "English, Middle (ca. 1100-1500)";
case ("ang"): return "English, Old (ca. 450-1100)";
case ("esk"): return "Eskimo (Other)";
case ("epo"): return "Esperanto";
case ("est"): return "Estonian";
case ("ewe"): return "Ewe";
case ("ewo"): return "Ewondo";
case ("fan"): return "Fang";
case ("fat"): return "Fanti";
case ("fao"): return "Faroese";
case ("fij"): return "Fijian";
case ("fin"): return "Finnish";
case ("fiu"): return "Finno-Ugrian (Other)";
case ("fon"): return "Fon";
case ("fra"):
case ("fre"): return "French";
case ("frm"): return "French, Middle (ca. 1400-1600)";
case ("fro"): return "French, Old (842- ca. 1400)";
case ("fry"): return "Frisian";
case ("ful"): return "Fulah";
case ("gaa"): return "Ga";
case ("gae"):
case ("gdh"): return "Gaelic (Scots)";
case ("glg"): return "Gallegan";
case ("lug"): return "Ganda";
case ("gay"): return "Gayo";
case ("gez"): return "Geez";
case ("geo"):
case ("kat"): return "Georgian";
case ("deu"):
case ("ger"): return "German";
case ("gmh"): return "German, Middle High (ca. 1050-1500)";
case ("goh"): return "German, Old High (ca. 750-1050)";
case ("gem"): return "Germanic (Other)";
case ("gil"): return "Gilbertese";
case ("gon"): return "Gondi";
case ("got"): return "Gothic";
case ("grb"): return "Grebo";
case ("grc"): return "Greek, Ancient (to 1453)";
case ("ell"):
case ("gre"): return "Greek, Modern (1453-)";
case ("kal"): return "Greenlandic";
case ("grn"): return "Guarani";
case ("guj"): return "Gujarati";
case ("hai"): return "Haida";
case ("hau"): return "Hausa";
case ("haw"): return "Hawaiian";
case ("heb"): return "Hebrew";
case ("her"): return "Herero";
case ("hil"): return "Hiligaynon";
case ("him"): return "Himachali";
case ("hin"): return "Hindi";
case ("hmo"): return "Hiri Motu";
case ("hun"): return "Hungarian";
case ("hup"): return "Hupa";
case ("iba"): return "Iban";
case ("ice"): return "Icelandic";
case ("ibo"): return "Igbo";
case ("ijo"): return "Ijo";
case ("ilo"): return "Iloko";
case ("inc"): return "Indic (Other)";
case ("ine"): return "Indo-European (Other)";
case ("ind"): return "Indonesian";
case ("ina"): return "Interlingua (International Auxiliary language Association)";
// case ("ine"): return "Interlingue";
case ("iku"): return "Inuktitut";
case ("ipk"): return "Inupiak";
case ("ira"): return "Iranian (Other)";
case ("gai"):
case ("iri"): return "Irish";
case ("sga"): return "Irish, Old (to 900)";
case ("mga"): return "Irish, Middle (900 - 1200)";
case ("iro"): return "Iroquoian languages";
case ("ita"): return "Italian";
case ("jpn"): return "Japanese";
case ("jav"):
case ("jaw"): return "Javanese";
case ("jrb"): return "Judeo-Arabic";
case ("jpr"): return "Judeo-Persian";
case ("kab"): return "Kabyle";
case ("kac"): return "Kachin";
case ("kam"): return "Kamba";
case ("kan"): return "Kannada";
case ("kau"): return "Kanuri";
case ("kaa"): return "Kara-Kalpak";
case ("kar"): return "Karen";
case ("kas"): return "Kashmiri";
case ("kaw"): return "Kawi";
case ("kaz"): return "Kazakh";
case ("kha"): return "Khasi";
case ("khm"): return "Khmer";
case ("khi"): return "Khoisan (Other)";
case ("kho"): return "Khotanese";
case ("kik"): return "Kikuyu";
case ("kin"): return "Kinyarwanda";
case ("kir"): return "Kirghiz";
case ("kom"): return "Komi";
case ("kon"): return "Kongo";
case ("kok"): return "Konkani";
case ("kor"): return "Korean";
case ("kpe"): return "Kpelle";
case ("kro"): return "Kru";
case ("kua"): return "Kuanyama";
case ("kum"): return "Kumyk";
case ("kur"): return "Kurdish";
case ("kru"): return "Kurukh";
case ("kus"): return "Kusaie";
case ("kut"): return "Kutenai";
case ("lad"): return "Ladino";
case ("lah"): return "Lahnda";
case ("lam"): return "Lamba";
case ("oci"): return "Langue d'Oc (post 1500)";
case ("lao"): return "Lao";
case ("lat"): return "Latin";
case ("lav"): return "Latvian";
case ("ltz"): return "Letzeburgesch";
case ("lez"): return "Lezghian";
case ("lin"): return "Lingala";
case ("lit"): return "Lithuanian";
case ("loz"): return "Lozi";
case ("lub"): return "Luba-Katanga";
case ("lui"): return "Luiseno";
case ("lun"): return "Lunda";
case ("luo"): return "Luo (Kenya and Tanzania)";
case ("mac"): return "Macedonian";
case ("mad"): return "Madurese";
case ("mag"): return "Magahi";
case ("mai"): return "Maithili";
case ("mak"): return "Makasar";
case ("mlg"): return "Malagasy";
case ("may"):
case ("msa"): return "Malay";
case ("mal"): return "Malayalam";
case ("mlt"): return "Maltese";
case ("man"): return "Mandingo";
case ("mni"): return "Manipuri";
case ("mno"): return "Manobo languages";
case ("max"): return "Manx";
case ("mao"):
case ("mri"): return "Maori";
case ("mar"): return "Marathi";
case ("chm"): return "Mari";
case ("mah"): return "Marshall";
case ("mwr"): return "Marwari";
case ("mas"): return "Masai";
case ("myn"): return "Mayan languages";
case ("men"): return "Mende";
case ("mic"): return "Micmac";
case ("min"): return "Minangkabau";
case ("mis"): return "Miscellaneous (Other)";
case ("moh"): return "Mohawk";
case ("mol"): return "Moldavian";
case ("mkh"): return "Mon-Kmer (Other)";
case ("lol"): return "Mongo";
case ("mon"): return "Mongolian";
case ("mos"): return "Mossi";
case ("mul"): return "Multiple languages";
case ("mun"): return "Munda languages";
case ("nau"): return "Nauru";
case ("nav"): return "Navajo";
case ("nde"): return "Ndebele, North";
case ("nbl"): return "Ndebele, South";
case ("ndo"): return "Ndongo";
case ("nep"): return "Nepali";
case ("new"): return "Newari";
case ("nic"): return "Niger-Kordofanian (Other)";
case ("ssa"): return "Nilo-Saharan (Other)";
case ("niu"): return "Niuean";
case ("non"): return "Norse, Old";
case ("nai"): return "North American Indian (Other)";
case ("nor"): return "Norwegian";
case ("nob"): return "Norwegian (Bokmål)";
case ("nno"): return "Norwegian (Nynorsk)";
case ("nub"): return "Nubian languages";
case ("nym"): return "Nyamwezi";
case ("nya"): return "Nyanja";
case ("nyn"): return "Nyankole";
case ("nyo"): return "Nyoro";
case ("nzi"): return "Nzima";
case ("oji"): return "Ojibwa";
case ("ori"): return "Oriya";
case ("orm"): return "Oromo";
case ("osa"): return "Osage";
case ("oss"): return "Ossetic";
case ("oto"): return "Otomian languages";
case ("pal"): return "Pahlavi";
case ("pau"): return "Palauan";
case ("pli"): return "Pali";
case ("pam"): return "Pampanga";
case ("pag"): return "Pangasinan";
case ("pan"): return "Panjabi";
case ("pap"): return "Papiamento";
case ("paa"): return "Papuan-Australian (Other)";
case ("fas"):
case ("per"): return "Persian";
case ("peo"): return "Persian, Old (ca 600 - 400 B.C.)";
case ("phn"): return "Phoenician";
case ("pol"): return "Polish";
case ("pon"): return "Ponape";
case ("por"): return "Portuguese";
case ("pra"): return "Prakrit languages";
case ("pro"): return "Provencal, Old (to 1500)";
case ("pus"): return "Pushto";
case ("que"): return "Quechua";
case ("roh"): return "Rhaeto-Romance";
case ("raj"): return "Rajasthani";
case ("rar"): return "Rarotongan";
case ("roa"): return "Romance (Other)";
case ("ron"):
case ("rum"): return "Romanian";
case ("rom"): return "Romany";
case ("run"): return "Rundi";
case ("rus"): return "Russian";
case ("sal"): return "Salishan languages";
case ("sam"): return "Samaritan Aramaic";
case ("smi"): return "Sami languages";
case ("smo"): return "Samoan";
case ("sad"): return "Sandawe";
case ("sag"): return "Sango";
case ("san"): return "Sanskrit";
case ("srd"): return "Sardinian";
case ("sco"): return "Scots";
case ("sel"): return "Selkup";
case ("sem"): return "Semitic (Other)";
case ("srp"): return "Serbian";
case ("scr"): return "Serbo-Croatian";
case ("srr"): return "Serer";
case ("shn"): return "Shan";
case ("sna"): return "Shona";
case ("sid"): return "Sidamo";
case ("bla"): return "Siksika";
case ("snd"): return "Sindhi";
case ("sin"): return "Singhalese";
case ("sit"): return "Sino-Tibetan (Other)";
case ("sio"): return "Siouan languages";
case ("sla"): return "Slavic (Other)";
//case ("ssw"): return "Siswant";
case ("slk"): return "Slovak";
case ("slv"): return "Slovenian";
case ("sog"): return "Sogdian";
case ("som"): return "Somali";
case ("son"): return "Songhai";
case ("wen"): return "Sorbian languages";
case ("nso"): return "Sotho, Northern";
case ("sot"): return "Sotho, Southern";
case ("sai"): return "South American Indian (Other)";
case ("esl"):
case ("spa"): return "Spanish";
case ("suk"): return "Sukuma";
case ("sux"): return "Sumerian";
case ("sun"): return "Sudanese";
case ("sus"): return "Susu";
case ("swa"): return "Swahili";
case ("ssw"): return "Swazi";
case ("sve"): return "Swedish";
case ("swe"): return "Swedish";
case ("syr"): return "Syriac";
case ("tgl"): return "Tagalog";
case ("tah"): return "Tahitian";
case ("tgk"): return "Tajik";
case ("tmh"): return "Tamashek";
case ("tam"): return "Tamil";
case ("tat"): return "Tatar";
case ("tel"): return "Telugu";
case ("ter"): return "Tereno";
case ("tha"): return "Thai";
case ("bod"):
case ("tib"): return "Tibetan";
case ("tig"): return "Tigre";
case ("tir"): return "Tigrinya";
case ("tem"): return "Timne";
case ("tiv"): return "Tivi";
case ("tli"): return "Tlingit";
case ("tog"): return "Tonga (Nyasa)";
case ("ton"): return "Tonga (Tonga Islands)";
case ("tru"): return "Truk";
case ("tsi"): return "Tsimshian";
case ("tso"): return "Tsonga";
case ("tsn"): return "Tswana";
case ("tum"): return "Tumbuka";
case ("tur"): return "Turkish";
case ("ota"): return "Turkish, Ottoman (1500 - 1928)";
case ("tuk"): return "Turkmen";
case ("tyv"): return "Tuvinian";
case ("twi"): return "Twi";
case ("uga"): return "Ugaritic";
case ("uig"): return "Uighur";
case ("ukr"): return "Ukrainian";
case ("umb"): return "Umbundu";
case ("und"): return "Undetermined";
case ("urd"): return "Urdu";
case ("uzb"): return "Uzbek";
case ("vai"): return "Vai";
case ("ven"): return "Venda";
case ("vie"): return "Vietnamese";
case ("vol"): return "Volapük";
case ("vot"): return "Votic";
case ("wak"): return "Wakashan languages";
case ("wal"): return "Walamo";
case ("war"): return "Waray";
case ("was"): return "Washo";
case ("cym"):
case ("wel"): return "Welsh";
case ("wol"): return "Wolof";
case ("xho"): return "Xhosa";
case ("sah"): return "Yakut";
case ("yao"): return "Yao";
case ("yap"): return "Yap";
case ("yid"): return "Yiddish";
case ("yor"): return "Yoruba";
case ("zap"): return "Zapotec";
case ("zen"): return "Zenaga";
case ("zha"): return "Zhuang";
case ("zul"): return "Zulu";
case ("zun"): return "Zuni";
}
return "Any";
var language = Iso639Dash2LanguageCode.List.FirstOrDefault(p => p.ThreeLetterCode == Iso639ThreeLetterCode);
return language == null ? "Any" : language.EnglishName;
}
}
}

View File

@ -15,11 +15,12 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
public Mvhd(Stream fs)
{
Buffer = new byte[20];
int bytesRead = fs.Read(Buffer, 0, Buffer.Length);
var bytesRead = fs.Read(Buffer, 0, Buffer.Length);
if (bytesRead < Buffer.Length)
{
return;
}
int version = Buffer[0];
if (version == 0)
{

View File

@ -4,19 +4,22 @@ using Nikse.SubtitleEdit.Core.VobSub;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
{
public class Stbl : Box
{
public List<string> Texts;
public List<SubPicture> SubPictures;
public ulong StszSampleCount;
public ulong TimeScale { get; set; }
private readonly Mdia _mdia;
public List<uint> SampleSizes;
public List<SampleTimeInfo> Ssts { get; set; }
public List<SampleToChunkMap> Stsc { get; set; }
public List<ChunkText> Texts;
public Stbl(Stream fs, ulong maximumLength, ulong timeScale, string handlerType, Mdia mdia)
{
@ -24,8 +27,9 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
_mdia = mdia;
Position = (ulong)fs.Position;
Ssts = new List<SampleTimeInfo>();
Stsc = new List<SampleToChunkMap>();
SampleSizes = new List<uint>();
Texts = new List<string>();
Texts = new List<ChunkText>();
SubPictures = new List<SubPicture>();
while (fs.Position < (long)maximumLength)
{
@ -36,20 +40,28 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
if (Name == "stco") // 32-bit - chunk offset
{
Buffer = new byte[Size - 4];
fs.Read(Buffer, 0, Buffer.Length);
int version = Buffer[0];
var totalEntries = GetUInt(4);
uint lastOffset = 0;
for (var i = 0; i < totalEntries; i++)
if (handlerType != "vide" && handlerType != "soun")
{
var offset = GetUInt(8 + i * 4);
if (lastOffset + 5 < offset)
Buffer = new byte[Size - 4];
fs.Read(Buffer, 0, Buffer.Length);
int version = Buffer[0];
var totalEntries = GetUInt(4);
uint lastOffset = 0;
for (var i = 0; i < totalEntries; i++)
{
ReadText(fs, offset, handlerType, i);
}
var offset = GetUInt(8 + i * 4);
if (lastOffset + 5 < offset)
{
var text = ReadText(fs, offset, handlerType, i);
Texts.Add(text);
}
else
{
Texts.Add(new ChunkText { Size = 2, Text = null });
}
lastOffset = offset;
lastOffset = offset;
}
}
}
else if (Name == "co64") // 64-bit
@ -64,7 +76,12 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
var offset = GetUInt64(8 + i * 8);
if (lastOffset + 8 < offset)
{
ReadText(fs, offset, handlerType, i);
var text = ReadText(fs, offset, handlerType, i);
Texts.Add(text);
}
else
{
Texts.Add(new ChunkText { Size = 2, Text = null });
}
lastOffset = offset;
@ -95,24 +112,13 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
fs.Read(Buffer, 0, Buffer.Length);
int version = Buffer[0];
var numberOfSampleTimes = GetUInt(4);
if (_mdia.IsClosedCaption)
for (var i = 0; i < numberOfSampleTimes; i++)
{
for (var i = 0; i < numberOfSampleTimes; i++)
{
var sampleCount = GetUInt(8 + i * 8);
var sampleDelta = GetUInt(12 + i * 8);
Ssts.Add(new SampleTimeInfo { SampleCount = sampleCount, SampleDelta = sampleDelta });
}
}
else
{
for (var i = 0; i < numberOfSampleTimes; i++)
{
var sampleCount = GetUInt(8 + i * 8);
var sampleDelta = GetUInt(12 + i * 8);
Ssts.Add(new SampleTimeInfo { SampleCount = sampleCount, SampleDelta = sampleDelta });
}
var sampleCount = GetUInt(8 + i * 8);
var sampleDelta = GetUInt(12 + i * 8);
Ssts.Add(new SampleTimeInfo { SampleCount = sampleCount, SampleDelta = sampleDelta });
}
}
else if (Name == "stsc") // sample table sample to chunk map
{
@ -127,6 +133,7 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
var firstChunk = GetUInt(8 + i * 12);
var samplesPerChunk = GetUInt(12 + i * 12);
var sampleDescriptionIndex = GetUInt(16 + i * 12);
Stsc.Add(new SampleToChunkMap { FirstChunk = firstChunk, SamplesPerChunk = samplesPerChunk, SampleDescriptionIndex = sampleDescriptionIndex });
}
}
}
@ -135,11 +142,16 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
}
}
private void ReadText(Stream fs, ulong offset, string handlerType, int index)
private ChunkText ReadText(Stream fs, ulong offset, string handlerType, int index)
{
if (handlerType == "vide")
{
return;
return null;
}
if (handlerType == "soun")
{
return null;
}
fs.Seek((long)offset, SeekOrigin.Begin);
@ -161,7 +173,7 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
{
if (handlerType == "text" && index + 1 < SampleSizes.Count && SampleSizes[index + 1] <= 2)
{
return;
return new ChunkText { Size = 2, Text = string.Empty };
}
if (textSize == 0)
@ -179,9 +191,9 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
if (_mdia.IsClosedCaption)
{
var sb = new StringBuilder();
for (int j = 8; j < data.Length - 3; j++)
for (var j = 8; j < data.Length - 3; j++)
{
string h = data[j].ToString("X2").ToLowerInvariant();
var h = data[j].ToString("X2").ToLowerInvariant();
if (h.Length < 2)
{
h = "0" + h;
@ -218,56 +230,243 @@ namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4.Boxes
}
}
Texts.Add(text.Replace(Environment.NewLine, "\n").Replace("\n", Environment.NewLine));
text = text.Replace(Environment.NewLine, "\n").Replace("\n", Environment.NewLine);
return new ChunkText { Size = textSize, Text = text };
}
}
return new ChunkText { Size = 2, Text = null };
}
/// <summary>
/// One sample after the run-length encoded time-to-sample entries have been
/// expanded: the sample's delta and the size recorded for that sample.
/// </summary>
public class ExpandedSample
{
    /// <summary>Delta copied from the time-to-sample entry that covers this sample.</summary>
    public uint SampleDelta { get; set; }

    /// <summary>Size taken from the sample size list, or 0 when the list has no entry for this sample.</summary>
    public uint SampleSize { get; set; }

    /// <summary>
    /// Expands the time-to-sample entries into one item per sample and pairs
    /// every item with the size stored for that sample.
    /// </summary>
    /// <param name="stss">Time-to-sample entries; each entry contributes <c>SampleCount</c> items.</param>
    /// <param name="stsz">Sample sizes, one per sample, in sample order.</param>
    /// <returns>One <see cref="ExpandedSample"/> per sample, in sample order.</returns>
    public static List<ExpandedSample> From(List<SampleTimeInfo> stss, List<uint> stsz)
    {
        var result = new List<ExpandedSample>();
        foreach (var timeToSample in stss)
        {
            for (var i = 0; i < timeToSample.SampleCount; i++)
            {
                // The size list is per sample, so it must be indexed with the running
                // sample number. Using the position of the time-to-sample entry would
                // shift every size after the first entry that covers several samples.
                var sampleNumber = result.Count;
                result.Add(new ExpandedSample
                {
                    SampleDelta = timeToSample.SampleDelta,
                    SampleSize = sampleNumber < stsz.Count ? stsz[sampleNumber] : 0,
                });
            }
        }

        return result;
    }
}
/// <summary>
/// Builds paragraphs by walking the expanded samples and pairing them with
/// the chunk texts in order. A sample with size 2 or less only consumes a
/// text entry that is itself a placeholder (size 2 or less and no text);
/// any larger sample consumes the next text entry and yields a paragraph
/// when that entry carries text.
/// </summary>
/// <returns>The paragraphs, with start/end computed from the accumulated sample deltas.</returns>
public List<Paragraph> GetParagraphsNew()
{
    var result = new List<Paragraph>();
    var samples = ExpandedSample.From(Ssts, SampleSizes);
    var elapsed = 0.0;
    var nextText = 0;

    foreach (var sample in samples)
    {
        // Time always advances, whether or not a text is left to pair with.
        var start = elapsed;
        elapsed += sample.SampleDelta / (double)TimeScale;

        if (nextText >= Texts.Count)
        {
            continue;
        }

        var chunk = Texts[nextText];
        var isSmallChunk = chunk.Size <= 2;
        var hasText = !string.IsNullOrEmpty(chunk.Text);

        if (sample.SampleSize <= 2)
        {
            // Small sample: skip over a matching placeholder text, keep a real text for later.
            if (isSmallChunk && !hasText)
            {
                nextText++;
            }

            continue;
        }

        if (!isSmallChunk && hasText)
        {
            result.Add(new Paragraph(chunk.Text, start * 1000.0, elapsed * 1000.0));
        }

        nextText++;
    }

    return result;
}
/// <summary>
/// Builds paragraphs by walking the expanded time-to-sample entries and
/// consuming one chunk text per step, where a step covers as many samples
/// as the sample-to-chunk map currently prescribes.
/// </summary>
/// <remarks>
/// Side effect: <c>Ssts</c> is replaced by its expanded form (one entry per sample).
/// </remarks>
/// <returns>The paragraphs for all chunk texts that are not null or empty.</returns>
public List<Paragraph> GetParagraphs2()
{
    // expand time codes so that every entry describes exactly one sample
    var ssts = new List<SampleTimeInfo>();
    foreach (var timeInfo in Ssts)
    {
        for (var i = 0; i < timeInfo.SampleCount; i++)
        {
            ssts.Add(new SampleTimeInfo { SampleCount = 1, SampleDelta = timeInfo.SampleDelta });
        }
    }

    Ssts = ssts;

    var paragraphs = new List<Paragraph>();
    var textIndex = 0;
    double totalTime = 0;
    uint samplesPerChunk = 1;
    var index = 0;

    // When the first sample is a 2-sized placeholder but the first text is real,
    // skip that sample's duration before pairing texts with samples.
    // (FirstOrDefault yields 0 for an empty size list, which fails the "== 2" test.)
    var firstText = Texts.FirstOrDefault();
    var firstSampleSize = SampleSizes.FirstOrDefault();
    if (Ssts.Count > 0 &&
        firstText != null &&
        firstSampleSize == 2 &&
        firstText.Size > firstSampleSize &&
        !string.IsNullOrEmpty(firstText.Text))
    {
        totalTime += Ssts[index].SampleDelta / (double)TimeScale;
        index++;
    }

    while (index < Ssts.Count)
    {
        var timeInfo = Ssts[index];
        var samplesPerChunkHit = Stsc.FirstOrDefault(p => p.FirstChunk == index + 1);
        if (samplesPerChunkHit != null)
        {
            samplesPerChunk = samplesPerChunkHit.SamplesPerChunk;
            if (samplesPerChunk == 0)
            {
                SeLogger.Error("MP4 has unexpected samples per chunk with zero");
                samplesPerChunk = 1;
            }
        }

        var before = totalTime;
        totalTime += timeInfo.SampleDelta / (double)TimeScale;

        // Fold the remaining samples of this step into the same time span. A map entry
        // found on the way only takes effect from the next step on.
        var newSamplesPerChunk = samplesPerChunk;
        for (var i = 1; i < samplesPerChunk; i++)
        {
            index++;
            samplesPerChunkHit = Stsc.FirstOrDefault(p => p.FirstChunk == index + 1);
            if (samplesPerChunkHit != null)
            {
                newSamplesPerChunk = samplesPerChunkHit.SamplesPerChunk;
                if (newSamplesPerChunk == 0) // validate the value just read, not the current one
                {
                    SeLogger.Error("MP4 has unexpected samples per chunk with zero");
                    newSamplesPerChunk = 1;
                }
            }

            if (index < Ssts.Count)
            {
                timeInfo = Ssts[index];
                totalTime += timeInfo.SampleDelta / (double)TimeScale;
            }
        }

        samplesPerChunk = newSamplesPerChunk;

        if (textIndex < Texts.Count)
        {
            // Entries may be null or carry no text; those only advance the text index.
            var text = Texts[textIndex];
            if (text != null && !string.IsNullOrEmpty(text.Text))
            {
                paragraphs.Add(new Paragraph(text.Text, before * 1000.0, totalTime * 1000.0));
            }

            textIndex++;
        }

        index++;
    }

    return paragraphs;
}
public List<Paragraph> GetParagraphs()
{
var paragraphs = new List<Paragraph>();
double totalTime = 0;
var allTimes = new List<double>();
// expand time codes
var ssts = new List<SampleTimeInfo>();
foreach (var timeInfo in Ssts)
{
for (var i = 0; i < timeInfo.SampleCount; i++)
{
totalTime += timeInfo.SampleDelta / (double)TimeScale;
allTimes.Add(totalTime);
ssts.Add(new SampleTimeInfo { SampleCount = 1, SampleDelta = timeInfo.SampleDelta });
}
}
Ssts = ssts;
var index = 0;
var textIndex = 0;
while (index < allTimes.Count - 1)
double totalTime = 0;
if (SampleSizes[0] == 2 && !string.IsNullOrEmpty(Texts[0].Text))
{
if (index > 0 && index + 1 < SampleSizes.Count && SampleSizes[index + 1] == 2)
totalTime += Ssts[index].SampleDelta / (double)TimeScale;
index++;
}
while (index < Ssts.Count)
{
var before = totalTime;
totalTime += Ssts[index].SampleDelta / (double)TimeScale;
if (index > 0 && index + 1 < SampleSizes.Count && SampleSizes[index] == 2)
{
index++;
}
var timeStart = allTimes[index];
var timeEnd = timeStart + 2;
if (index + 1 < allTimes.Count)
{
timeEnd = allTimes[index + 1];
before = totalTime;
totalTime += Ssts[index].SampleDelta / (double)TimeScale;
}
if (_mdia.IsVobSubSubtitle && SubPictures.Count > textIndex)
{
paragraphs.Add(new Paragraph(string.Empty, timeStart * 1000.0, timeEnd * 1000.0));
paragraphs.Add(new Paragraph(string.Empty, before * 1000.0, totalTime * 1000.0));
}
else if (Texts.Count > textIndex)
{
paragraphs.Add(new Paragraph(Texts[textIndex], timeStart * 1000.0, timeEnd * 1000.0));
var text = Texts[textIndex];
if (text.Size <= 2 && text.Text == null && textIndex + 1 < Texts.Count)
{
textIndex++;
text = Texts[textIndex];
}
if (!string.IsNullOrEmpty(text.Text))
{
paragraphs.Add(new Paragraph(text.Text, before * 1000.0, totalTime * 1000.0));
}
}
index++;
textIndex++;
}
if (index <= Ssts.Count && textIndex < Texts.Count && index > 0)
{
var text = Texts[textIndex];
if (!string.IsNullOrEmpty(text.Text))
{
var before = totalTime - Ssts[index - 1].SampleDelta / (double)TimeScale;
paragraphs.Add(new Paragraph(text.Text, before * 1000.0, totalTime * 1000.0));
}
}
return paragraphs;
}
}

View File

@ -0,0 +1,8 @@
namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4
{
/// <summary>
/// Text result for one chunk offset of an MP4 sample table, together with
/// the size that was recorded for it.
/// </summary>
public class ChunkText
{
/// <summary>
/// Size recorded for the text. Placeholder entries (no text read) are created with a size of 2.
/// </summary>
public uint Size { get; set; }
/// <summary>
/// The text that was read. May be null or empty for placeholder entries.
/// </summary>
public string Text { get; set; }
}
}

View File

@ -0,0 +1,9 @@
namespace Nikse.SubtitleEdit.Core.ContainerFormats.Mp4
{
/// <summary>
/// One entry of the "stsc" (sample table sample to chunk map) box: three
/// consecutive unsigned 32-bit values per entry.
/// </summary>
public class SampleToChunkMap
{
/// <summary>First value of the entry: the chunk number from which this entry applies.</summary>
public uint FirstChunk { get; set; }
/// <summary>Second value of the entry: the number of samples in each chunk.</summary>
public uint SamplesPerChunk { get; set; }
/// <summary>Third value of the entry: the sample description index.</summary>
public uint SampleDescriptionIndex { get; set; }
}
}