1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-02 09:12:40 +01:00

Randomize archive order before populating search tree

This doesn't result in an elegant, perfectly balanced search tree,
but it's absolutely good enough. This commit completely mitigates
the worst-case scenario where the archive file is sorted.

Signed-off-by: Jody Bruchon <jody@jodybruchon.com>
This commit is contained in:
Jody Bruchon 2020-09-17 21:45:40 -04:00
parent 1d74d8d9f6
commit fda63a4e87

View File

@ -122,17 +122,14 @@ def __init__(self, line):
# Tree insertion
def at_insert(self, line):
# print("at_insert: ", line)
cur = self
while True:
# print("comparing ", line, cur.line)
if cur.line:
if line < cur.line:
if cur.left is None:
cur.left = ArchiveTree(line)
return
else:
# print("LEFT")
cur = cur.left
continue
elif line > cur.line:
@ -140,7 +137,6 @@ def at_insert(self, line):
cur.right = ArchiveTree(line)
return
else:
# print("RIGHT")
cur = cur.right
continue
else:
@ -426,43 +422,24 @@ def preload_download_archive(self):
if ioe.errno != errno.ENOENT:
raise
lmax = len(lines)
if lmax >= 4:
if lmax > 10:
# Populate binary search tree by splitting the archive list in half
# and then adding from the outside edges inward
# This mitigates the worst case where the archive has been sorted
ptrLL = 0
ptrLR = lmax // 2
ptrRL = ptrLR + 1
ptrRR = lmax - 1
inserted = 0
while True:
# print("ptrs: %d %d %d %d" % (ptrLL, ptrLR, ptrRL, ptrRR))
if ptrLR > ptrLL:
self.archive.at_insert(lines[ptrLR])
inserted += 1
ptrLR -= 1;
if ptrRL < ptrRR:
self.archive.at_insert(lines[ptrRL])
inserted += 1
ptrRL += 1;
if ptrLL < ptrLR:
self.archive.at_insert(lines[ptrLL])
inserted += 1
ptrLL += 1;
if ptrRR > ptrRL:
self.archive.at_insert(lines[ptrRR])
inserted += 1
ptrRR -= 1;
if ptrLL == ptrLR and ptrRL == ptrRR:
print("inserted: %d, lmax: %d" % (inserted, lmax))
pos = 0
while pos < lmax:
if lmax - pos <= 2:
break
elif lmax > 0:
# Skip multi-line logic for a single line
for idx in lines:
self.archive.at_insert(idx)
else:
# Choose a random later index to exchange with the current position.
# randrange() excludes its upper bound, so index lmax - 1 is never picked.
target = random.randrange(pos + 1, lmax - 1)
# Tuple assignment exchanges both slots in one step, so neither entry is
# overwritten before it has been copied (no archive line gets lost).
lines[pos], lines[target] = lines[target], lines[pos]
pos += 1
elif lmax < 1:
# No lines were loaded
return False
for x in lines:
self.archive.at_insert(x)
return True
def check_deprecated(param, option, suggestion):