From fd87f4237892cd8e3120598233c765bfcc6561f6 Mon Sep 17 00:00:00 2001 From: Jody Bruchon Date: Fri, 18 Sep 2020 14:22:42 -0400 Subject: [PATCH 1/2] Randomize the ArchiveTree the proper Python way Signed-off-by: Jody Bruchon --- youtube_dlc/YoutubeDL.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index 0bdc98321..595c4be54 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -424,16 +424,7 @@ def preload_download_archive(self): raise lmax = len(lines) if lmax > 10: - pos = 0 - while pos < lmax: - if lmax - pos <= 2: - break - target = random.randrange(pos + 1, lmax - 1) - # Swap line at pos with randomly chosen target - temp = lines[pos] - lines[pos] = lines[target] - lines[target] = temp - pos += 1 + random.shuffle(lines) elif lmax < 1: # No lines were loaded return False From a45e8619182453069efb0ccb9093f2fe7a8c744a Mon Sep 17 00:00:00 2001 From: Jody Bruchon Date: Fri, 18 Sep 2020 21:18:23 -0400 Subject: [PATCH 2/2] Switch from binary search tree to Python sets Signed-off-by: Jody Bruchon --- youtube_dlc/YoutubeDL.py | 64 +++------------------------------------- 1 file changed, 4 insertions(+), 60 deletions(-) diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index 595c4be54..c7e3eb01e 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -114,54 +114,6 @@ import ctypes -class ArchiveTree(object): - """Binary search tree for download archive entries""" - def __init__(self, line): - self.left = None - self.right = None - self.line = line - - # Tree insertion - def at_insert(self, line): - cur = self - while True: - if cur.line: - if line < cur.line: - if cur.left is None: - cur.left = ArchiveTree(line) - return - else: - cur = cur.left - continue - elif line > cur.line: - if cur.right is None: - cur.right = ArchiveTree(line) - return - else: - cur = cur.right - continue - else: - # Duplicate line found - return - else: - cur.line = line - return - - def at_exist(self, line): - if self.line is None: - return False - if line < self.line: - if self.left is None: - return False - return self.left.at_exist(line) - elif line > self.line: - if self.right is None: - return False - return self.right.at_exist(line) - else: - return True - - class YoutubeDL(object): """YoutubeDL class. @@ -407,29 +359,21 @@ def __init__(self, params=None, auto_init=True): } self.params.update(params) self.cache = Cache(self) - self.archive = ArchiveTree(None) + self.archive = set() """Preload the archive, if any is specified""" def preload_download_archive(self): - lines = [] fn = self.params.get('download_archive') if fn is None: return False try: with locked_file(fn, 'r', encoding='utf-8') as archive_file: for line in archive_file: - lines.append(line.strip()) + self.archive.add(line.strip()) except IOError as ioe: if ioe.errno != errno.ENOENT: raise - lmax = len(lines) - if lmax > 10: - random.shuffle(lines) - elif lmax < 1: - # No lines were loaded return False - for x in lines: - self.archive.at_insert(x) return True def check_deprecated(param, option, suggestion): @@ -2219,7 +2163,7 @@ def in_download_archive(self, info_dict): if not vid_id: return False # Incomplete video information - return self.archive.at_exist(vid_id) + return vid_id in self.archive def record_download_archive(self, info_dict): fn = self.params.get('download_archive') @@ -2229,7 +2173,7 @@ def record_download_archive(self, info_dict): assert vid_id with locked_file(fn, 'a', encoding='utf-8') as archive_file: archive_file.write(vid_id + '\n') - self.archive.at_insert(vid_id) + self.archive.add(vid_id) @staticmethod def format_resolution(format, default='unknown'):