mirror of
https://github.com/blackjack4494/yt-dlc.git
synced 2024-11-15 15:32:31 +01:00
[utils] add extract_attributes for extracting HTML tag attributes
This commit is contained in:
parent
1721fef28b
commit
689fb748ee
@ -248,6 +248,14 @@ def get_element_by_attribute(attribute, value, html):
|
|||||||
return unescapeHTML(res)
|
return unescapeHTML(res)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_attributes(attributes_str, attributes_regex=r'(?s)\s*([^\s=]+)\s*=\s*["\']([^"\']+)["\']'):
    """Extract HTML tag attributes from a string into a dictionary.

    attributes_str   -- text containing the attributes, e.g. the inside of
                        an opening tag: 'id="player" class="video big"'.
    attributes_regex -- pattern applied with re.findall; it must yield
                        exactly two groups per match: attribute name and
                        attribute value.

    Returns a dict mapping attribute names to values. The dict is empty
    when nothing matches. If a name occurs more than once, the value of
    the last occurrence is kept. Values are returned exactly as matched
    (no HTML unescaping is performed here).

    Limitations of the default pattern:
      * only quoted values are recognised (single or double quotes);
        unquoted and valueless attributes are skipped;
      * the value must be non-empty and must not contain a quote character
        of either kind, so 'title="it's"' yields the value 'it';
      * the opening and closing quote are not required to be the same
        character.
    """
    # findall returns a list of (name, value) tuples; an empty list simply
    # produces an empty dict, so no separate emptiness check is needed.
    return dict(re.findall(attributes_regex, attributes_str))
|
||||||
|
|
||||||
|
|
||||||
def clean_html(html):
|
def clean_html(html):
|
||||||
"""Clean an HTML snippet into a readable string"""
|
"""Clean an HTML snippet into a readable string"""
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user