From 1b10446e5ecfb50d84fae88b6b8953ed19bfe1fb Mon Sep 17 00:00:00 2001 From: chunky programmer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Mon, 24 Apr 2023 17:40:58 -0400 Subject: [PATCH] move url parsing to utils method --- src/invidious/comments.cr | 51 +------------------------ src/invidious/helpers/utils.cr | 53 ++++++++++++++++++++++++++ src/invidious/videos/description.cr | 59 +++-------------------------- src/invidious/videos/parser.cr | 2 +- 4 files changed, 62 insertions(+), 103 deletions(-) diff --git a/src/invidious/comments.cr b/src/invidious/comments.cr index fd2be73d..0c863977 100644 --- a/src/invidious/comments.cr +++ b/src/invidious/comments.cr @@ -635,55 +635,8 @@ def content_to_comment_html(content, video_id : String? = "") text = HTML.escape(run["text"].as_s) - if run["navigationEndpoint"]? - if url = run["navigationEndpoint"]["urlEndpoint"]?.try &.["url"].as_s - url = URI.parse(url) - displayed_url = text - - if url.host == "youtu.be" - url = "/watch?v=#{url.request_target.lstrip('/')}" - elsif url.host.nil? || url.host.not_nil!.ends_with?("youtube.com") - if url.path == "/redirect" - # Sometimes, links can be corrupted (why?) so make sure to fallback - # nicely. See https://github.com/iv-org/invidious/issues/2682 - url = url.query_params["q"]? || "" - displayed_url = url - else - url = url.request_target - displayed_url = "youtube.com#{url}" - end - end - - text = %(#{reduce_uri(displayed_url)}) - elsif watch_endpoint = run["navigationEndpoint"]["watchEndpoint"]? - start_time = watch_endpoint["startTimeSeconds"]?.try &.as_i - link_video_id = watch_endpoint["videoId"].as_s - - url = "/watch?v=#{link_video_id}" - url += "&t=#{start_time}" if !start_time.nil? - - # If the current video ID (passed through from the caller function) - # is the same as the video ID in the link, add HTML attributes for - # the JS handler function that bypasses page reload. - # - # See: https://github.com/iv-org/invidious/issues/3063 - if link_video_id == video_id - start_time ||= 0 - text = %(#{reduce_uri(text)}) - else - text = %(#{text}) - end - elsif url = run.dig?("navigationEndpoint", "commandMetadata", "webCommandMetadata", "url").try &.as_s - if text.starts_with?(/\s?[@#]/) - # Handle "pings" in comments and hasthags differently - # See: - # - https://github.com/iv-org/invidious/issues/3038 - # - https://github.com/iv-org/invidious/issues/3062 - text = %(#{text}) - else - text = %(#{reduce_uri(url)}) - end - end + if navigationEndpoint = run.dig?("navigationEndpoint") + text = parse_link_endpoint(navigationEndpoint, text, video_id) end text = "#{text}" if run["bold"]? diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index 500a2582..bcf7c963 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -389,3 +389,56 @@ def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = " end return str end + +# Get the html link from a NavigationEndpoint or an innertubeCommand +def parse_link_endpoint(endpoint : JSON::Any, text : String, video_id : String) + if url = endpoint.dig?("urlEndpoint", "url").try &.as_s + url = URI.parse(url) + displayed_url = text + + if url.host == "youtu.be" + url = "/watch?v=#{url.request_target.lstrip('/')}" + elsif url.host.nil? || url.host.not_nil!.ends_with?("youtube.com") + if url.path == "/redirect" + # Sometimes, links can be corrupted (why?) so make sure to fallback + # nicely. See https://github.com/iv-org/invidious/issues/2682 + url = url.query_params["q"]? || "" + displayed_url = url + else + url = url.request_target + displayed_url = "youtube.com#{url}" + end + end + + text = %(#{reduce_uri(displayed_url)}) + elsif watch_endpoint = endpoint.dig?("watchEndpoint") + start_time = watch_endpoint["startTimeSeconds"]?.try &.as_i + link_video_id = watch_endpoint["videoId"].as_s + + url = "/watch?v=#{link_video_id}" + url += "&t=#{start_time}" if !start_time.nil? + + # If the current video ID (passed through from the caller function) + # is the same as the video ID in the link, add HTML attributes for + # the JS handler function that bypasses page reload. + # + # See: https://github.com/iv-org/invidious/issues/3063 + if link_video_id == video_id + start_time ||= 0 + text = %(#{reduce_uri(text)}) + else + text = %(#{text}) + end + elsif url = endpoint.dig?("commandMetadata", "webCommandMetadata", "url").try &.as_s + if text.starts_with?(/\s?[@#]/) + # Handle "pings" in comments and hasthags differently + # See: + # - https://github.com/iv-org/invidious/issues/3038 + # - https://github.com/iv-org/invidious/issues/3062 + text = %(#{text}) + else + text = %(#{reduce_uri(url)}) + end + end + return text +end diff --git a/src/invidious/videos/description.cr b/src/invidious/videos/description.cr index 0a9d84f8..542cb416 100644 --- a/src/invidious/videos/description.cr +++ b/src/invidious/videos/description.cr @@ -1,57 +1,6 @@ require "json" require "uri" -def parse_command(command : JSON::Any?, string : String) : String? - on_tap = command.dig?("onTap", "innertubeCommand") - - # 3rd party URL, extract original URL from YouTube tracking URL - if url_endpoint = on_tap.try &.["urlEndpoint"]? - if url_endpoint["url"].as_s.includes? "youtube.com/redirect" - youtube_url = URI.parse url_endpoint["url"].as_s - original_url = youtube_url.query_params["q"]? - if original_url.nil? - return "" - else - return "#{original_url}" - end - else - # not a redirect url, some first party url - # see https://github.com/iv-org/invidious/issues/3751 - first_party_url = url_endpoint["url"].as_s - return "#{first_party_url}" - end - # 1st party watch URL - elsif watch_endpoint = on_tap.try &.["watchEndpoint"]? - video_id = watch_endpoint["videoId"].as_s - time = watch_endpoint["startTimeSeconds"].as_i - - url = "/watch?v=#{video_id}&t=#{time}s" - - # if string is a timestamp, use the string instead - # this is a lazy regex for validating timestamps - if /(?:\d{1,2}:){1,2}\d{2}/ =~ string - return "#{string}" - else - return "#{url}" - end - # hashtag/other browse URLs - elsif browse_endpoint = on_tap.try &.dig?("commandMetadata", "webCommandMetadata") - url = browse_endpoint["url"].try &.as_s - - # remove unnecessary character in a channel name - if browse_endpoint["webPageType"]?.try &.as_s == "WEB_PAGE_TYPE_CHANNEL" - name = string.match(/@[\w\d.-]+/) - if name.try &.[0]? - return "#{name.try &.[0]}" - end - end - - return "#{string}" - end - - return "(unknown YouTube desc command)" -end - private def copy_string(str : String::Builder, iter : Iterator, count : Int) : Int copied = 0 while copied < count @@ -68,7 +17,7 @@ private def copy_string(str : String::Builder, iter : Iterator, count : Int) : I return copied end -def parse_description(desc : JSON::Any?) : String? +def parse_description(desc, video_id : String) : String? return "" if desc.nil? content = desc["content"].as_s @@ -100,7 +49,11 @@ def parse_description(desc : JSON::Any?) : String? copy_string(str2, iter, cmd_length) end - str << parse_command(command, cmd_content) + link = cmd_content + if on_tap = command.dig?("onTap", "innertubeCommand") + link = parse_link_endpoint(on_tap, cmd_content, video_id) + end + str << link index += cmd_length end diff --git a/src/invidious/videos/parser.cr b/src/invidious/videos/parser.cr index 1c6d118d..2e8eecc3 100644 --- a/src/invidious/videos/parser.cr +++ b/src/invidious/videos/parser.cr @@ -287,7 +287,7 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any # description_html = video_secondary_renderer.try &.dig?("description", "runs") # .try &.as_a.try { |t| content_to_comment_html(t, video_id) } - description_html = parse_description(video_secondary_renderer.try &.dig?("attributedDescription")) + description_html = parse_description(video_secondary_renderer.try &.dig?("attributedDescription"), video_id) # Video metadata