From 1b10446e5ecfb50d84fae88b6b8953ed19bfe1fb Mon Sep 17 00:00:00 2001
From: chunky programmer <78101139+ChunkyProgrammer@users.noreply.github.com>
Date: Mon, 24 Apr 2023 17:40:58 -0400
Subject: [PATCH] move url parsing to utils method
---
src/invidious/comments.cr | 51 +------------------------
src/invidious/helpers/utils.cr | 53 ++++++++++++++++++++++++++
src/invidious/videos/description.cr | 59 +++--------------------------
src/invidious/videos/parser.cr | 2 +-
4 files changed, 62 insertions(+), 103 deletions(-)
diff --git a/src/invidious/comments.cr b/src/invidious/comments.cr
index fd2be73d..0c863977 100644
--- a/src/invidious/comments.cr
+++ b/src/invidious/comments.cr
@@ -635,55 +635,8 @@ def content_to_comment_html(content, video_id : String? = "")
text = HTML.escape(run["text"].as_s)
- if run["navigationEndpoint"]?
- if url = run["navigationEndpoint"]["urlEndpoint"]?.try &.["url"].as_s
- url = URI.parse(url)
- displayed_url = text
-
- if url.host == "youtu.be"
- url = "/watch?v=#{url.request_target.lstrip('/')}"
- elsif url.host.nil? || url.host.not_nil!.ends_with?("youtube.com")
- if url.path == "/redirect"
- # Sometimes, links can be corrupted (why?) so make sure to fallback
- # nicely. See https://github.com/iv-org/invidious/issues/2682
- url = url.query_params["q"]? || ""
- displayed_url = url
- else
- url = url.request_target
- displayed_url = "youtube.com#{url}"
- end
- end
-
- text = %(#{reduce_uri(displayed_url)})
- elsif watch_endpoint = run["navigationEndpoint"]["watchEndpoint"]?
- start_time = watch_endpoint["startTimeSeconds"]?.try &.as_i
- link_video_id = watch_endpoint["videoId"].as_s
-
- url = "/watch?v=#{link_video_id}"
- url += "&t=#{start_time}" if !start_time.nil?
-
- # If the current video ID (passed through from the caller function)
- # is the same as the video ID in the link, add HTML attributes for
- # the JS handler function that bypasses page reload.
- #
- # See: https://github.com/iv-org/invidious/issues/3063
- if link_video_id == video_id
- start_time ||= 0
- text = %(#{reduce_uri(text)})
- else
- text = %(#{text})
- end
- elsif url = run.dig?("navigationEndpoint", "commandMetadata", "webCommandMetadata", "url").try &.as_s
- if text.starts_with?(/\s?[@#]/)
- # Handle "pings" in comments and hasthags differently
- # See:
- # - https://github.com/iv-org/invidious/issues/3038
- # - https://github.com/iv-org/invidious/issues/3062
- text = %(#{text})
- else
- text = %(#{reduce_uri(url)})
- end
- end
+ if navigationEndpoint = run.dig?("navigationEndpoint")
+ text = parse_link_endpoint(navigationEndpoint, text, video_id)
end
text = "#{text}" if run["bold"]?
diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr
index 500a2582..bcf7c963 100644
--- a/src/invidious/helpers/utils.cr
+++ b/src/invidious/helpers/utils.cr
@@ -389,3 +389,56 @@ def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "
end
return str
end
+
+# Get the HTML link for a navigationEndpoint or an innertubeCommand; returns the given text unchanged when no supported endpoint is found
+def parse_link_endpoint(endpoint : JSON::Any, text : String, video_id : String)
+ if url = endpoint.dig?("urlEndpoint", "url").try &.as_s
+ url = URI.parse(url)
+ displayed_url = text
+
+ if url.host == "youtu.be"
+ url = "/watch?v=#{url.request_target.lstrip('/')}"
+ elsif url.host.nil? || url.host.not_nil!.ends_with?("youtube.com")
+ if url.path == "/redirect"
+ # Sometimes, links can be corrupted (why?) so make sure to fall back
+ # nicely. See https://github.com/iv-org/invidious/issues/2682
+ url = url.query_params["q"]? || ""
+ displayed_url = url
+ else
+ url = url.request_target
+ displayed_url = "youtube.com#{url}"
+ end
+ end
+
+ text = %(#{reduce_uri(displayed_url)})
+ elsif watch_endpoint = endpoint.dig?("watchEndpoint")
+ start_time = watch_endpoint["startTimeSeconds"]?.try &.as_i
+ link_video_id = watch_endpoint["videoId"].as_s
+
+ url = "/watch?v=#{link_video_id}"
+ url += "&t=#{start_time}" if !start_time.nil?
+
+ # If the current video ID (passed through from the caller function)
+ # is the same as the video ID in the link, add HTML attributes for
+ # the JS handler function that bypasses page reload.
+ #
+ # See: https://github.com/iv-org/invidious/issues/3063
+ if link_video_id == video_id
+ start_time ||= 0
+ text = %(#{reduce_uri(text)})
+ else
+ text = %(#{text})
+ end
+ elsif url = endpoint.dig?("commandMetadata", "webCommandMetadata", "url").try &.as_s
+ if text.starts_with?(/\s?[@#]/)
+ # Handle "pings" in comments and hashtags differently
+ # See:
+ # - https://github.com/iv-org/invidious/issues/3038
+ # - https://github.com/iv-org/invidious/issues/3062
+ text = %(#{text})
+ else
+ text = %(#{reduce_uri(url)})
+ end
+ end
+ return text
+end
diff --git a/src/invidious/videos/description.cr b/src/invidious/videos/description.cr
index 0a9d84f8..542cb416 100644
--- a/src/invidious/videos/description.cr
+++ b/src/invidious/videos/description.cr
@@ -1,57 +1,6 @@
require "json"
require "uri"
-def parse_command(command : JSON::Any?, string : String) : String?
- on_tap = command.dig?("onTap", "innertubeCommand")
-
- # 3rd party URL, extract original URL from YouTube tracking URL
- if url_endpoint = on_tap.try &.["urlEndpoint"]?
- if url_endpoint["url"].as_s.includes? "youtube.com/redirect"
- youtube_url = URI.parse url_endpoint["url"].as_s
- original_url = youtube_url.query_params["q"]?
- if original_url.nil?
- return ""
- else
- return "#{original_url}"
- end
- else
- # not a redirect url, some first party url
- # see https://github.com/iv-org/invidious/issues/3751
- first_party_url = url_endpoint["url"].as_s
- return "#{first_party_url}"
- end
- # 1st party watch URL
- elsif watch_endpoint = on_tap.try &.["watchEndpoint"]?
- video_id = watch_endpoint["videoId"].as_s
- time = watch_endpoint["startTimeSeconds"].as_i
-
- url = "/watch?v=#{video_id}&t=#{time}s"
-
- # if string is a timestamp, use the string instead
- # this is a lazy regex for validating timestamps
- if /(?:\d{1,2}:){1,2}\d{2}/ =~ string
- return "#{string}"
- else
- return "#{url}"
- end
- # hashtag/other browse URLs
- elsif browse_endpoint = on_tap.try &.dig?("commandMetadata", "webCommandMetadata")
- url = browse_endpoint["url"].try &.as_s
-
- # remove unnecessary character in a channel name
- if browse_endpoint["webPageType"]?.try &.as_s == "WEB_PAGE_TYPE_CHANNEL"
- name = string.match(/@[\w\d.-]+/)
- if name.try &.[0]?
- return "#{name.try &.[0]}"
- end
- end
-
- return "#{string}"
- end
-
- return "(unknown YouTube desc command)"
-end
-
private def copy_string(str : String::Builder, iter : Iterator, count : Int) : Int
copied = 0
while copied < count
@@ -68,7 +17,7 @@ private def copy_string(str : String::Builder, iter : Iterator, count : Int) : I
return copied
end
-def parse_description(desc : JSON::Any?) : String?
+def parse_description(desc, video_id : String) : String?
return "" if desc.nil?
content = desc["content"].as_s
@@ -100,7 +49,11 @@ def parse_description(desc : JSON::Any?) : String?
copy_string(str2, iter, cmd_length)
end
- str << parse_command(command, cmd_content)
+ link = cmd_content
+ if on_tap = command.dig?("onTap", "innertubeCommand")
+ link = parse_link_endpoint(on_tap, cmd_content, video_id)
+ end
+ str << link
index += cmd_length
end
diff --git a/src/invidious/videos/parser.cr b/src/invidious/videos/parser.cr
index 1c6d118d..2e8eecc3 100644
--- a/src/invidious/videos/parser.cr
+++ b/src/invidious/videos/parser.cr
@@ -287,7 +287,7 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any
# description_html = video_secondary_renderer.try &.dig?("description", "runs")
# .try &.as_a.try { |t| content_to_comment_html(t, video_id) }
- description_html = parse_description(video_secondary_renderer.try &.dig?("attributedDescription"))
+ description_html = parse_description(video_secondary_renderer.try &.dig?("attributedDescription"), video_id)
# Video metadata