From 9657463717048519dd3683d2f2add52fa2c91205 Mon Sep 17 00:00:00 2001 From: BlackDex Date: Sun, 27 Jan 2019 15:39:19 +0100 Subject: [PATCH] Added better favicon downloader. --- Cargo.lock | 124 ++++++++++++++++++++++++++++++++++ Cargo.toml | 5 ++ src/api/icons.rs | 172 +++++++++++++++++++++++++++++++++++++++++++++-- src/error.rs | 2 + src/main.rs | 1 + 5 files changed, 299 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5cd8f014..83fd193d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -136,6 +136,7 @@ dependencies = [ "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", "oath 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)", "paste 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.9 (registry+https://github.com/rust-lang/crates.io-index)", "ring 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)", "rmpv 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -144,6 +145,7 @@ dependencies = [ "serde 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)", + "soup 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "syslog 4.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "u2f 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "uuid 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -694,6 +696,15 @@ name = "fuchsia-zircon-sys" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "futf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "new_debug_unreachable 1.0.1 
(registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "futures" version = "0.1.25" @@ -799,6 +810,19 @@ dependencies = [ "winutil 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "html5ever" +version = "0.22.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "markup5ever 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 0.4.26 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "http" version = "0.1.15" @@ -1080,11 +1104,31 @@ dependencies = [ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "maplit" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "markup5ever" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", + "phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)", + "string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", + "string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", + "tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "matches" 
version = "0.1.8" @@ -1262,6 +1306,14 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "nickel" version = "0.11.0" @@ -1586,6 +1638,11 @@ dependencies = [ "typemap 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "proc-macro-hack" version = "0.5.4" @@ -2135,6 +2192,15 @@ dependencies = [ "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "soup" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "html5ever 0.22.5 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "stable_deref_trait" version = "1.1.1" @@ -2150,6 +2216,37 @@ name = "string" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "string_cache" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "new_debug_unreachable 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", + "precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", + "string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", + "string_cache_shared 0.3.0 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "string_cache_codegen" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "phf_generator 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", + "phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 0.4.26 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)", + "string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "string_cache_shared" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "subtle" version = "1.0.0" @@ -2232,6 +2329,16 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "tendril" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "futf 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "utf-8 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "thread-id" version = "3.3.0" @@ -2526,6 +2633,11 @@ dependencies = [ "percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "utf-8" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "utf8-ranges" version = "1.0.2" @@ -2775,6 +2887,7 @@ dependencies = [ "checksum fsevent-sys 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "1a772d36c338d07a032d5375a36f15f9a7043bf0cb8ce7cee658e037c6032874" "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" "checksum fuchsia-zircon-sys 0.3.3 
(registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" +"checksum futf 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" "checksum futures 0.1.25 (registry+https://github.com/rust-lang/crates.io-index)" = "49e7653e374fe0d0c12de4250f0bdb60680b8c80eed558c5c7538eec9c89e21b" "checksum futures-cpupool 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "ab90cde24b3319636588d0c35fe03b1333857621051837ed769faefb4c2162e4" "checksum generic-array 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3c0f28c2f5bfb5960175af447a2da7c18900693738343dc896ffbcabd9839592" @@ -2786,6 +2899,7 @@ dependencies = [ "checksum hmac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdb5aa9647ba4711e9d6968dc1c810cd23989ed435443ca962e1bf6d8b8b83ff" "checksum hmac 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f127a908633569f208325f86f71255d3363c79721d7f9fe31cd5569908819771" "checksum hostname 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "21ceb46a83a85e824ef93669c8b390009623863b5c195d1ba747292c0c72f94e" +"checksum html5ever 0.22.5 (registry+https://github.com/rust-lang/crates.io-index)" = "c213fa6a618dc1da552f54f85cba74b05d8e883c92ec4e89067736938084c26e" "checksum http 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "1a10e5b573b9a0146545010f50772b9e8b1dd0a256564cc4307694c68832a2f5" "checksum httparse 1.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e8734b0cfd3bc3e101ec59100e101c2eecd19282202e87808b3037b442777a83" "checksum hyper 0.10.15 (registry+https://github.com/rust-lang/crates.io-index)" = "df0caae6b71d266b91b4a83111a61d2b94ed2e2bea024c532b933dcff867e58c" @@ -2813,7 +2927,9 @@ dependencies = [ "checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = 
"62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" "checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" "checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" +"checksum mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" "checksum maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "08cbb6b4fef96b6d77bfc40ec491b1690c779e77b05cd9f07f787ed376fd4c43" +"checksum markup5ever 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "897636f9850c3eef4905a5540683ed53dc9393860f0846cab2c2ddf9939862ff" "checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" "checksum memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e1dd4eaac298c32ce07eb6ed9242eda7d82955b9170b7d6db59b2e02cc63fcb8" "checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" @@ -2831,6 +2947,7 @@ dependencies = [ "checksum mustache 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "51956ef1c5d20a1384524d91e616fb44dfc7d8f249bf696d49c97dd3289ecab5" "checksum native-tls 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ff8e08de0070bbf4c31f452ea2a70db092f36f6f2e4d897adf5674477d488fb2" "checksum net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "42550d9fb7b6684a6d404d9fa7250c2eb2646df731d1c06afc06dcee9e1bcf88" +"checksum new_debug_unreachable 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0cdc457076c78ab54d5e0d6fa7c47981757f1e34dc39ff92787f217dede586c4" "checksum nickel 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"e5061a832728db2dacb61cefe0ce303b58f85764ec680e71d9138229640a46d9" "checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" "checksum nom 4.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9c349f68f25f596b9f44cf0e7c69752a5c633b0550c3ff849518bfba0233774a" @@ -2866,6 +2983,7 @@ dependencies = [ "checksum phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0" "checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c" "checksum plugin 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "1a6a0dc3910bc8db877ffed8e457763b317cf880df4ae19109b9f77d277cf6e0" +"checksum precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" "checksum proc-macro-hack 0.5.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3e90aa19cd73dedc2d0e1e8407473f073d735fef0ab521438de6da8ee449ab66" "checksum proc-macro2 0.4.26 (registry+https://github.com/rust-lang/crates.io-index)" = "38fddd23d98b2144d197c0eca5705632d4fe2667d14a6be5df8934f8d74f1978" "checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" @@ -2925,9 +3043,13 @@ dependencies = [ "checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" "checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" "checksum smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15" +"checksum soup 0.3.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "a01e71d2369059bdc6abc876c062c4065a858baa52ecb4fbdb515da822ee04a4" "checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" "checksum state 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7345c971d1ef21ffdbd103a75990a15eb03604fc8b8852ca8cb418ee1a099028" "checksum string 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "b639411d0b9c738748b5397d5ceba08e648f4f1992231aa859af1a017f31f60b" +"checksum string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "25d70109977172b127fe834e5449e5ab1740b9ba49fa18a2020f509174f25423" +"checksum string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1eea1eee654ef80933142157fdad9dd8bc43cf7c74e999e369263496f04ff4da" +"checksum string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc" "checksum subtle 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2d67a5a62ba6e01cb2192ff309324cb4875d0c451d55fe2319433abe7a05a8ee" "checksum subtle 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "702662512f3ddeb74a64ce2fbbf3707ee1b6bb663d28bb054e0779bbc720d926" "checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad" @@ -2937,6 +3059,7 @@ dependencies = [ "checksum syslog 4.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a0641142b4081d3d44beffa4eefd7346a228cdf91ed70186db2ca2cef762d327" "checksum tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" "checksum tempfile 3.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = 
"7e91405c14320e5c79b3d148e1c86f40749a36e490642202a31689cb1a3452b2" +"checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b" "checksum thread-id 3.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c7fbf4c9d56b320106cd64fd024dadfa0be7cb4706725fc44a7d7ce952d820c1" "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" "checksum threadpool 1.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e2f0c90a5f3459330ac8bc0d2f879c693bb7a2f59689c1083fc4ef83834da865" @@ -2970,6 +3093,7 @@ dependencies = [ "checksum unsafe-any 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f30360d7979f5e9c6e6cea48af192ea8fab4afb3cf72597154b8f08935bc9c7f" "checksum untrusted 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "55cd1f4b4e96b46aeb8d4855db4a7a9bd96eeeb5c6a1ab54593328761642ce2f" "checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" +"checksum utf-8 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7" "checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" "checksum uuid 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dab5c5526c5caa3d106653401a267fed923e7046f35895ffcb5ca42db64942e6" "checksum vcpkg 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "def296d3eb3b12371b2c7d0e83bfe1403e4db2d7a0bba324a12b21c4ee13143d" diff --git a/Cargo.toml b/Cargo.toml index 0b16852f..166597b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -98,6 +98,10 @@ handlebars = "1.1.0" # Macro ident-combining library paste = "0.1.4" +# For favicon extraction from main 
website +soup = "0.3.0" +regex = "1.1.0" + [patch.crates-io] # Add support for Timestamp type rmp = { git = 'https://github.com/dani-garcia/msgpack-rust' } @@ -105,3 +109,4 @@ rmp = { git = 'https://github.com/dani-garcia/msgpack-rust' } # Use new native_tls version 0.2 lettre = { git = 'https://github.com/lettre/lettre', rev = 'c988b1760ad81' } lettre_email = { git = 'https://github.com/lettre/lettre', rev = 'c988b1760ad81' } + diff --git a/src/api/icons.rs b/src/api/icons.rs index 2d544717..540be039 100644 --- a/src/api/icons.rs +++ b/src/api/icons.rs @@ -7,10 +7,27 @@ use rocket::response::Content; use rocket::Route; use reqwest; +use reqwest::Client; +use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT, ACCEPT_LANGUAGE, CACHE_CONTROL, PRAGMA, ACCEPT}; +use std::time::Duration; use crate::error::Error; +//use std::error::Error as StdError; use crate::CONFIG; +//extern crate regex; +use regex::Regex; + +//extern crate soup; +use soup::prelude::*; + +use std::vec::Vec; +#[derive(Debug)] +struct IconList { + priority: u8, + href: String, +} + pub fn routes() -> Vec { routes![icon] } @@ -38,7 +55,7 @@ fn get_icon(domain: &str) -> Vec { return icon; } - let url = get_icon_url(&domain); + let url = get_icon_url(&domain).unwrap(); // Get the icon, or fallback in case of error match download_icon(&url) { @@ -114,11 +131,156 @@ fn icon_is_expired(path: &str) -> bool { expired.unwrap_or(true) } -fn get_icon_url(domain: &str) -> String { - if CONFIG.local_icon_extractor() { - format!("http://{}/favicon.ico", domain) +/// Returns a Result with a String which holds the preferred favicon location. +/// There will always be a result with a string which will contain https://example.com/favicon.ico +/// This does not mean that that location actually exists, but it is the default location. +/// +/// # Argument +/// * `domain` - A string which holds the domain with extension. 
+/// +/// # Example +/// ``` +/// favicon_location1 = get_icon_url("github.com"); +/// favicon_location2 = get_icon_url("gitlab.com"); +/// ``` +fn get_icon_url(domain: &str) -> Result { + // Set some default headers for the request. + // Use a browser like user-agent to make sure most websites will return their correct website. + let mut headers = HeaderMap::new(); + headers.insert(USER_AGENT, HeaderValue::from_static("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299")); + headers.insert(ACCEPT_LANGUAGE, HeaderValue::from_static("en-US,en;q=0.8")); + headers.insert(CACHE_CONTROL, HeaderValue::from_static("no-cache")); + headers.insert(PRAGMA, HeaderValue::from_static("no-cache")); + headers.insert(ACCEPT, HeaderValue::from_static("text/html,application/xhtml+xml,application/xml; q=0.9,image/webp,image/apng,*/*;q=0.8")); + + let client = Client::builder() + .gzip(true) + .timeout(Duration::from_secs(5)) + .default_headers(headers) + .build()?; + + // Default URL with secure and insecure schemes + let ssldomain = format!("https://{}", domain); + let httpdomain = format!("http://{}", domain); + + // Create the iconlist + let mut iconlist: Vec = Vec::new(); + + let resp = client.get(&ssldomain).send().or_else(|_| client.get(&httpdomain).send()); + if let Ok(mut content) = resp { + let body = content.text().unwrap(); + // Extract the URL from the response in case redirects occurred (like @ gitlab.com) + let url = format!("{}://{}", content.url().scheme(), content.url().host().unwrap()); + + // Add the default favicon.ico to the list with the domain the content responded from. + iconlist.push(IconList { priority: 35, href: format!("{}{}", url, "/favicon.ico") }); + + let soup = Soup::new(&body); + // Search for link tags and filter + let favicons = soup + .tag("link") + .attr("rel", Regex::new(r"icon$|apple.*icon")?) 
// Only use icon rels + .attr("href", Regex::new(r"(?i)\w+(\.jp(e){0,1}g$|\.png$|\.ico$)")?) // Only allow specific extensions + .find_all(); + + // Loop through all the found icons and determine their priority + for favicon in favicons { + let favicon_sizes = favicon.get("sizes").unwrap_or("".to_string()).to_string(); + let favicon_href = fix_href(&favicon.get("href").unwrap_or("".to_string()).to_string(), &url); + let favicon_priority = get_icon_priority(&favicon_href, &favicon_sizes); + + iconlist.push(IconList { priority: favicon_priority, href: favicon_href}) + } } else { - format!("https://icons.bitwarden.com/{}/icon.png", domain) + // Add the default favicon.ico to the list with just the given domain + iconlist.push(IconList { priority: 35, href: format!("{}{}", ssldomain, "/favicon.ico") }); + } + + // Sort the iconlist by priority + iconlist.sort_by_key(|x| x.priority); + + // There always is an icon in the list, so no need to check if it exists, and just return the first one + Ok(format!("{}", &iconlist[0].href)) +} + +/// Returns an integer with the priority of the type of icon to prefer. +/// The lower the number the better. +/// +/// # Arguments +/// * `href` - A string which holds the href value or relative path. +/// * `sizes` - The size of the icon if available as a <width>x<height> value like 32x32. +/// +/// # Example +/// ``` +/// priority1 = get_icon_priority("http://example.com/path/to/a/favicon.png", "32x32"); +/// priority2 = get_icon_priority("https://example.com/path/to/a/favicon.ico", ""); +/// ``` +fn get_icon_priority(href: &str, sizes: &str) -> u8 { + // Check if there is a dimension set + if ! 
sizes.is_empty() { + let dimensions : Vec<&str> = sizes.split("x").collect(); + let width = dimensions[0].parse::().unwrap(); + let height = dimensions[1].parse::().unwrap(); + + // Only allow square dimensions + if width == height { + // Change priority by given size + if width == 32 { + 1 + } else if width == 64 { + 2 + } else if width >= 24 && width <= 128 { + 3 + } else if width == 16 { + 4 + } else { + 100 + } + } else { + 200 + } + } else { + // Change priority by file extension + if href.ends_with(".png") { + 10 + } else if href.ends_with(".jpg") || href.ends_with(".jpeg") { + 20 + } else { + 30 + } + } +} + +/// Returns a String which will have the given href fixed by adding the correct URL if it does not have this already. +/// +/// # Arguments +/// * `href` - A string which holds the href value or relative path. +/// * `url` - A string which holds the URL including http(s) which will precede the href when needed. +/// +/// # Example +/// ``` +/// fixed_href1 = fix_href("/path/to/a/favicon.png", "https://example.com"); +/// fixed_href2 = fix_href("//example.com/path/to/a/second/favicon.jpg", "https://example.com"); +/// ``` +fn fix_href(href: &str, url: &str) -> String { + // An href starting with // (so without a scheme) is valid and would use the browser's scheme. + // We need to detect this and add the scheme here. + if href.starts_with("//") { + if url.starts_with("https") { + format!("https:{}", href) + } else { + format!("http:{}", href) + } + // If the href just starts with a single / it does not have the host here at all. + } else if ! 
href.starts_with("http") { + if href.starts_with("/") { + format!("{}{}", url, href) + } else { + format!("{}/{}", url, href) + } + // All seems OK, just return the given href + } else { + format!("{}", href) } } diff --git a/src/error.rs b/src/error.rs index 741a7429..fcfcac13 100644 --- a/src/error.rs +++ b/src/error.rs @@ -40,6 +40,7 @@ use serde_json::{Error as SerdeErr, Value}; use std::io::Error as IOErr; use std::time::SystemTimeError as TimeErr; use u2f::u2ferror::U2fError as U2fErr; +use regex::Error as RegexErr; // Error struct // Contains a String error message, meant for the user and an enum variant, with an error of different types. @@ -60,6 +61,7 @@ make_error! { IOError(IOErr): _has_source, _api_error, TimeError(TimeErr): _has_source, _api_error, ReqError(ReqErr): _has_source, _api_error, + RegexError(RegexErr): _has_source, _api_error, } impl std::fmt::Debug for Error { diff --git a/src/main.rs b/src/main.rs index 261c99d7..18e2b83b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -99,6 +99,7 @@ fn init_logging() -> Result<(), fern::InitError> { .level_for("handlebars", log::LevelFilter::Warn) .level_for("ws", log::LevelFilter::Info) .level_for("multipart", log::LevelFilter::Info) + .level_for("html5ever", log::LevelFilter::Info) .chain(std::io::stdout()); if let Some(log_file) = CONFIG.log_file() {