From 3df33fc522e901962f805bfff1d2cb5e23a79849 Mon Sep 17 00:00:00 2001 From: Sam Willcocks Date: Tue, 13 Sep 2022 16:36:19 +0100 Subject: [PATCH] Add attribution of tweets, scraping cmds and dry run --- Cargo.lock | 749 +++++++++++++++++++++++++++++++++++++++++++++---- Cargo.toml | 14 +- src/main.rs | 499 ++++++++++++++------------------ src/twitter.rs | 276 ++++++++++++++++++ src/wiki.rs | 204 ++++++++++++++ 5 files changed, 1382 insertions(+), 360 deletions(-) create mode 100644 src/twitter.rs create mode 100644 src/wiki.rs diff --git a/Cargo.lock b/Cargo.lock index 816591b..2e1da87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,10 +15,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] -name = "ansi_term" -version = "0.11.0" +name = "aho-corasick" +version = "0.7.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" +dependencies = [ + "memchr", +] + +[[package]] +name = "ansi_colours" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32678233b67f9056b0c144b39d46dc3218637e8d84ad6038ded339e08b19620d" +dependencies = [ + "rgb", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" dependencies = [ "winapi", ] @@ -76,6 +94,12 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "bit_field" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb6dd1c2376d2e096796e234a70e17e94cc2d5d54ff8ce42b28cef1d0d359a4" + [[package]] name = "bitflags" version = "1.3.2" @@ -120,17 +144,60 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "2.33.3" +version = "3.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +checksum = "23b71c3ce99b7611011217b366d923f1d0a7e07a92bb2dbf1e84508c673ca3bd" dependencies = [ - "ansi_term", "atty", "bitflags", + "clap_derive", + "clap_lex", + "indexmap", + "once_cell", "strsim", + "termcolor", "textwrap", - "unicode-width", - "vec_map", +] + +[[package]] +name = "clap_derive" +version = "3.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + +[[package]] +name = "console" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89eab4d20ce20cea182308bca13088fecea9c05f6776cf287205d41a0ed3c847" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "terminal_size", + "winapi", ] [[package]] @@ -170,6 +237,76 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "once_cell", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "crossterm" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2102ea4f781910f8a5b98dd061f4c2023f479ce7bb1236330099ceb5a93cf17" +dependencies = [ + "bitflags", + "crossterm_winapi", + "libc", + "mio 0.8.4", + "parking_lot 0.12.1", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c" +dependencies = [ + "winapi", +] + [[package]] name = "cssparser" version = "0.27.2" @@ -261,6 +398,12 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "encoding" version = "0.2.33" @@ -334,6 +477,21 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "exr" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c26a90d9dd411a3d119d6f55752fb4c134ca243250c32fb9cab7b2561638d2" +dependencies = [ + "bit_field", + "flume", + "half", + "lebe", + "miniz_oxide 0.5.4", + "smallvec", + "threadpool", +] + [[package]] name = "flate2" version = "1.0.22" @@ -343,7 +501,7 @@ dependencies = [ "cfg-if", "crc32fast", "libc", - "miniz_oxide", + "miniz_oxide 0.4.4", ] [[package]] @@ -352,6 +510,19 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +[[package]] +name = "flume" +version = "0.10.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577" +dependencies = [ + "futures-core", + "futures-sink", + "nanorand", + "pin-project", + "spin 0.9.4", +] + [[package]] name = "fnv" version = "1.0.7" @@ -386,11 +557,10 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "form_urlencoded" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" dependencies = [ - "matches", "percent-encoding", ] @@ -484,13 +654,25 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.3" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" dependencies = [ "cfg-if", + "js-sys", "libc", - "wasi 0.10.2+wasi-snapshot-preview1", + "wasi 0.11.0+wasi-snapshot-preview1", + "wasm-bindgen", +] + +[[package]] +name = "gif" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3edd93c6756b4dfaf2709eafcc345ba2636565295c198a9cfbf75fa5e3e00b06" +dependencies = [ + "color_quant", + "weezl", ] [[package]] @@ -512,12 +694,24 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + [[package]] name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +[[package]] +name = "heck" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -614,15 +808,33 @@ dependencies = [ [[package]] name = "idna" -version = "0.2.3" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" +checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" dependencies = [ - "matches", "unicode-bidi", "unicode-normalization", ] +[[package]] +name = "image" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e30ca2ecf7666107ff827a8e481de6a132a9b687ed3bb20bb1c144a36c00964" +dependencies = [ + "bytemuck", + "byteorder", + "color_quant", + "exr", + "gif", + "jpeg-decoder 0.2.6", + "num-rational", + "num-traits", + "png", + "scoped_threadpool", + "tiff", +] + [[package]] name = "indexmap" version = "1.7.0" @@ -653,9 +865,11 @@ name = "iso7010_a_day" version = "0.1.0" dependencies = [ "clap", + "image", "itertools", "oauth1", "rand 0.8.4", + "regex", "reqwest", "resvg", "scraper", @@ -663,8 +877,11 @@ dependencies = [ "serde_json", "tiny-skia", "toml", + "tracing", + "tracing-subscriber", "url", "usvg", + "viuer", "webbrowser", ] @@ -689,6 +906,15 @@ version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "229d53d58899083193af11e15917b5640cd40b29ff475a1fe4ef725deb02d0f2" +[[package]] +name = "jpeg-decoder" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9478aa10f73e7528198d75109c8be5cd7d15fb530238040148d5f9a22d4c5b3b" +dependencies = [ + "rayon", +] + [[package]] name = "js-sys" version = "0.3.55" @@ -714,16 +940,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] -name = "libc" -version = "0.2.105" +name = "lebe" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "869d572136620d55835903746bcb5cdc54cb2851fd0aeec53220b4bb65ef3013" +checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" + +[[package]] +name = "libc" +version = "0.2.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" [[package]] name = "lock_api" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109" +checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b" dependencies = [ "scopeguard", ] @@ -778,6 +1010,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.16" @@ -804,6 +1045,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "miniz_oxide" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34" +dependencies = [ + "adler", +] + [[package]] name = "mio" version = "0.7.14" @@ -817,6 +1067,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "mio" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" +dependencies = [ + "libc", + "log", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys", +] + [[package]] name = "miow" version = "0.3.7" @@ -826,6 +1088,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "nanorand" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" +dependencies = [ + "getrandom 0.2.7", +] + [[package]] name = "native-tls" version = "0.2.8" @@ -865,6 +1136,36 @@ dependencies = [ "winapi", ] +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.13.0" @@ -890,9 +1191,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.8.0" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" +checksum = "2f7254b99e31cad77da24b08ebf628882739a608578bb1bcdfc1f9c21260d7c0" [[package]] name = "openssl" @@ -927,6 +1228,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "os_str_bytes" +version = "6.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" + [[package]] name = "parking_lot" version = "0.11.2" @@ -935,7 +1242,17 @@ checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" dependencies = [ "instant", "lock_api", - "parking_lot_core", + "parking_lot_core 0.8.5", +] + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core 0.9.3", ] [[package]] @@ -953,10 +1270,23 @@ dependencies = [ ] [[package]] -name = "percent-encoding" -version = "2.1.0" +name = "parking_lot_core" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" +checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + +[[package]] +name = "percent-encoding" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "pest" @@ -1028,10 +1358,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468" [[package]] -name = "pin-project-lite" -version = "0.2.7" +name = "pin-project" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d31d11c69a6b52a174b42bdc0c30e5e11670f90788b2c471c31c1d17d449443" +checksum = "ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" [[package]] name = "pin-utils" @@ -1055,7 +1405,7 @@ dependencies = [ "crc32fast", "deflate", "encoding", - "miniz_oxide", + "miniz_oxide 0.4.4", ] [[package]] @@ -1070,6 +1420,30 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro-hack" version = "0.5.19" @@ -1155,7 +1529,7 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.7", ] [[package]] @@ -1185,6 +1559,30 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rayon" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + [[package]] name = "rctree" version = "0.4.0" @@ -1200,6 +1598,23 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" + [[package]] name = "remove_dir_all" version = "0.5.3" @@ -1251,7 +1666,7 @@ version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "256cc9203115db152290219f35f3362e729301b59e2a391fb2721fe3fa155352" dependencies = [ - "jpeg-decoder", + "jpeg-decoder 0.1.22", "log", "pico-args", "png", @@ -1279,7 +1694,7 @@ dependencies = [ "cc", "libc", "once_cell", - "spin", + "spin 0.5.2", "untrusted", "web-sys", "winapi", @@ -1353,6 +1768,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "scoped_threadpool" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8" + [[package]] name = "scopeguard" version = "1.1.0" @@ -1456,6 +1877,9 @@ name = "serde" version = "1.0.130" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" +dependencies = [ + "serde_derive", +] [[package]] name = "serde_derive" @@ -1507,6 +1931,45 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2579985fda508104f7587689507983eadd6a6e84dd35d6d115361f530916fa0d" +[[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "signal-hook" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-mio" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af" +dependencies = [ + "libc", + "mio 0.8.4", + "signal-hook", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" +dependencies = [ + "libc", +] + [[package]] name = "simplecss" version = "0.2.1" @@ -1550,6 +2013,15 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "spin" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09" +dependencies = [ + "lock_api", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -1622,7 +2094,7 @@ checksum = "923f0f39b6267d37d23ce71ae7235602134b250ace715dd2c90421998ddac0c6" dependencies = [ "lazy_static", "new_debug_unreachable", - "parking_lot", + "parking_lot 0.11.2", "phf_shared", "precomputed-hash", "serde", @@ -1642,9 +2114,9 @@ dependencies = [ [[package]] name = "strsim" -version = "0.8.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "svgfilters" @@ -1702,20 +2174,65 @@ dependencies = [ ] [[package]] -name = "textwrap" -version = "0.11.0" +name = "termcolor" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" dependencies = [ - "unicode-width", + "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "textwrap" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" + [[package]] name = "thin-slice" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" +[[package]] +name = "thread_local" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +dependencies = [ + "once_cell", +] + +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + +[[package]] +name = "tiff" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7259662e32d1e219321eb309d5f9d898b779769d81b76e762c07c8e5d38fcb65" +dependencies = [ + "flate2", + "jpeg-decoder 0.2.6", + "weezl", +] + [[package]] name = "time" version = "0.2.27" @@ -1793,7 +2310,7 @@ dependencies = [ "bytes", "libc", "memchr", - "mio", + "mio 0.7.14", "num_cpus", "pin-project-lite", "winapi", @@ -1840,22 +2357,60 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" [[package]] name = "tracing" -version = "0.1.29" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105" +checksum = "2fce9567bd60a67d08a16488756721ba392f24f29006402881e43b19aac64307" dependencies = [ "cfg-if", "pin-project-lite", + "tracing-attributes", "tracing-core", ] [[package]] -name = "tracing-core" -version = "0.1.21" +name = "tracing-attributes" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4" +checksum = "11c75893af559bc8e10716548bdef5cb2b983f8e637db9d0e15126b61b484ee2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeea4303076558a00714b823f9ad67d58a3bbda1df83d8827d21193156e22f7" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922" dependencies = [ "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60db860322da191b40952ad9affe65ea23e7dd6a5c442c2c42865810c6ab8e6b" +dependencies = [ + "ansi_term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", ] [[package]] @@ -1950,14 +2505,14 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] name = "url" -version = "2.2.2" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" dependencies = [ "form_urlencoded", "idna", - "matches", "percent-encoding", + "serde", ] [[package]] @@ -1993,24 +2548,40 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - [[package]] name = "version_check" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +[[package]] +name = "viuer" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9db8b59a46844025fa705f65ba7d0a9ebf3369cb5fa2f10c532f1d41e0a56fa4" +dependencies = [ + "ansi_colours", + "base64 0.13.0", + "console", + "crossterm", + "image", + "lazy_static", + "tempfile", + "termcolor", +] + [[package]] name = "want" version = "0.3.0" @@ -2029,9 +2600,9 @@ checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" [[package]] name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" +version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" @@ -2120,6 +2691,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "weezl" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb" + [[package]] name = "widestring" version = "0.4.3" @@ -2142,12 +2719,64 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" + +[[package]] +name = "windows_i686_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + +[[package]] +name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + [[package]] name = "winreg" version = "0.7.0" diff --git a/Cargo.toml b/Cargo.toml index 150e0e2..efef566 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,22 +1,28 @@ [package] name = "iso7010_a_day" +description = "A silly twitter bot" version = "0.1.0" edition = "2018" [dependencies] reqwest = { version = "0.11", features = ["blocking", "json", "multipart"]} serde_json = "*" -serde = "*" +serde = { version = "1", features = ["derive"] } scraper = "*" rand = "*" resvg = "*" tiny-skia = "*" usvg = "*" oauth1 = "*" -clap = "*" +clap = { version = "*", features = ["derive"] } webbrowser = "*" itertools = "*" -url = "*" +tracing = "0.1.36" +tracing-subscriber = "0.3.15" +regex = "1.6.0" +image = "0.24.3" +viuer = "0.6.1" +url = { version = "2.3.1", features = ["serde"] } [build-dependencies] -toml = "*" \ No newline at end of file +toml = "*" diff --git a/src/main.rs b/src/main.rs index a956940..cb9d400 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,15 +1,14 @@ -use itertools::Itertools; use rand::seq::SliceRandom; -use std::borrow::Cow; use std::convert::TryInto; -use std::io::prelude::*; -use std::{collections::HashMap, io::Write}; - -const APP_TOKEN_ENV_VAR: &str = "TWITTER_APP_TOKEN"; -const APP_SECRET_ENV_VAR: &str = "TWITTER_APP_SECRET"; -const USER_TOKEN_ENV_VAR: &str = "TWITTER_USER_TOKEN"; -const USER_SECRET_ENV_VAR: &str = "TWITTER_USER_SECRET"; -const IMG_HEIGHT: u32 = 1000; +use std::io::Cursor; +use tracing::{event, Level}; +mod twitter; +mod wiki; +use clap::{Parser, Subcommand}; +use image::{DynamicImage, RgbaImage}; +use tiny_skia::{Paint, PathBuilder, Pixmap, PixmapPaint, Stroke, Transform}; +use twitter::*; +use wiki::*; static APP_USER_AGENT: &str = concat!( "bot_", @@ -20,316 +19,224 @@ static APP_USER_AGENT: &str = concat!( "reqwest", ); -static CB_URL: &str = "http://localhost:6969/cb"; - -fn render_svg(data: &[u8]) -> Result, Box> { +// Render the raw SVG data to an image +fn render_svg(data: &[u8], height: u32, with_border: bool) -> StdError { let opt = usvg::Options::default(); - let rtree = usvg::Tree::from_data(&data, &opt.to_ref()).expect("couldn't parse"); - let mut pixmap = - tiny_skia::Pixmap::new(IMG_HEIGHT, IMG_HEIGHT).ok_or("Error creating pixmap")?; - resvg::render( - &rtree, - usvg::FitTo::Size(IMG_HEIGHT, IMG_HEIGHT), - pixmap.as_mut(), - ) - .ok_or_else(|| "Error rendering svg")?; - let mut bigger_pixmap = tiny_skia::Pixmap::new(IMG_HEIGHT / 9 * 16, IMG_HEIGHT) - .ok_or("Error creating bigger pixmap")?; + let rtree = usvg::Tree::from_data(data, &opt.to_ref()).expect("couldn't parse"); + let svg_size = rtree.svg_node().size; + // Work out how wide the pixmap of height `height` needs to be to entirely fit the SVG. + let pm_width = ((height as f64 / svg_size.height()) * svg_size.width()).ceil() as u32; + let mut pixmap = Pixmap::new(pm_width, height).ok_or("Error creating pixmap")?; + // Render the svg into a pixmap. + resvg::render(&rtree, usvg::FitTo::Height(height), pixmap.as_mut()) + .ok_or("Error rendering svg")?; + // Make a wider pixmap with a 16:9 AR and the same height. This is a blesséd ratio by twitter + // and means we see the whole image nicely in the timeline with no truncation. + let mut bigger_pixmap = + Pixmap::new(height / 9 * 16, height).ok_or("Error creating bigger pixmap")?; + // Then draw our freshly rendered SVG into the middle of the bigger pixmap. bigger_pixmap .draw_pixmap( - (bigger_pixmap.width() / 2 - IMG_HEIGHT / 2) - .try_into() - .unwrap(), + ((bigger_pixmap.width() - pm_width) / 2).try_into().unwrap(), 0, pixmap.as_ref(), - &tiny_skia::PixmapPaint::default(), - tiny_skia::Transform::identity(), + &PixmapPaint::default(), + Transform::identity(), None, ) .ok_or("Error drawing onto bigger pixmap")?; - let png_data = bigger_pixmap.encode_png()?; - Ok(png_data) -} + let (w, h) = (bigger_pixmap.width(), bigger_pixmap.height()); + // Render a red border for debug purposes + if with_border { + let mut paint = Paint::default(); + paint.set_color_rgba8(255, 0, 0, 255); + let stroke = Stroke { + width: 1.0, + ..Default::default() + }; -enum PostData<'a> { - Empty, - Multipart(reqwest::blocking::multipart::Form), - Data(&'a [(&'a str, &'a str)]), -} - -enum APIAction<'a> { - Get, - Post(PostData<'a>), -} - -struct TwitterEndpoint<'a>(&'a str); - -impl TryInto for TwitterEndpoint<'_> { - type Error = url::ParseError; - fn try_into(self) -> Result { - reqwest::Url::parse(&format!("https://api.twitter.com/{}", self.0)) + let path = { + let mut pb = PathBuilder::new(); + pb.move_to(0.0, 0.0); + pb.line_to(0.0, h as f32 - stroke.width); + pb.line_to(w as f32, h as f32 - stroke.width); + pb.line_to(w as f32 - stroke.width, 0.0); + pb.line_to(0.0, 0.0); + pb.finish().unwrap() + }; + bigger_pixmap.stroke_path(&path, &paint, &stroke, Transform::identity(), None); } -} - -// Make an authed twitter API request -fn twitter_api<'a>( - url: reqwest::Url, - user_token: Option<&oauth1::Token>, - action: APIAction, - extra_oauth_params: &[(&str, &str)], -) -> StdError { - let consumer_token = oauth1::Token::new( - std::env::var(APP_TOKEN_ENV_VAR)?, - std::env::var(APP_SECRET_ENV_VAR)?, - ); - let mut headers = reqwest::header::HeaderMap::new(); - let mut oauth_params: HashMap<&str, Cow> = extra_oauth_params - .iter() - .cloned() - .map(|(x, y)| (x, y.into())) - .collect(); - - // If the request is a key/value form post, we need to include those parameters when - // generating the signature. - match action { - APIAction::Post(PostData::Data(d)) => { - oauth_params.extend(d.iter().cloned().map(|(x, y)| (x, y.into()))) - } - _ => {} - } - - headers.insert( - reqwest::header::AUTHORIZATION, - reqwest::header::HeaderValue::from_str(&oauth1::authorize( - if matches!(action, APIAction::Post(_)) { - "POST" - } else { - "GET" - }, - url.as_str(), - &consumer_token, - user_token, - Some(oauth_params), - ))?, - ); - - let client = reqwest::blocking::Client::builder() - .user_agent(APP_USER_AGENT) - .default_headers(headers) - .build()?; - let req = match action { - APIAction::Get => client.get(url), - APIAction::Post(PostData::Empty) => client.post(url), - APIAction::Post(PostData::Data(data)) => client.post(url).form(data), - APIAction::Post(PostData::Multipart(form)) => client.post(url).multipart(form), - }; - - let res = req.send()?; - if !res.status().is_success() { - return Err(format!( - "Got non-200 response: status {}, {}", - res.status(), - res.text()? - ) - .into()); - } - Ok(res) + let img = RgbaImage::from_raw( + bigger_pixmap.width(), + bigger_pixmap.height(), + bigger_pixmap.data().to_vec(), + ) + .ok_or("Error creating image from pixmap")?; + Ok(DynamicImage::ImageRgba8(img)) } type StdError = Result>; +#[derive(Parser)] +#[clap(author, version, about)] +struct Cli { + #[clap(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +enum Commands { + /// Authorize the twitter application to acccess a user's account + Authorize, + /// Scrape images from the category on wikimedia commons + ScrapeCategory, + /// Scrape images from the iso7010 wikipedia page + ScrapeWeb, + /// List tweets from the authed user's timeline + ListTweets, + /// Run the bot - scrape, pick a random entry and tweet it + RunBot { + #[clap(short, long, action)] + dry_run: bool, + }, + /// Print details about the currently authed user + Whoami, +} + fn main() -> StdError<()> { - let matches = clap::App::new(env!("CARGO_PKG_NAME")) - .version(env!("CARGO_PKG_VERSION")) - .subcommand(clap::SubCommand::with_name("authorize").about("Authorize the twitter application to access a user's account by popping open a web browser and returning the credentials once authorized.")).get_matches(); - match matches.subcommand() { - ("authorize", _) => do_authorize(), - _ => run_bot(), + tracing_subscriber::fmt::init(); + let cli = Cli::parse(); + match &cli.command { + Commands::Authorize => do_authorize(), + Commands::ScrapeCategory => do_scrape_category(), + Commands::ScrapeWeb => do_scrape_web(), + Commands::ListTweets => do_list_tweets(), + Commands::Whoami => do_whoami(), + Commands::RunBot { dry_run } => run_bot(*dry_run), } } -fn do_authorize() -> StdError<()> { - println!("Authorizing you lol!"); - - // Oauth1 leg 1 - let res = twitter_api( - TwitterEndpoint("oauth/request_token").try_into()?, - None, - APIAction::Post(PostData::Empty), - &[("oauth_callback", CB_URL)], - )? - .text()?; - - let returned_params: HashMap<&str, &str> = res - .split("&") - .map(|s| s.split("=").collect_tuple()) - .collect::>() - .ok_or("Unexpected oauth step 1 response")?; - - // Oauth1 leg 2 - let user_url = reqwest::Url::parse_with_params( - "https://api.twitter.com/oauth/authenticate", - [("oauth_token", returned_params["oauth_token"])], - )?; - println!("Plz do the thing in the browser"); - webbrowser::open(user_url.as_str())?; - - let listener = std::net::TcpListener::bind("127.0.0.1:6969")?; - let mut stream = listener.incoming().next().ok_or("Error getting stream")??; - let mut buf = [0u8; 4096]; - stream.read(&mut buf[..])?; - - let target = std::str::from_utf8( - buf.split(|c| *c == b' ') - .skip(1) - .next() - .ok_or("No target found")?, - )?; - let oauth_verifier = reqwest::Url::parse("https://example.net/")? - .join(target.into())? - .query_pairs() - .find_map(|(k, v)| { - if k == "oauth_verifier" { - Some(v.into_owned()) - } else { - None - } - }) - .ok_or("no oauth_verifier in response")?; - stream.write(b"HTTP/1.1 200 OK\r\n\r\nThanks lmao\r\n")?; - stream.shutdown(std::net::Shutdown::Read)?; - - // Oauth1 leg 3 - let res = twitter_api( - TwitterEndpoint("oauth/access_token").try_into()?, - None, - APIAction::Post(PostData::Data(&[("oauth_verifier", &oauth_verifier)])), - &[("oauth_token", returned_params["oauth_token"])], - )? - .text()?; - let returned_params: HashMap<&str, &str> = res - .split("&") - .map(|s| s.split("=").collect_tuple()) - .collect::>() - .ok_or("Unexpected oauth step 3 response")?; - - println!( - "Authorized for {}.\nRun with {}={} {}={}", - returned_params["screen_name"], - USER_TOKEN_ENV_VAR, - returned_params["oauth_token"], - USER_SECRET_ENV_VAR, - returned_params["oauth_token_secret"] - ); - - Ok(()) -} - -fn upload_image(user_token: &oauth1::Token, img: Cow<'static, [u8]>) -> StdError { - let form = reqwest::blocking::multipart::Form::new() - .part("media", reqwest::blocking::multipart::Part::bytes(img)); - let res: serde_json::Value = twitter_api( - "https://upload.twitter.com/1.1/media/upload.json".try_into()?, - Some(&user_token), - APIAction::Post(PostData::Multipart(form)), - &[], - )? - .json()?; - Ok(res["media_id"].as_u64().ok_or("media_id not u64!")?) -} - -fn run_bot() -> StdError<()> { - let user_token = oauth1::Token::new( - std::env::var(USER_TOKEN_ENV_VAR)?, - std::env::var(USER_SECRET_ENV_VAR)?, - ); - let args: Vec = std::env::args().collect(); - if args.len() < 2 { - println!("usage: ./thing out.png"); - } - // Parse CSS selectors to scrape elements - let gallerybox_sel = scraper::Selector::parse(".mw-body-content li.gallerybox") - .map_err(|e| format!("{:?}", e))?; - let link_sel = scraper::Selector::parse("a.image").map_err(|e| format!("{:?}", e))?; - let title_sel = scraper::Selector::parse(".gallerytext p").map_err(|e| format!("{:?}", e))?; - let original_sel = scraper::Selector::parse(".fullMedia a").map_err(|e| format!("{:?}", e))?; - - // Fetch stuff! - let client = reqwest::blocking::Client::builder() - .user_agent(APP_USER_AGENT) - .build()?; - println!("Fetching main page"); - let txt = client - .get("https://en.wikipedia.org/wiki/ISO_7010") - .send()? - .text()?; - let page = scraper::Html::parse_document(txt.as_str()); - let things = page - .select(&gallerybox_sel) - .map(|a| { - let link = a - .select(&link_sel) - .next() - .unwrap() - .value() - .attr("href") - .unwrap(); - let title = a - .select(&title_sel) - .next() - .unwrap() - .text() - .collect::() - .trim() - .to_owned(); - (title, link) - }) - .collect::>(); - // Pick a random entry and fetch the original file - - let (title, link) = things - .choose(&mut rand::thread_rng()) - .ok_or_else(|| "got no images m8")?; - println!("Fetching image page"); - let media_page = client - .get(format!("https://en.wikipedia.org{}", link)) - .send()? - .text()?; - let page = scraper::Html::parse_document(media_page.as_str()); - let link = page - .select(&original_sel) - .next() - .unwrap() - .value() - .attr("href") - .unwrap(); - let svg = client.get(format!("https:{}", link)).send()?.bytes()?; - let png_data = render_svg(&svg)?; +fn do_whoami() -> StdError<()> { + let user_token = user_token_from_env(); let user: serde_json::Value = twitter_api( - TwitterEndpoint("1.1/account/verify_credentials.json").try_into()?, + TwitterEndpoint::VerifyCredentials.try_into()?, Some(&user_token), APIAction::Get, &[], )? .json()?; - println!( - "Tweeting for user @{}, (id: {})", - user["screen_name"], user["id"] - ); - - println!("Uploading image..."); - let img_id = upload_image(&user_token, Cow::from(png_data))?; - let tweet = title; - println!("Sending tweet..."); - twitter_api( - TwitterEndpoint("1.1/statuses/update.json").try_into()?, - Some(&user_token), - APIAction::Post(PostData::Data(&[ - ("media_ids", &img_id.to_string()), - ("status", tweet), - ])), - &[], - )?; + println!("User @{}, (id: {})", user["screen_name"], user["id"]); + Ok(()) +} + +fn do_list_tweets() -> StdError<()> { + let user_token = user_token_from_env(); + + let user = twitter_api( + TwitterEndpoint::VerifyCredentials.try_into()?, + Some(&user_token), + APIAction::Get, + &[], + )? + .json::()?; + + let id = user["id"].as_u64().unwrap(); + + let timeline: serde_json::Value = twitter_api( + reqwest::Url::parse_with_params( + &TwitterEndpoint::UserTimeline.to_string(), + [ + ("count", "200"), + ("exclude_replies", "true"), + ("include_retweets", "false"), + ("trim_user", "true"), + ("user_id", id.to_string().as_ref()), + ], + )?, + Some(&user_token), + APIAction::Get, + &[], + )? + .json()?; + for tweet in timeline.as_array().unwrap() { + let tweet = tweet.as_object().unwrap(); + println!("{}, \"{}\"", tweet["id"], tweet["text"]); + } + Ok(()) +} + +fn do_scrape_category() -> StdError<()> { + let mut files = get_files_in_category("Category:ISO_7010_safety_signs_(vector_drawings)")?; + files.sort(); + for f in files { + println!("{}", f); + } + + Ok(()) +} + +fn do_scrape_web() -> StdError<()> { + let mut files: Vec<_> = scrape_web()?.into_iter().map(|(_, file)| file).collect(); + files.sort(); + for f in files { + println!("{}", f); + } + + Ok(()) +} + +fn get_client(headers: Option) -> StdError { + let mut c = reqwest::blocking::Client::builder().user_agent(APP_USER_AGENT); + if let Some(headers) = headers { + c = c.default_headers(headers); + } + Ok(c.build()?) +} + +fn run_bot(dry_run: bool) -> StdError<()> { + let all = scrape_web()?; + let (title, filename) = all + .choose(&mut rand::thread_rng()) + .ok_or("got no images m8")?; + event!(Level::INFO, title, filename, "Picked random thing"); + let client = get_client(None)?; + event!(Level::INFO, "Fetching metadata..."); + // TODO: could crash, probably doesn't matter + let meta = get_file_metadata(vec![filename])?.remove(0); + event!(Level::INFO, %meta, "Got metadata"); + event!(Level::INFO, url = meta.url.to_string(), "Fetching image"); + let svg = client.get(meta.url).send()?.bytes()?; + + let text = format!( + "{}\n\nImage source: {}\nAuthor: Wikimedia Commons user {}\n{}{}", + title, + meta.html_url, + meta.author, + meta.license_short_name, + meta.license_url + .map_or("".to_owned(), |u| format!(" ({})", u)) + ); + + if !dry_run { + // Render the image nice and big for twitter + let img = render_svg(&svg, 1000, false)?; + let mut buf = Cursor::new(Vec::new()); + img.write_to(&mut buf, image::ImageFormat::Png)?; + tweet(&text, Some(buf.into_inner().into()))?; + } else { + // Render the image smaller for output to terminal + let img = render_svg(&svg, 128, true)?; + println!("Dry run - would tweet:\n \"{}\"", text); + viuer::print( + &img, + &viuer::Config { + absolute_offset: false, + width: Some(32), + ..Default::default() + }, + )?; + } + Ok(()) } diff --git a/src/twitter.rs b/src/twitter.rs new file mode 100644 index 0000000..c088980 --- /dev/null +++ b/src/twitter.rs @@ -0,0 +1,276 @@ +use crate::{get_client, StdError}; +use itertools::Itertools; +use std::borrow::Cow; +use std::convert::TryInto; +use std::fmt; +use std::io::prelude::*; +use std::{collections::HashMap, io::Write}; +use tracing::{event, instrument, Level}; + +const APP_TOKEN_ENV_VAR: &str = "TWITTER_APP_TOKEN"; +const APP_SECRET_ENV_VAR: &str = "TWITTER_APP_SECRET"; +const USER_TOKEN_ENV_VAR: &str = "TWITTER_USER_TOKEN"; +const USER_SECRET_ENV_VAR: &str = "TWITTER_USER_SECRET"; + +static CB_URL: &str = "http://localhost:6969/cb"; + +pub fn user_token_from_env() -> oauth1::Token<'static> { + oauth1::Token::new( + std::env::var(USER_TOKEN_ENV_VAR).expect("No user token env var"), + std::env::var(USER_SECRET_ENV_VAR).expect("No user secret env var"), + ) +} + +pub enum TwitterEndpoint { + OauthRequestToken, + OauthAccessToken, + OauthAuthenticate, + UpdateStatus, + UserTimeline, + VerifyCredentials, +} + +impl std::fmt::Display for TwitterEndpoint { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + const BASE: &str = "https://api.twitter.com"; + let endpoint = match self { + Self::OauthAuthenticate => "oauth/authenticate", + Self::OauthRequestToken => "oauth/request_token", + Self::OauthAccessToken => "oauth/access_token", + Self::UpdateStatus => "1.1/statuses/update.json", + Self::UserTimeline => "1.1/statuses/user_timeline.json", + Self::VerifyCredentials => "1.1/account/verify_credentials.json", + }; + write!(f, "{}/{}", BASE, endpoint) + } +} + +impl TryInto for TwitterEndpoint { + type Error = url::ParseError; + fn try_into(self) -> Result { + reqwest::Url::parse(&self.to_string()) + } +} + +pub enum PostData<'a> { + Empty, + Multipart(reqwest::blocking::multipart::Form), + Data(&'a [(&'a str, Cow<'a, str>)]), +} + +impl fmt::Debug for PostData<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match self { + Self::Empty => "Empty", + Self::Multipart(_) => "Multipart", + Self::Data(_) => "Data", + } + ) + } +} + +#[derive(Debug)] +pub enum APIAction<'a> { + Get, + Post(PostData<'a>), +} + +impl APIAction<'_> { + pub fn get_verb(&self) -> &'static str { + match self { + Self::Get => "GET", + Self::Post(_) => "POST", + } + } +} +// Make an authed twitter API request +#[instrument(skip(user_token), fields(url=url.to_string()))] +pub fn twitter_api<'a>( + url: reqwest::Url, + user_token: Option<&oauth1::Token>, + action: APIAction, + extra_oauth_params: &[(&str, &str)], +) -> StdError { + let consumer_token = oauth1::Token::new( + std::env::var(APP_TOKEN_ENV_VAR)?, + std::env::var(APP_SECRET_ENV_VAR)?, + ); + let mut headers = reqwest::header::HeaderMap::new(); + let mut params: HashMap<&str, Cow> = extra_oauth_params + .iter() + .cloned() + .map(|(x, y)| (x, y.into())) + .collect(); + + // Copy all our query parameters and add them to the list of params for oauth1 signature + // generation. + // This is a bit awkward, as params is a map from &str to Cow but query_pairs() returns + // (Cow, Cow) tuples. So we call into_owned to make (String, String) tuples, and then + // borrow from there. If there's a way to do it without copying, I couldn't find it. + let pairs: Vec<_> = url.query_pairs().into_owned().collect(); + for (k, v) in &pairs { + params.insert(k, Cow::Borrowed(v)); + } + + // If the request is a key/value form post, we also need to include those parameters when + // generating the signature. + if let APIAction::Post(PostData::Data(d)) = action { + params.extend(d.to_owned()) + } + + // The url used to generate the signature must not include the query params + let mut url_sans_query = url.clone(); + url_sans_query.set_query(None); + headers.insert( + reqwest::header::AUTHORIZATION, + reqwest::header::HeaderValue::from_str(&oauth1::authorize( + action.get_verb(), + url_sans_query.as_str(), + &consumer_token, + user_token, + Some(params), + ))?, + ); + let client = get_client(Some(headers))?; + let req = match action { + APIAction::Get => client.get(url), + APIAction::Post(PostData::Empty) => client.post(url), + APIAction::Post(PostData::Data(data)) => client.post(url).form(data), + APIAction::Post(PostData::Multipart(form)) => client.post(url).multipart(form), + }; + event!(Level::INFO, "Sending request"); + let res = req.send()?; + if !res.status().is_success() { + return Err(format!( + "Got non-200 response: status {}, {}", + res.status(), + res.text()? + ) + .into()); + } + Ok(res) +} + +pub fn do_authorize() -> StdError<()> { + println!("Authorizing you lol!"); + + // Oauth1 leg 1 + let res = twitter_api( + TwitterEndpoint::OauthRequestToken.try_into()?, + None, + APIAction::Post(PostData::Empty), + &[("oauth_callback", CB_URL)], + )? + .text()?; + + let returned_params: HashMap<&str, &str> = res + .split('&') + .map(|s| s.split('=').collect_tuple()) + .collect::>() + .ok_or("Unexpected oauth step 1 response")?; + + // Oauth1 leg 2 + let user_url = reqwest::Url::parse_with_params( + &TwitterEndpoint::OauthAuthenticate.to_string(), + [("oauth_token", returned_params["oauth_token"])], + )?; + println!("Plz do the thing in the browser"); + webbrowser::open(user_url.as_str())?; + + let listener = std::net::TcpListener::bind("127.0.0.1:6969")?; + let mut stream = listener.incoming().next().ok_or("Error getting stream")??; + let mut buf = [0u8; 4096]; + stream.read(&mut buf[..])?; + + let target = std::str::from_utf8(buf.split(|c| *c == b' ').nth(1).ok_or("No target found")?)?; + let oauth_verifier = reqwest::Url::parse("https://example.net/")? + .join(target)? + .query_pairs() + .find_map(|(k, v)| { + if k == "oauth_verifier" { + Some(v.into_owned()) + } else { + None + } + }) + .ok_or("no oauth_verifier in response")?; + stream.write_all(b"HTTP/1.1 200 OK\r\n\r\nThanks lmao\r\n")?; + stream.shutdown(std::net::Shutdown::Read)?; + + // Oauth1 leg 3 + let res = twitter_api( + TwitterEndpoint::OauthAccessToken.try_into()?, + None, + APIAction::Post(PostData::Data(&[( + "oauth_verifier", + Cow::Owned(oauth_verifier), + )])), + &[("oauth_token", returned_params["oauth_token"])], + )? + .text()?; + let returned_params: HashMap<&str, &str> = res + .split('&') + .map(|s| s.split('=').collect_tuple()) + .collect::>() + .ok_or("Unexpected oauth step 3 response")?; + + println!( + "Authorized for {}.\nRun with {}={} {}={}", + returned_params["screen_name"], + USER_TOKEN_ENV_VAR, + returned_params["oauth_token"], + USER_SECRET_ENV_VAR, + returned_params["oauth_token_secret"] + ); + + Ok(()) +} + +fn upload_image(user_token: &oauth1::Token, img: Cow<'static, [u8]>) -> StdError { + let form = reqwest::blocking::multipart::Form::new() + .part("media", reqwest::blocking::multipart::Part::bytes(img)); + let res: serde_json::Value = twitter_api( + "https://upload.twitter.com/1.1/media/upload.json".try_into()?, + Some(user_token), + APIAction::Post(PostData::Multipart(form)), + &[], + )? + .json()?; + Ok(res["media_id"].as_u64().ok_or("media_id not u64!")?) +} + +pub fn tweet(text: &str, img: Option>) -> StdError<()> { + let user_token = oauth1::Token::new( + std::env::var(USER_TOKEN_ENV_VAR)?, + std::env::var(USER_SECRET_ENV_VAR)?, + ); + + let user: serde_json::Value = twitter_api( + TwitterEndpoint::VerifyCredentials.try_into()?, + Some(&user_token), + APIAction::Get, + &[], + )? + .json()?; + println!( + "Tweeting for user @{}, (id: {})", + user["screen_name"], user["id"] + ); + let mut post_data = vec![("status", Cow::Borrowed(text))]; + if let Some(img) = img { + println!("Uploading image..."); + let img_id = upload_image(&user_token, img)?; + post_data.push(("media_ids", Cow::Owned(img_id.to_string()))) + } + event!(Level::INFO, "Sending tweet..."); + twitter_api( + TwitterEndpoint::UpdateStatus.try_into()?, + Some(&user_token), + APIAction::Post(PostData::Data(&post_data[0..])), + &[], + )?; + Ok(()) +} diff --git a/src/wiki.rs b/src/wiki.rs new file mode 100644 index 0000000..6fff280 --- /dev/null +++ b/src/wiki.rs @@ -0,0 +1,204 @@ +use std::collections::HashMap; + +use crate::{get_client, StdError}; +use regex::Regex; +use serde::Deserialize; +use std::fmt::Display; +use tracing::{event, instrument, Level}; +use url::Url; + +// Filter a filename string for filenames +fn filter_filename(filename: &str) -> bool { + let re = Regex::new("ISO.7010.[EWMPF][0-9]{3}.*").unwrap(); + re.is_match(filename) +} + +// Scrape all images from the wikipedia page, returning a vec of title, filename pairs +pub fn scrape_web() -> StdError> { + event!(Level::INFO, "Scraping the wikipedia page for things"); + // Parse CSS selectors to scrape elements + let gallerybox_sel = scraper::Selector::parse(".mw-body-content li.gallerybox") + .map_err(|e| format!("{:?}", e))?; + let link_sel = scraper::Selector::parse("a.image").map_err(|e| format!("{:?}", e))?; + let title_sel = scraper::Selector::parse(".gallerytext p").map_err(|e| format!("{:?}", e))?; + + // Fetch stuff! + let client = get_client(None)?; + event!(Level::INFO, "Fetching wiki page"); + let txt = client + .get("https://en.wikipedia.org/wiki/ISO_7010") + .send()? + .text()?; + let page = scraper::Html::parse_document(txt.as_str()); + return Ok(page + .select(&gallerybox_sel) + .map(|a| { + let link = a + .select(&link_sel) + .next() + .unwrap() + .value() + .attr("href") + .unwrap() + .to_owned(); + let title = a + .select(&title_sel) + .next() + .unwrap() + .text() + .collect::() + .trim() + .to_owned(); + (title, link) + }) + // Filter for filenames that look like ISO diagrams + .filter(|tup| filter_filename(&tup.1)) + // Extract the file name only (.e.g `File:ISO_7010_X000.svg`) + .filter_map(|(title, link)| { + link.split('/') + .next_back() + .map(|end| (title, end.to_owned())) + }) + .collect::>()); +} + +#[instrument] +pub fn wiki_query_url(params: Vec<(&str, &str)>) -> StdError { + let mut url = Url::parse("https://commons.wikimedia.org/w/api.php?action=query&format=json")?; + url.query_pairs_mut().extend_pairs(params); + Ok(url) +} +// https://commons.wikimedia.org/w/api.php?action=query&format=json&list=categorymembers&cmtitle=Category:ISO_7010_safety_signs_(vector_drawings)&cmlimit=2 + +#[instrument] +pub fn get_files_in_category(category: &str) -> StdError> { + let client = get_client(None)?; + let url = wiki_query_url( + [ + ("list", "categorymembers"), + ("cmtitle", category), + ("cmtype", "file"), + ("cmlimit", "max"), + ] + .into(), + )?; + let data = client.get(url).send()?.json::()?; + if data.get("continue").is_some() { + // There are more results than are contained in one response, so now you need to implement + // pagination. Have fun! + panic!("Wikimedia query result is paginated!"); + } + Ok(data["query"]["categorymembers"] + .as_array() + .unwrap() + .iter() + .filter_map(|m| Some(m.as_object().unwrap()["title"].as_str()?.replace(' ', "_"))) + .collect()) +} + +#[derive(Debug)] +pub struct FileMeta { + pub url: url::Url, + pub name: String, + pub html_url: url::Url, + pub author: String, + pub attribution_required: String, + pub license_short_name: String, + pub license_url: Option, +} + +impl Display for FileMeta { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "FileMeta{{url: {}, name: {}, html_url: {}, author: {}, attribution_required: {}, license_short_name: {}, license_url: {}}}", + self.url, self.name, self.html_url, self.author, self.attribution_required, self.license_short_name, + self.license_url.clone().map_or("None".to_owned(), |u| u.to_string()) // Ew. + ) + } +} + +// Partial representation of the data returned from a MediaWiki imageinfo query +#[derive(Deserialize)] +struct Query { + query: QueryInner, +} + +#[derive(Deserialize, Debug)] +struct QueryInner { + pages: HashMap, +} + +#[derive(Deserialize, Debug)] +struct Page { + imageinfo: Vec, +} + +#[derive(Deserialize, Debug)] +struct ImageInfo { + user: String, + url: url::Url, + descriptionurl: url::Url, + extmetadata: ExtMeta, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "PascalCase")] +struct ExtMeta { + object_name: ExtMetaItem, + license_short_name: ExtMetaItem, + attribution_required: ExtMetaItem, + license_url: Option>, +} + +#[derive(Deserialize, Debug, Clone)] +struct ExtMetaItem { + value: T, +} + +pub fn get_file_metadata(files: Vec<&str>) -> StdError> { + let client = get_client(None)?; + // Api only lets us do 50 files in one request + Ok(files + .chunks(50) + .flat_map(|files_chunk| { + let url = wiki_query_url( + [ + ("titles", files_chunk.join("|").as_ref()), + ("prop", "imageinfo"), + ( + "iiprop", + "timestamp|url|size|mime|mediatype|extmetadata|user", + ), + // Get metadata for as many revisions of the file as we are allowed. We're unlikely to encounter a file with >500 revisions. + ("iilimit", "500"), + ( + "iiextmetadatafilter", + "ObjectName|LicenseShortName|AttributionRequired|LicenseUrl", + ), + ] + .into(), + ) + .unwrap(); + let data = client.get(url).send().unwrap().json::().unwrap(); + + data.query + .pages + .values() + .map(|page| { + let latest = page.imageinfo.first().unwrap(); + let oldest = page.imageinfo.last().unwrap(); + FileMeta { + url: latest.url.clone(), + name: latest.extmetadata.object_name.value.clone(), + html_url: latest.descriptionurl.clone(), + author: oldest.user.clone(), + license_short_name: latest.extmetadata.license_short_name.value.clone(), + license_url: latest.extmetadata.license_url.clone().map(|i| i.value), + attribution_required: latest.extmetadata.attribution_required.value.clone(), + } + }) + .collect::>() + }) + .collect()) +}