Add attribution of tweets, scraping cmds and dry run

This commit is contained in:
Sam W 2022-09-13 16:36:19 +01:00
parent b8c794f9d7
commit 3df33fc522
5 changed files with 1382 additions and 360 deletions

749
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,22 +1,28 @@
[package]
name = "iso7010_a_day"
description = "A silly twitter bot"
version = "0.1.0"
edition = "2018"
[dependencies]
reqwest = { version = "0.11", features = ["blocking", "json", "multipart"]}
serde_json = "*"
serde = "*"
serde = { version = "1", features = ["derive"] }
scraper = "*"
rand = "*"
resvg = "*"
tiny-skia = "*"
usvg = "*"
oauth1 = "*"
clap = "*"
clap = { version = "*", features = ["derive"] }
webbrowser = "*"
itertools = "*"
url = "*"
tracing = "0.1.36"
tracing-subscriber = "0.3.15"
regex = "1.6.0"
image = "0.24.3"
viuer = "0.6.1"
url = { version = "2.3.1", features = ["serde"] }
[build-dependencies]
toml = "*"
toml = "*"

View File

@ -1,15 +1,14 @@
use itertools::Itertools;
use rand::seq::SliceRandom;
use std::borrow::Cow;
use std::convert::TryInto;
use std::io::prelude::*;
use std::{collections::HashMap, io::Write};
const APP_TOKEN_ENV_VAR: &str = "TWITTER_APP_TOKEN";
const APP_SECRET_ENV_VAR: &str = "TWITTER_APP_SECRET";
const USER_TOKEN_ENV_VAR: &str = "TWITTER_USER_TOKEN";
const USER_SECRET_ENV_VAR: &str = "TWITTER_USER_SECRET";
const IMG_HEIGHT: u32 = 1000;
use std::io::Cursor;
use tracing::{event, Level};
mod twitter;
mod wiki;
use clap::{Parser, Subcommand};
use image::{DynamicImage, RgbaImage};
use tiny_skia::{Paint, PathBuilder, Pixmap, PixmapPaint, Stroke, Transform};
use twitter::*;
use wiki::*;
static APP_USER_AGENT: &str = concat!(
"bot_",
@ -20,316 +19,224 @@ static APP_USER_AGENT: &str = concat!(
"reqwest",
);
static CB_URL: &str = "http://localhost:6969/cb";
fn render_svg(data: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
// Render the raw SVG data to an image
fn render_svg(data: &[u8], height: u32, with_border: bool) -> StdError<DynamicImage> {
let opt = usvg::Options::default();
let rtree = usvg::Tree::from_data(&data, &opt.to_ref()).expect("couldn't parse");
let mut pixmap =
tiny_skia::Pixmap::new(IMG_HEIGHT, IMG_HEIGHT).ok_or("Error creating pixmap")?;
resvg::render(
&rtree,
usvg::FitTo::Size(IMG_HEIGHT, IMG_HEIGHT),
pixmap.as_mut(),
)
.ok_or_else(|| "Error rendering svg")?;
let mut bigger_pixmap = tiny_skia::Pixmap::new(IMG_HEIGHT / 9 * 16, IMG_HEIGHT)
.ok_or("Error creating bigger pixmap")?;
let rtree = usvg::Tree::from_data(data, &opt.to_ref()).expect("couldn't parse");
let svg_size = rtree.svg_node().size;
// Work out how wide the pixmap of height `height` needs to be to entirely fit the SVG.
let pm_width = ((height as f64 / svg_size.height()) * svg_size.width()).ceil() as u32;
let mut pixmap = Pixmap::new(pm_width, height).ok_or("Error creating pixmap")?;
// Render the svg into a pixmap.
resvg::render(&rtree, usvg::FitTo::Height(height), pixmap.as_mut())
.ok_or("Error rendering svg")?;
// Make a wider pixmap with a 16:9 AR and the same height. This is a blesséd ratio by twitter
// and means we see the whole image nicely in the timeline with no truncation.
let mut bigger_pixmap =
Pixmap::new(height / 9 * 16, height).ok_or("Error creating bigger pixmap")?;
// Then draw our freshly rendered SVG into the middle of the bigger pixmap.
bigger_pixmap
.draw_pixmap(
(bigger_pixmap.width() / 2 - IMG_HEIGHT / 2)
.try_into()
.unwrap(),
((bigger_pixmap.width() - pm_width) / 2).try_into().unwrap(),
0,
pixmap.as_ref(),
&tiny_skia::PixmapPaint::default(),
tiny_skia::Transform::identity(),
&PixmapPaint::default(),
Transform::identity(),
None,
)
.ok_or("Error drawing onto bigger pixmap")?;
let png_data = bigger_pixmap.encode_png()?;
Ok(png_data)
}
let (w, h) = (bigger_pixmap.width(), bigger_pixmap.height());
// Render a red border for debug purposes
if with_border {
let mut paint = Paint::default();
paint.set_color_rgba8(255, 0, 0, 255);
let stroke = Stroke {
width: 1.0,
..Default::default()
};
enum PostData<'a> {
Empty,
Multipart(reqwest::blocking::multipart::Form),
Data(&'a [(&'a str, &'a str)]),
}
enum APIAction<'a> {
Get,
Post(PostData<'a>),
}
struct TwitterEndpoint<'a>(&'a str);
impl TryInto<reqwest::Url> for TwitterEndpoint<'_> {
type Error = url::ParseError;
fn try_into(self) -> Result<reqwest::Url, Self::Error> {
reqwest::Url::parse(&format!("https://api.twitter.com/{}", self.0))
let path = {
let mut pb = PathBuilder::new();
pb.move_to(0.0, 0.0);
pb.line_to(0.0, h as f32 - stroke.width);
pb.line_to(w as f32, h as f32 - stroke.width);
pb.line_to(w as f32 - stroke.width, 0.0);
pb.line_to(0.0, 0.0);
pb.finish().unwrap()
};
bigger_pixmap.stroke_path(&path, &paint, &stroke, Transform::identity(), None);
}
}
// Make an authed twitter API request
fn twitter_api<'a>(
url: reqwest::Url,
user_token: Option<&oauth1::Token>,
action: APIAction,
extra_oauth_params: &[(&str, &str)],
) -> StdError<reqwest::blocking::Response> {
let consumer_token = oauth1::Token::new(
std::env::var(APP_TOKEN_ENV_VAR)?,
std::env::var(APP_SECRET_ENV_VAR)?,
);
let mut headers = reqwest::header::HeaderMap::new();
let mut oauth_params: HashMap<&str, Cow<str>> = extra_oauth_params
.iter()
.cloned()
.map(|(x, y)| (x, y.into()))
.collect();
// If the request is a key/value form post, we need to include those parameters when
// generating the signature.
match action {
APIAction::Post(PostData::Data(d)) => {
oauth_params.extend(d.iter().cloned().map(|(x, y)| (x, y.into())))
}
_ => {}
}
headers.insert(
reqwest::header::AUTHORIZATION,
reqwest::header::HeaderValue::from_str(&oauth1::authorize(
if matches!(action, APIAction::Post(_)) {
"POST"
} else {
"GET"
},
url.as_str(),
&consumer_token,
user_token,
Some(oauth_params),
))?,
);
let client = reqwest::blocking::Client::builder()
.user_agent(APP_USER_AGENT)
.default_headers(headers)
.build()?;
let req = match action {
APIAction::Get => client.get(url),
APIAction::Post(PostData::Empty) => client.post(url),
APIAction::Post(PostData::Data(data)) => client.post(url).form(data),
APIAction::Post(PostData::Multipart(form)) => client.post(url).multipart(form),
};
let res = req.send()?;
if !res.status().is_success() {
return Err(format!(
"Got non-200 response: status {}, {}",
res.status(),
res.text()?
)
.into());
}
Ok(res)
let img = RgbaImage::from_raw(
bigger_pixmap.width(),
bigger_pixmap.height(),
bigger_pixmap.data().to_vec(),
)
.ok_or("Error creating image from pixmap")?;
Ok(DynamicImage::ImageRgba8(img))
}
// Boxed-error Result alias used by every fallible function in this crate.
type StdError<T> = Result<T, Box<dyn std::error::Error>>;
// Top-level CLI parser; author/version/about are filled from Cargo metadata.
// (Deliberately a plain comment, not a doc comment: clap would surface a
// doc comment here as the `about` text.)
#[derive(Parser)]
#[clap(author, version, about)]
struct Cli {
    // Which subcommand to run; see `Commands`.
    #[clap(subcommand)]
    command: Commands,
}
// All subcommands the bot supports. The `///` comments below are user-facing:
// clap's derive turns them into the per-subcommand help text.
#[derive(Subcommand)]
enum Commands {
    /// Authorize the twitter application to access a user's account
    Authorize,
    /// Scrape images from the category on wikimedia commons
    ScrapeCategory,
    /// Scrape images from the iso7010 wikipedia page
    ScrapeWeb,
    /// List tweets from the authed user's timeline
    ListTweets,
    /// Run the bot - scrape, pick a random entry and tweet it
    RunBot {
        /// Don't post; print the tweet and render the image to the terminal
        #[clap(short, long, action)]
        dry_run: bool,
    },
    /// Print details about the currently authed user
    Whoami,
}
fn main() -> StdError<()> {
let matches = clap::App::new(env!("CARGO_PKG_NAME"))
.version(env!("CARGO_PKG_VERSION"))
.subcommand(clap::SubCommand::with_name("authorize").about("Authorize the twitter application to access a user's account by popping open a web browser and returning the credentials once authorized.")).get_matches();
match matches.subcommand() {
("authorize", _) => do_authorize(),
_ => run_bot(),
tracing_subscriber::fmt::init();
let cli = Cli::parse();
match &cli.command {
Commands::Authorize => do_authorize(),
Commands::ScrapeCategory => do_scrape_category(),
Commands::ScrapeWeb => do_scrape_web(),
Commands::ListTweets => do_list_tweets(),
Commands::Whoami => do_whoami(),
Commands::RunBot { dry_run } => run_bot(*dry_run),
}
}
fn do_authorize() -> StdError<()> {
println!("Authorizing you lol!");
// Oauth1 leg 1
let res = twitter_api(
TwitterEndpoint("oauth/request_token").try_into()?,
None,
APIAction::Post(PostData::Empty),
&[("oauth_callback", CB_URL)],
)?
.text()?;
let returned_params: HashMap<&str, &str> = res
.split("&")
.map(|s| s.split("=").collect_tuple())
.collect::<Option<_>>()
.ok_or("Unexpected oauth step 1 response")?;
// Oauth1 leg 2
let user_url = reqwest::Url::parse_with_params(
"https://api.twitter.com/oauth/authenticate",
[("oauth_token", returned_params["oauth_token"])],
)?;
println!("Plz do the thing in the browser");
webbrowser::open(user_url.as_str())?;
let listener = std::net::TcpListener::bind("127.0.0.1:6969")?;
let mut stream = listener.incoming().next().ok_or("Error getting stream")??;
let mut buf = [0u8; 4096];
stream.read(&mut buf[..])?;
let target = std::str::from_utf8(
buf.split(|c| *c == b' ')
.skip(1)
.next()
.ok_or("No target found")?,
)?;
let oauth_verifier = reqwest::Url::parse("https://example.net/")?
.join(target.into())?
.query_pairs()
.find_map(|(k, v)| {
if k == "oauth_verifier" {
Some(v.into_owned())
} else {
None
}
})
.ok_or("no oauth_verifier in response")?;
stream.write(b"HTTP/1.1 200 OK\r\n\r\nThanks lmao\r\n")?;
stream.shutdown(std::net::Shutdown::Read)?;
// Oauth1 leg 3
let res = twitter_api(
TwitterEndpoint("oauth/access_token").try_into()?,
None,
APIAction::Post(PostData::Data(&[("oauth_verifier", &oauth_verifier)])),
&[("oauth_token", returned_params["oauth_token"])],
)?
.text()?;
let returned_params: HashMap<&str, &str> = res
.split("&")
.map(|s| s.split("=").collect_tuple())
.collect::<Option<_>>()
.ok_or("Unexpected oauth step 3 response")?;
println!(
"Authorized for {}.\nRun with {}={} {}={}",
returned_params["screen_name"],
USER_TOKEN_ENV_VAR,
returned_params["oauth_token"],
USER_SECRET_ENV_VAR,
returned_params["oauth_token_secret"]
);
Ok(())
}
fn upload_image(user_token: &oauth1::Token, img: Cow<'static, [u8]>) -> StdError<u64> {
let form = reqwest::blocking::multipart::Form::new()
.part("media", reqwest::blocking::multipart::Part::bytes(img));
let res: serde_json::Value = twitter_api(
"https://upload.twitter.com/1.1/media/upload.json".try_into()?,
Some(&user_token),
APIAction::Post(PostData::Multipart(form)),
&[],
)?
.json()?;
Ok(res["media_id"].as_u64().ok_or("media_id not u64!")?)
}
fn run_bot() -> StdError<()> {
let user_token = oauth1::Token::new(
std::env::var(USER_TOKEN_ENV_VAR)?,
std::env::var(USER_SECRET_ENV_VAR)?,
);
let args: Vec<String> = std::env::args().collect();
if args.len() < 2 {
println!("usage: ./thing out.png");
}
// Parse CSS selectors to scrape elements
let gallerybox_sel = scraper::Selector::parse(".mw-body-content li.gallerybox")
.map_err(|e| format!("{:?}", e))?;
let link_sel = scraper::Selector::parse("a.image").map_err(|e| format!("{:?}", e))?;
let title_sel = scraper::Selector::parse(".gallerytext p").map_err(|e| format!("{:?}", e))?;
let original_sel = scraper::Selector::parse(".fullMedia a").map_err(|e| format!("{:?}", e))?;
// Fetch stuff!
let client = reqwest::blocking::Client::builder()
.user_agent(APP_USER_AGENT)
.build()?;
println!("Fetching main page");
let txt = client
.get("https://en.wikipedia.org/wiki/ISO_7010")
.send()?
.text()?;
let page = scraper::Html::parse_document(txt.as_str());
let things = page
.select(&gallerybox_sel)
.map(|a| {
let link = a
.select(&link_sel)
.next()
.unwrap()
.value()
.attr("href")
.unwrap();
let title = a
.select(&title_sel)
.next()
.unwrap()
.text()
.collect::<String>()
.trim()
.to_owned();
(title, link)
})
.collect::<Vec<(String, &str)>>();
// Pick a random entry and fetch the original file
let (title, link) = things
.choose(&mut rand::thread_rng())
.ok_or_else(|| "got no images m8")?;
println!("Fetching image page");
let media_page = client
.get(format!("https://en.wikipedia.org{}", link))
.send()?
.text()?;
let page = scraper::Html::parse_document(media_page.as_str());
let link = page
.select(&original_sel)
.next()
.unwrap()
.value()
.attr("href")
.unwrap();
let svg = client.get(format!("https:{}", link)).send()?.bytes()?;
let png_data = render_svg(&svg)?;
fn do_whoami() -> StdError<()> {
let user_token = user_token_from_env();
let user: serde_json::Value = twitter_api(
TwitterEndpoint("1.1/account/verify_credentials.json").try_into()?,
TwitterEndpoint::VerifyCredentials.try_into()?,
Some(&user_token),
APIAction::Get,
&[],
)?
.json()?;
println!(
"Tweeting for user @{}, (id: {})",
user["screen_name"], user["id"]
);
println!("Uploading image...");
let img_id = upload_image(&user_token, Cow::from(png_data))?;
let tweet = title;
println!("Sending tweet...");
twitter_api(
TwitterEndpoint("1.1/statuses/update.json").try_into()?,
Some(&user_token),
APIAction::Post(PostData::Data(&[
("media_ids", &img_id.to_string()),
("status", tweet),
])),
&[],
)?;
println!("User @{}, (id: {})", user["screen_name"], user["id"]);
Ok(())
}
/// Fetch and print the authed user's recent timeline, one `id, "text"` line
/// per tweet.
///
/// Looks the user id up via `verify_credentials`, then requests up to 200
/// original (non-reply, non-retweet) tweets. Malformed API payloads now
/// produce an `Err` instead of a panic.
fn do_list_tweets() -> StdError<()> {
    let user_token = user_token_from_env();
    let user = twitter_api(
        TwitterEndpoint::VerifyCredentials.try_into()?,
        Some(&user_token),
        APIAction::Get,
        &[],
    )?
    .json::<serde_json::Value>()?;
    // was: .unwrap() — propagate an error rather than panicking on an
    // unexpected response shape.
    let id = user["id"].as_u64().ok_or("user id missing or not a u64")?;
    let timeline: serde_json::Value = twitter_api(
        reqwest::Url::parse_with_params(
            &TwitterEndpoint::UserTimeline.to_string(),
            [
                ("count", "200"),
                ("exclude_replies", "true"),
                ("include_retweets", "false"),
                ("trim_user", "true"),
                ("user_id", id.to_string().as_ref()),
            ],
        )?,
        Some(&user_token),
        APIAction::Get,
        &[],
    )?
    .json()?;
    // The timeline endpoint returns a JSON array of tweet objects.
    for tweet in timeline
        .as_array()
        .ok_or("timeline response was not an array")?
    {
        let tweet = tweet.as_object().ok_or("tweet entry was not an object")?;
        println!("{}, \"{}\"", tweet["id"], tweet["text"]);
    }
    Ok(())
}
/// Print, in sorted order, every file in the ISO 7010 vector-drawings
/// category on Wikimedia Commons.
fn do_scrape_category() -> StdError<()> {
    let mut names = get_files_in_category("Category:ISO_7010_safety_signs_(vector_drawings)")?;
    names.sort();
    for name in &names {
        println!("{}", name);
    }
    Ok(())
}
/// Print, in sorted order, the filenames scraped from the wikipedia
/// ISO 7010 page (titles are discarded).
fn do_scrape_web() -> StdError<()> {
    let mut filenames = scrape_web()?
        .into_iter()
        .map(|pair| pair.1)
        .collect::<Vec<_>>();
    filenames.sort();
    for name in &filenames {
        println!("{}", name);
    }
    Ok(())
}
/// Build a blocking reqwest client carrying the bot's user agent, optionally
/// pre-loaded with a set of default headers.
fn get_client(headers: Option<reqwest::header::HeaderMap>) -> StdError<reqwest::blocking::Client> {
    let builder = reqwest::blocking::Client::builder().user_agent(APP_USER_AGENT);
    let builder = match headers {
        Some(h) => builder.default_headers(h),
        None => builder,
    };
    Ok(builder.build()?)
}
/// Run the bot end to end: scrape the wiki page, pick a random sign, fetch
/// its metadata and SVG from Commons, and tweet it with attribution.
///
/// With `dry_run` set, nothing is posted: the tweet text is printed and a
/// small preview of the image is rendered to the terminal via viuer.
fn run_bot(dry_run: bool) -> StdError<()> {
    let all = scrape_web()?;
    // Pick a random (title, filename) pair to post.
    let (title, filename) = all
        .choose(&mut rand::thread_rng())
        .ok_or("got no images m8")?;
    event!(Level::INFO, title, filename, "Picked random thing");
    let client = get_client(None)?;
    event!(Level::INFO, "Fetching metadata...");
    // TODO: could crash, probably doesn't matter
    // (remove(0) panics if the metadata lookup returned an empty vec)
    let meta = get_file_metadata(vec![filename])?.remove(0);
    event!(Level::INFO, %meta, "Got metadata");
    event!(Level::INFO, url = meta.url.to_string(), "Fetching image");
    let svg = client.get(meta.url).send()?.bytes()?;
    // Compose the tweet: title plus image source / author / license credit.
    let text = format!(
        "{}\n\nImage source: {}\nAuthor: Wikimedia Commons user {}\n{}{}",
        title,
        meta.html_url,
        meta.author,
        meta.license_short_name,
        meta.license_url
            .map_or("".to_owned(), |u| format!(" ({})", u))
    );
    if !dry_run {
        // Render the image nice and big for twitter
        let img = render_svg(&svg, 1000, false)?;
        let mut buf = Cursor::new(Vec::new());
        img.write_to(&mut buf, image::ImageFormat::Png)?;
        tweet(&text, Some(buf.into_inner().into()))?;
    } else {
        // Render the image smaller for output to terminal
        let img = render_svg(&svg, 128, true)?;
        println!("Dry run - would tweet:\n \"{}\"", text);
        viuer::print(
            &img,
            &viuer::Config {
                absolute_offset: false,
                width: Some(32),
                ..Default::default()
            },
        )?;
    }
    Ok(())
}

276
src/twitter.rs Normal file
View File

@ -0,0 +1,276 @@
use crate::{get_client, StdError};
use itertools::Itertools;
use std::borrow::Cow;
use std::convert::TryInto;
use std::fmt;
use std::io::prelude::*;
use std::{collections::HashMap, io::Write};
use tracing::{event, instrument, Level};
const APP_TOKEN_ENV_VAR: &str = "TWITTER_APP_TOKEN";
const APP_SECRET_ENV_VAR: &str = "TWITTER_APP_SECRET";
const USER_TOKEN_ENV_VAR: &str = "TWITTER_USER_TOKEN";
const USER_SECRET_ENV_VAR: &str = "TWITTER_USER_SECRET";
static CB_URL: &str = "http://localhost:6969/cb";
/// Read the user's OAuth1 token pair from the environment.
///
/// Panics (via `expect`) when either variable is unset — nothing useful can
/// be done without credentials.
pub fn user_token_from_env() -> oauth1::Token<'static> {
    let token = std::env::var(USER_TOKEN_ENV_VAR).expect("No user token env var");
    let secret = std::env::var(USER_SECRET_ENV_VAR).expect("No user secret env var");
    oauth1::Token::new(token, secret)
}
/// The twitter API endpoints this bot talks to; the `Display` impl renders
/// each as its full URL.
pub enum TwitterEndpoint {
    OauthRequestToken,
    OauthAccessToken,
    OauthAuthenticate,
    UpdateStatus,
    UserTimeline,
    VerifyCredentials,
}
impl std::fmt::Display for TwitterEndpoint {
    /// Render the endpoint as its full `https://api.twitter.com/...` URL.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
        const BASE: &str = "https://api.twitter.com";
        let path = match self {
            Self::OauthRequestToken => "oauth/request_token",
            Self::OauthAccessToken => "oauth/access_token",
            Self::OauthAuthenticate => "oauth/authenticate",
            Self::UpdateStatus => "1.1/statuses/update.json",
            Self::UserTimeline => "1.1/statuses/user_timeline.json",
            Self::VerifyCredentials => "1.1/account/verify_credentials.json",
        };
        write!(f, "{}/{}", BASE, path)
    }
}
impl TryInto<reqwest::Url> for TwitterEndpoint {
type Error = url::ParseError;
fn try_into(self) -> Result<reqwest::Url, Self::Error> {
reqwest::Url::parse(&self.to_string())
}
}
/// Body of a POST request to the twitter API.
pub enum PostData<'a> {
    // No request body at all.
    Empty,
    // multipart/form-data, used for media uploads.
    Multipart(reqwest::blocking::multipart::Form),
    // Form-encoded key/value pairs; `twitter_api` also folds these into the
    // OAuth1 signature.
    Data(&'a [(&'a str, Cow<'a, str>)]),
}
impl fmt::Debug for PostData<'_> {
    /// Print only the variant name; the payloads (forms, field slices) are
    /// not themselves `Debug`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Empty => "Empty",
            Self::Multipart(_) => "Multipart",
            Self::Data(_) => "Data",
        };
        write!(f, "{}", label)
    }
}
/// An API request: either a plain GET, or a POST carrying `PostData`.
#[derive(Debug)]
pub enum APIAction<'a> {
    Get,
    Post(PostData<'a>),
}
impl APIAction<'_> {
    /// HTTP verb used for this action, both when signing and when sending.
    pub fn get_verb(&self) -> &'static str {
        if matches!(self, Self::Post(_)) {
            "POST"
        } else {
            "GET"
        }
    }
}
/// Make an authed twitter API request.
///
/// Builds an OAuth1 `Authorization` header — signed with the app's consumer
/// token (from `TWITTER_APP_TOKEN`/`TWITTER_APP_SECRET`) plus `user_token`
/// when given — and sends the request. Query parameters and form fields are
/// folded into the signature as OAuth1 requires. Any non-2xx response is
/// turned into an `Err` carrying the status and body.
///
/// (The previous signature declared a lifetime parameter `'a` that was never
/// used; it has been removed.)
#[instrument(skip(user_token), fields(url=url.to_string()))]
pub fn twitter_api(
    url: reqwest::Url,
    user_token: Option<&oauth1::Token>,
    action: APIAction,
    extra_oauth_params: &[(&str, &str)],
) -> StdError<reqwest::blocking::Response> {
    let consumer_token = oauth1::Token::new(
        std::env::var(APP_TOKEN_ENV_VAR)?,
        std::env::var(APP_SECRET_ENV_VAR)?,
    );
    let mut headers = reqwest::header::HeaderMap::new();
    let mut params: HashMap<&str, Cow<str>> = extra_oauth_params
        .iter()
        .cloned()
        .map(|(x, y)| (x, y.into()))
        .collect();
    // Copy all our query parameters and add them to the list of params for oauth1 signature
    // generation.
    // This is a bit awkward, as params is a map from &str to Cow<str> but query_pairs() returns
    // (Cow<str>, Cow<str>) tuples. So we call into_owned to make (String, String) tuples, and then
    // borrow from there. If there's a way to do it without copying, I couldn't find it.
    let pairs: Vec<_> = url.query_pairs().into_owned().collect();
    for (k, v) in &pairs {
        params.insert(k, Cow::Borrowed(v));
    }
    // If the request is a key/value form post, we also need to include those parameters when
    // generating the signature. (Binding `d` copies only the slice reference,
    // so `action` remains usable below.)
    if let APIAction::Post(PostData::Data(d)) = action {
        params.extend(d.to_owned())
    }
    // The url used to generate the signature must not include the query params
    let mut url_sans_query = url.clone();
    url_sans_query.set_query(None);
    headers.insert(
        reqwest::header::AUTHORIZATION,
        reqwest::header::HeaderValue::from_str(&oauth1::authorize(
            action.get_verb(),
            url_sans_query.as_str(),
            &consumer_token,
            user_token,
            Some(params),
        ))?,
    );
    let client = get_client(Some(headers))?;
    let req = match action {
        APIAction::Get => client.get(url),
        APIAction::Post(PostData::Empty) => client.post(url),
        APIAction::Post(PostData::Data(data)) => client.post(url).form(data),
        APIAction::Post(PostData::Multipart(form)) => client.post(url).multipart(form),
    };
    event!(Level::INFO, "Sending request");
    let res = req.send()?;
    if !res.status().is_success() {
        return Err(format!(
            "Got non-200 response: status {}, {}",
            res.status(),
            res.text()?
        )
        .into());
    }
    Ok(res)
}
/// Run the 3-legged OAuth1 dance and print the resulting user credentials.
///
/// Leg 1 obtains a request token; leg 2 sends the user to twitter in a
/// browser and captures the verifier on a throwaway localhost listener
/// (port 6969, matching `CB_URL`); leg 3 exchanges the verifier for the
/// user's token and secret, which are printed for use as env vars.
pub fn do_authorize() -> StdError<()> {
    println!("Authorizing you lol!");
    // Oauth1 leg 1
    let res = twitter_api(
        TwitterEndpoint::OauthRequestToken.try_into()?,
        None,
        APIAction::Post(PostData::Empty),
        &[("oauth_callback", CB_URL)],
    )?
    .text()?;
    // The response body is form-encoded `key=value` pairs joined by '&'.
    let returned_params: HashMap<&str, &str> = res
        .split('&')
        .map(|s| s.split('=').collect_tuple())
        .collect::<Option<_>>()
        .ok_or("Unexpected oauth step 1 response")?;
    // Oauth1 leg 2
    let user_url = reqwest::Url::parse_with_params(
        &TwitterEndpoint::OauthAuthenticate.to_string(),
        [("oauth_token", returned_params["oauth_token"])],
    )?;
    println!("Plz do the thing in the browser");
    webbrowser::open(user_url.as_str())?;
    // Accept a single connection on the callback address baked into CB_URL.
    let listener = std::net::TcpListener::bind("127.0.0.1:6969")?;
    let mut stream = listener.incoming().next().ok_or("Error getting stream")??;
    let mut buf = [0u8; 4096];
    // NOTE(review): a single read assumes the request line arrives in one
    // chunk and fits in 4096 bytes — fine in practice for localhost.
    stream.read(&mut buf[..])?;
    // The request target is the second space-separated token of
    // "GET /cb?... HTTP/1.1".
    let target = std::str::from_utf8(buf.split(|c| *c == b' ').nth(1).ok_or("No target found")?)?;
    // Join the relative target onto a dummy base purely to parse its query
    // string with the url crate.
    let oauth_verifier = reqwest::Url::parse("https://example.net/")?
        .join(target)?
        .query_pairs()
        .find_map(|(k, v)| {
            if k == "oauth_verifier" {
                Some(v.into_owned())
            } else {
                None
            }
        })
        .ok_or("no oauth_verifier in response")?;
    stream.write_all(b"HTTP/1.1 200 OK\r\n\r\nThanks lmao\r\n")?;
    stream.shutdown(std::net::Shutdown::Read)?;
    // Oauth1 leg 3
    let res = twitter_api(
        TwitterEndpoint::OauthAccessToken.try_into()?,
        None,
        APIAction::Post(PostData::Data(&[(
            "oauth_verifier",
            Cow::Owned(oauth_verifier),
        )])),
        &[("oauth_token", returned_params["oauth_token"])],
    )?
    .text()?;
    let returned_params: HashMap<&str, &str> = res
        .split('&')
        .map(|s| s.split('=').collect_tuple())
        .collect::<Option<_>>()
        .ok_or("Unexpected oauth step 3 response")?;
    println!(
        "Authorized for {}.\nRun with {}={} {}={}",
        returned_params["screen_name"],
        USER_TOKEN_ENV_VAR,
        returned_params["oauth_token"],
        USER_SECRET_ENV_VAR,
        returned_params["oauth_token_secret"]
    );
    Ok(())
}
/// Upload raw image bytes to twitter's media endpoint and return the
/// resulting media id for attachment to a tweet.
fn upload_image(user_token: &oauth1::Token, img: Cow<'static, [u8]>) -> StdError<u64> {
    let part = reqwest::blocking::multipart::Part::bytes(img);
    let form = reqwest::blocking::multipart::Form::new().part("media", part);
    let response: serde_json::Value = twitter_api(
        "https://upload.twitter.com/1.1/media/upload.json".try_into()?,
        Some(user_token),
        APIAction::Post(PostData::Multipart(form)),
        &[],
    )?
    .json()?;
    let media_id = response["media_id"].as_u64().ok_or("media_id not u64!")?;
    Ok(media_id)
}
/// Post a tweet with `text`, optionally attaching an image.
///
/// Reads the user token from the environment (returning an `Err`, not
/// panicking, when unset), prints the account being tweeted for, uploads the
/// image (if any) to obtain a media id, then posts the status update.
pub fn tweet(text: &str, img: Option<Cow<'static, [u8]>>) -> StdError<()> {
    let user_token = oauth1::Token::new(
        std::env::var(USER_TOKEN_ENV_VAR)?,
        std::env::var(USER_SECRET_ENV_VAR)?,
    );
    // Look up the authed account so the operator can see who is tweeting.
    let user: serde_json::Value = twitter_api(
        TwitterEndpoint::VerifyCredentials.try_into()?,
        Some(&user_token),
        APIAction::Get,
        &[],
    )?
    .json()?;
    println!(
        "Tweeting for user @{}, (id: {})",
        user["screen_name"], user["id"]
    );
    let mut post_data = vec![("status", Cow::Borrowed(text))];
    if let Some(img) = img {
        println!("Uploading image...");
        let img_id = upload_image(&user_token, img)?;
        post_data.push(("media_ids", Cow::Owned(img_id.to_string())))
    }
    event!(Level::INFO, "Sending tweet...");
    twitter_api(
        TwitterEndpoint::UpdateStatus.try_into()?,
        Some(&user_token),
        // was `&post_data[0..]`; `as_slice` states the intent directly.
        APIAction::Post(PostData::Data(post_data.as_slice())),
        &[],
    )?;
    Ok(())
}

204
src/wiki.rs Normal file
View File

@ -0,0 +1,204 @@
use std::collections::HashMap;
use crate::{get_client, StdError};
use regex::Regex;
use serde::Deserialize;
use std::fmt::Display;
use tracing::{event, instrument, Level};
use url::Url;
// Filter a filename string for filenames
// Returns true when `filename` matches the ISO 7010 sign naming scheme,
// e.g. "ISO_7010_W001.svg" (the '.'s deliberately match '_', ' ', etc.).
// NOTE(review): the Regex is recompiled on every call (once per scraped
// gallery entry) — consider caching it if this ever shows up in a profile.
fn filter_filename(filename: &str) -> bool {
    let re = Regex::new("ISO.7010.[EWMPF][0-9]{3}.*").unwrap();
    re.is_match(filename)
}
// Scrape all images from the wikipedia page, returning a vec of title, filename pairs
pub fn scrape_web() -> StdError<Vec<(String, String)>> {
    event!(Level::INFO, "Scraping the wikipedia page for things");
    // Parse CSS selectors to scrape elements
    let gallerybox_sel = scraper::Selector::parse(".mw-body-content li.gallerybox")
        .map_err(|e| format!("{:?}", e))?;
    let link_sel = scraper::Selector::parse("a.image").map_err(|e| format!("{:?}", e))?;
    let title_sel = scraper::Selector::parse(".gallerytext p").map_err(|e| format!("{:?}", e))?;
    // Fetch stuff!
    let client = get_client(None)?;
    event!(Level::INFO, "Fetching wiki page");
    let txt = client
        .get("https://en.wikipedia.org/wiki/ISO_7010")
        .send()?
        .text()?;
    let page = scraper::Html::parse_document(txt.as_str());
    // NOTE(review): the unwraps assume every gallery box has a link and a
    // title; a page-layout change would panic here.
    let results = page
        .select(&gallerybox_sel)
        .map(|a| {
            let link = a
                .select(&link_sel)
                .next()
                .unwrap()
                .value()
                .attr("href")
                .unwrap()
                .to_owned();
            let title = a
                .select(&title_sel)
                .next()
                .unwrap()
                .text()
                .collect::<String>()
                .trim()
                .to_owned();
            (title, link)
        })
        // Filter for filenames that look like ISO diagrams
        .filter(|tup| filter_filename(&tup.1))
        // Extract the file name only (.e.g `File:ISO_7010_X000.svg`)
        .filter_map(|(title, link)| {
            link.split('/')
                .next_back()
                .map(|end| (title, end.to_owned()))
        })
        .collect();
    // Tail expression instead of the previous explicit `return ...;`.
    Ok(results)
}
/// Build a Commons API URL carrying `action=query&format=json` plus the
/// caller-supplied query parameters.
#[instrument]
pub fn wiki_query_url(params: Vec<(&str, &str)>) -> StdError<Url> {
    const BASE: &str = "https://commons.wikimedia.org/w/api.php?action=query&format=json";
    let mut query_url = Url::parse(BASE)?;
    {
        // Serializer borrows the url mutably; scope it before returning.
        let mut pairs = query_url.query_pairs_mut();
        pairs.extend_pairs(params);
    }
    Ok(query_url)
}
// https://commons.wikimedia.org/w/api.php?action=query&format=json&list=categorymembers&cmtitle=Category:ISO_7010_safety_signs_(vector_drawings)&cmlimit=2
/// List every file title in a Commons category, with spaces replaced by
/// underscores (e.g. "File:ISO_7010_W001.svg").
#[instrument]
pub fn get_files_in_category(category: &str) -> StdError<Vec<String>> {
    let client = get_client(None)?;
    let url = wiki_query_url(
        [
            ("list", "categorymembers"),
            ("cmtitle", category),
            ("cmtype", "file"),
            ("cmlimit", "max"),
        ]
        .into(),
    )?;
    let data = client.get(url).send()?.json::<serde_json::Value>()?;
    if data.get("continue").is_some() {
        // There are more results than are contained in one response, so now you need to implement
        // pagination. Have fun!
        // was: panic!(...) — return an Err instead; this function already
        // returns Result, so callers can report the failure cleanly.
        return Err("Wikimedia query result is paginated!".into());
    }
    Ok(data["query"]["categorymembers"]
        .as_array()
        .ok_or("categorymembers missing or not an array")?
        .iter()
        // was: m.as_object().unwrap() — skip malformed entries instead.
        .filter_map(|m| Some(m.as_object()?["title"].as_str()?.replace(' ', "_")))
        .collect())
}
/// Attribution metadata for one Commons file, assembled from an imageinfo
/// query (see `get_file_metadata`).
#[derive(Debug)]
pub struct FileMeta {
    // Direct URL of the file itself.
    pub url: url::Url,
    // Human-readable name (the ObjectName ext-metadata item).
    pub name: String,
    // URL of the file's description page.
    pub html_url: url::Url,
    // Username taken from the file's oldest revision (the original uploader).
    pub author: String,
    // Raw string as returned by the API — not parsed into a bool here.
    pub attribution_required: String,
    // Short license label, from the LicenseShortName ext-metadata item.
    pub license_short_name: String,
    // Not every license has a URL.
    pub license_url: Option<url::Url>,
}
impl Display for FileMeta {
    /// One-line rendering used by tracing (`%meta` in `run_bot`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "FileMeta{{url: {}, name: {}, html_url: {}, author: {}, attribution_required: {}, license_short_name: {}, license_url: {}}}",
            self.url, self.name, self.html_url, self.author, self.attribution_required, self.license_short_name,
            // Borrow instead of cloning the whole Url just to stringify it
            // (this was the "// Ew." the original author flagged).
            self.license_url.as_ref().map_or("None".to_owned(), |u| u.to_string())
        )
    }
}
// Partial representation of the data returned from a MediaWiki imageinfo query
#[derive(Deserialize)]
struct Query {
    query: QueryInner,
}
#[derive(Deserialize, Debug)]
struct QueryInner {
    // Presumably keyed by page id — only the values are used downstream.
    pages: HashMap<String, Page>,
}
#[derive(Deserialize, Debug)]
struct Page {
    // One entry per file revision; `get_file_metadata` reads first() and last().
    imageinfo: Vec<ImageInfo>,
}
#[derive(Deserialize, Debug)]
struct ImageInfo {
    // User associated with this revision (used as the author attribution).
    user: String,
    url: url::Url,
    descriptionurl: url::Url,
    extmetadata: ExtMeta,
}
// Field names map onto the API's PascalCase keys (ObjectName, LicenseUrl, ...).
#[derive(Deserialize, Debug)]
#[serde(rename_all = "PascalCase")]
struct ExtMeta {
    object_name: ExtMetaItem<String>,
    license_short_name: ExtMetaItem<String>,
    attribution_required: ExtMetaItem<String>,
    // Absent for licenses without a canonical URL.
    license_url: Option<ExtMetaItem<url::Url>>,
}
// Every ext-metadata item nests its payload under a "value" key.
#[derive(Deserialize, Debug, Clone)]
struct ExtMetaItem<T> {
    value: T,
}
/// Fetch `FileMeta` for each of `files` from the Commons imageinfo API.
///
/// Most attribution fields come from the newest revision; the author comes
/// from the oldest revision (the original uploader). Network and decode
/// failures are now propagated as `Err` instead of panicking inside an
/// iterator chain.
pub fn get_file_metadata(files: Vec<&str>) -> StdError<Vec<FileMeta>> {
    let client = get_client(None)?;
    let mut out = Vec::with_capacity(files.len());
    // Api only lets us do 50 files in one request
    for files_chunk in files.chunks(50) {
        let url = wiki_query_url(
            [
                ("titles", files_chunk.join("|").as_ref()),
                ("prop", "imageinfo"),
                (
                    "iiprop",
                    "timestamp|url|size|mime|mediatype|extmetadata|user",
                ),
                // Get metadata for as many revisions of the file as we are allowed. We're unlikely to encounter a file with >500 revisions.
                ("iilimit", "500"),
                (
                    "iiextmetadatafilter",
                    "ObjectName|LicenseShortName|AttributionRequired|LicenseUrl",
                ),
            ]
            .into(),
        )?;
        // was: .unwrap() on send/json inside flat_map — use `?` instead.
        let data = client.get(url).send()?.json::<Query>()?;
        for page in data.query.pages.values() {
            let latest = page.imageinfo.first().ok_or("page has no imageinfo")?;
            let oldest = page.imageinfo.last().ok_or("page has no imageinfo")?;
            out.push(FileMeta {
                url: latest.url.clone(),
                name: latest.extmetadata.object_name.value.clone(),
                html_url: latest.descriptionurl.clone(),
                author: oldest.user.clone(),
                license_short_name: latest.extmetadata.license_short_name.value.clone(),
                license_url: latest.extmetadata.license_url.clone().map(|i| i.value),
                attribution_required: latest.extmetadata.attribution_required.value.clone(),
            });
        }
    }
    Ok(out)
}