|
| 1 | +import json |
| 2 | +import os |
| 3 | +from collections import defaultdict |
| 4 | +from datetime import date, datetime, time, timedelta |
| 5 | + |
| 6 | +from pydantic import BaseModel |
| 7 | +from pydantic.class_validators import root_validator |
| 8 | +from slugify import slugify |
| 9 | + |
| 10 | + |
class SpeakerQuestion:
    """English texts of the speaker questions whose answers are extracted.

    The values are compared verbatim against the question text of each
    answer, so they must match the wording used in Pretalx exactly.
    """

    # Employer / organisation / school of the speaker.
    affiliation = "Company / Organization / Educational Institution"

    # Social links.
    homepage = "Social (Homepage)"
    twitter = "Social (X/Twitter)"
    mastodon = "Social (Mastodon)"
| 16 | + |
| 17 | + |
class SubmissionQuestion:
    """English texts of the submission questions whose answers are extracted.

    The values are compared verbatim against the question text of each
    answer attached to a submission.
    """

    outline = "Outline"
    tweet = "Abstract as a tweet / toot"
| 21 | + |
| 22 | + |
class SubmissionState:
    """Submission state strings that this module compares against."""

    accepted = "accepted"
    confirmed = "confirmed"
    withdrawn = "withdrawn"
| 27 | + |
| 28 | + |
class PretalxAnswer(BaseModel):
    """A single answer to a Pretalx question, flattened to plain fields."""

    question_text: str
    answer_text: str
    answer_file: str | None
    submission_id: str | None
    speaker_id: str | None

    @root_validator(pre=True)
    def extract(cls, values):
        """Copy values from the raw payload keys onto the model field names.

        Runs before field validation. Every raw key read here must be
        present in ``values``; a missing one raises ``KeyError``.
        """
        # The question text is nested; only the English version is kept.
        values["question_text"] = values["question"]["question"]["en"]

        # (model field, raw payload key). ``answer_file`` maps onto itself:
        # the value is unchanged, but the lookup is kept so that a payload
        # without that key still raises.
        renames = (
            ("answer_text", "answer"),
            ("answer_file", "answer_file"),
            ("submission_id", "submission"),
            ("speaker_id", "person"),
        )
        for field_name, raw_key in renames:
            values[field_name] = values[raw_key]

        return values
| 44 | + |
| 45 | + |
class PretalxSpeaker(BaseModel):
    """A Pretalx speaker, reduced to the fields this module exposes.

    Selected answers are promoted to dedicated attributes and the raw
    answer list is emptied afterwards.
    """

    code: str
    name: str
    biography: str | None
    avatar: str | None
    slug: str
    answers: list[PretalxAnswer]
    submissions: list[str]

    # Extracted
    affiliation: str | None = None
    homepage: str | None = None
    twitter: str | None = None
    mastodon: str | None = None

    @root_validator(pre=True)
    def extract(cls, values):
        """Derive the slug and lift known answers into their own fields."""
        values["slug"] = slugify(values["name"])

        # Parse every answer up front so a malformed one fails before any
        # extracted field is written.
        parsed_answers = [PretalxAnswer.parse_obj(raw) for raw in values["answers"]]

        # Question text -> name of the model field that receives the answer.
        #
        # NOTE: in practice the format of the twitter data is different,
        # depending on the speaker. We could fix this here by parsing the
        # answer_text to some standardised format (either @handle or
        # https://twitter.com/handle url, etc)
        field_for_question = {
            SpeakerQuestion.affiliation: "affiliation",
            SpeakerQuestion.homepage: "homepage",
            SpeakerQuestion.twitter: "twitter",
            SpeakerQuestion.mastodon: "mastodon",
        }

        for parsed in parsed_answers:
            target_field = field_for_question.get(parsed.question_text)
            if target_field is not None:
                # When a question is answered more than once the last
                # answer wins.
                values[target_field] = parsed.answer_text

        # Remove all the other answers
        # This is important, because some answers might contain non-public
        # information
        values["answers"] = []

        return values
| 90 | + |
| 91 | + |
class PretalxSubmission(BaseModel):
    """A Pretalx submission, reduced to the fields this module exposes.

    The pre-validation hook normalises the raw payload: localised names are
    collapsed to English, speakers are reduced to their codes, selected
    answers are promoted to dedicated fields, and the raw answer list is
    emptied.
    """

    code: str
    title: str
    speakers: list[str]  # We only want the code, not the full info
    submission_type: str
    slug: str
    track: str | None
    state: str
    abstract: str
    answers: list[PretalxAnswer]
    tweet: str = ""
    # NOTE(review): unlike ``tweet`` this has no default, so a submission
    # with no "Outline" answer and no raw ``outline`` key fails validation.
    # Confirm that this is intended.
    outline: str
    duration: str

    level: str = ""
    delivery: str | None = ""

    # This is embedding a slot inside a submission for easier lookup later
    room: str | None = None
    start: datetime | None = None
    end: datetime | None = None

    # TODO: once we have schedule data then we can prefill those in the code
    # here
    talks_in_parallel: list[str] | None = None
    talks_after: list[str] | None = None
    next_talk_code: str | None = None
    prev_talk_code: str | None = None

    website_url: str | None = None

    @root_validator(pre=True)
    def extract(cls, values):
        """Normalise the raw payload before field validation.

        Raises ``KeyError`` when a raw key read here is missing from
        ``values``.
        """
        # SubmissionType and Track have localised names. For this project we
        # only care about their english versions, so we can extract them here.
        # In 2024 some of those are localised, and some are not, so only
        # dict values are unwrapped; plain strings and None pass through.
        for field in ("submission_type", "track"):
            if isinstance(values[field], dict):
                values[field] = values[field]["en"]

        # Keep only the speaker codes. dict.fromkeys de-duplicates while
        # preserving the order of the payload; a set would make the order
        # of the resulting list vary between runs.
        values["speakers"] = list(
            dict.fromkeys(speaker["code"] for speaker in values["speakers"])
        )

        answers = [PretalxAnswer.parse_obj(ans) for ans in values["answers"]]

        for answer in answers:
            if answer.question_text == SubmissionQuestion.outline:
                values["outline"] = answer.answer_text
            if answer.question_text == SubmissionQuestion.tweet:
                values["tweet"] = answer.answer_text

            # TODO if we need any other questions

        slug = slugify(values["title"])
        values["slug"] = slug
        values["website_url"] = f"https://ep2024.europython.eu/session/{slug}"

        # Remove all the other answers
        # This is important, because some answers might contain non-public
        # information
        values["answers"] = []

        return values

    @property
    def is_accepted(self):
        """True when the submission state is ``accepted``."""
        return self.state == SubmissionState.accepted

    @property
    def is_confirmed(self):
        """True when the submission state is ``confirmed``."""
        return self.state == SubmissionState.confirmed

    @property
    def is_publishable(self):
        """True when the submission is either accepted or confirmed."""
        return self.is_accepted or self.is_confirmed
| 171 | + |
| 172 | + |
def parse_submissions(
    path: str = "../data/raw/europython-2024/submissions_latest.json",
) -> list[PretalxSubmission]:
    """Load and parse every submission from a JSON export.

    No filtering by state happens here: all entries of the file are
    returned. Use ``publishable_submissions()`` to keep only accepted or
    confirmed ones.

    Args:
        path: Location of the JSON file holding a list of raw submissions.
            Defaults to the EuroPython 2024 export, resolved relative to
            the current working directory.

    Returns:
        One ``PretalxSubmission`` per entry, in file order.
    """
    # JSON is UTF-8; be explicit instead of relying on the platform's
    # default text encoding.
    with open(path, encoding="utf-8") as fd:
        raw_items = json.load(fd)

    return [PretalxSubmission.parse_obj(item) for item in raw_items]
| 185 | + |
| 186 | + |
def parse_speakers(
    path: str = "../data/raw/europython-2024/speakers_latest.json",
) -> list[PretalxSpeaker]:
    """Load and parse every speaker from a JSON export.

    No filtering happens here: all entries of the file are returned. Use
    ``publishable_speakers()`` to keep only speakers with publishable
    submissions.

    Args:
        path: Location of the JSON file holding a list of raw speakers.
            Defaults to the EuroPython 2024 export, resolved relative to
            the current working directory.

    Returns:
        One ``PretalxSpeaker`` per entry, in file order.
    """
    # JSON is UTF-8; be explicit instead of relying on the platform's
    # default text encoding.
    with open(path, encoding="utf-8") as fd:
        raw_items = json.load(fd)

    return [PretalxSpeaker.parse_obj(item) for item in raw_items]
| 199 | + |
| 200 | + |
def publishable_submissions() -> dict[str, PretalxSubmission]:
    """Return the publishable submissions, keyed by submission code."""
    by_code = {}
    for submission in parse_submissions():
        if submission.is_publishable:
            by_code[submission.code] = submission
    return by_code
| 203 | + |
| 204 | + |
def publishable_speakers(accepted_proposals: set[str]) -> dict[str, PretalxSpeaker]:
    """Return the speakers that have at least one accepted proposal.

    Args:
        accepted_proposals: Codes of the submissions considered accepted.
            Any iterable of codes works (for example a dict keys view); it
            is copied into a set for fast membership tests.

    Returns:
        Mapping of speaker code to speaker. Each returned speaker's
        ``submissions`` is overwritten with only the accepted codes, kept
        in their original order and without duplicates.
    """
    wanted = set(accepted_proposals)

    output = {}
    for speaker in parse_speakers():
        # dict.fromkeys de-duplicates while preserving order, so the
        # result is the same on every run.
        accepted = [
            code for code in dict.fromkeys(speaker.submissions) if code in wanted
        ]
        if accepted:
            # Overwrite with only the accepted proposals
            speaker.submissions = accepted
            output[speaker.code] = speaker

    return output
| 216 | + |
| 217 | + |
from pprint import pprint

# Ad-hoc sanity report: print a few counts, dump the publishable data and
# verify that the generated slugs do not collide.
print(len(parse_submissions()))
accepted = publishable_submissions()
print(len(accepted))

print(len(parse_speakers()))
# Computed once and reused below; every call re-reads the speakers file.
accepted_speakers = publishable_speakers(accepted.keys())
print(len(accepted_speakers))

print(accepted_speakers)

pprint(accepted)

# Check if all the slugs are unique.
# An explicit raise is used instead of ``assert`` so the check still runs
# under ``python -O`` and the message names the colliding slugs.
seen_slugs = set()
duplicate_slugs = set()
for submission in accepted.values():
    if submission.slug in seen_slugs:
        duplicate_slugs.add(submission.slug)
    seen_slugs.add(submission.slug)

if duplicate_slugs:
    raise AssertionError(f"Duplicate submission slugs: {sorted(duplicate_slugs)}")
0 commit comments