tangled
alpha
login
or
join now
pvsr.dev
/
podcasts
1
fork
atom
A personal podcast client
1
fork
atom
overview
issues
pulls
pipelines
better detection of new/missing episodes
pvsr.dev
3 years ago
1237f0de
b9eba61f
+26
-24
1 changed file
expand all
collapse all
unified
split
podcasts
fetch.py
+26
-24
podcasts/fetch.py
···
57
57
config = Config.load()
58
58
os.chdir(annex_dir)
59
59
60
60
-
last = {
61
61
-
row.slug: {
62
62
-
"old_eps": row.eps,
63
63
-
"last_fetch": datetime.fromisoformat(row.last_fetch),
64
64
-
}
65
65
-
for row in db.session.execute(
66
66
-
text(
67
67
-
"""
68
68
-
select p.slug, p.last_fetch, count(*) as eps
69
69
-
from episode e inner join podcast p on e.podcast_slug = p.slug
70
70
-
group by podcast_slug"""
71
71
-
)
60
60
+
rows = db.session.execute(
61
61
+
text(
62
62
+
"""
63
63
+
select e.id, p.slug, p.last_fetch
64
64
+
from episode e inner join podcast p on e.podcast_slug = p.slug
65
65
+
group by podcast_slug"""
72
66
)
73
73
-
}
74
74
-
fallback_status = {"old_eps": 0, "last_fetch": None}
67
67
+
)
68
68
+
69
69
+
last_fetch: dict[str, datetime] = {}
70
70
+
old_eps: dict[str, set[str]] = {}
71
71
+
for row in rows:
72
72
+
last_fetch.setdefault(row.slug, datetime.fromisoformat(row.last_fetch))
73
73
+
old_eps.setdefault(row.slug, set()).add(row.id)
74
74
+
75
75
parsed_feeds = await asyncio.gather(
76
76
*[
77
77
asyncio.to_thread(
78
78
process_feed,
79
79
podcast,
80
80
-
**last.get(podcast.slug, fallback_status),
80
80
+
old_eps.get(podcast.slug, set()),
81
81
+
last_fetch.get(podcast.slug, None),
81
82
)
82
83
for podcast in config.podcasts
83
84
]
···
86
87
asyncio.to_thread(
87
88
download_feed,
88
89
podcast,
89
89
-
last.get(podcast.slug, fallback_status)["last_fetch"],
90
90
+
last_fetch.get(podcast.slug, None),
90
91
podcast.url,
91
92
)
92
93
for podcast in config.passthru
···
99
100
)
100
101
if len(feeds) == 0:
101
102
return
102
102
-
last_fetch = datetime.now()
103
103
+
now = datetime.now()
103
104
podcasts = [
104
105
PodcastDb(
105
106
slug=feed.slug,
···
107
108
image=feed.parsed.feed.image.href,
108
109
image_title=feed.parsed.feed.image.title,
109
110
last_ep=feed.last_ep,
110
110
-
last_fetch=last_fetch,
111
111
+
last_fetch=now,
111
112
url=feed.url,
112
113
episodes=[
113
114
EpisodeDb(
···
147
148
148
149
149
150
def process_feed(
150
150
-
podcast: Podcast, old_eps: int, last_fetch: datetime | None
151
151
+
podcast: Podcast, old_eps: set[str], last_fetch: datetime | None
151
152
) -> FeedData | None:
152
153
feed = download_feed(podcast, last_fetch)
153
154
if not feed:
154
155
return None
155
156
156
156
-
new_eps = len(feed.parsed.entries)
157
157
-
if len(feed.parsed.entries) <= old_eps:
158
158
-
print(
159
159
-
f"{podcast.slug}: we have {old_eps} while remote has {new_eps}, skipping import"
160
160
-
)
157
157
+
new_eps = {ep.id for ep in feed.parsed.entries}
158
158
+
if len(old_eps - new_eps) == 0:
159
159
+
print(f"{podcast.slug}: no new episodes, skipping import")
160
160
+
elif len(new_eps - old_eps) > 0:
161
161
+
print(f"{podcast.slug}: existing episodes are missing, skipping import")
161
162
else:
163
163
+
print(f"{podcast.slug}: new episodes: {[e.title for e in new_eps]}")
162
164
print(f"{podcast.slug}: annexing {podcast.url}")
163
165
annex_cmd = run(
164
166
[