#!/usr/bin/env python3
#
# getfeeds.py - A simple Python script which pulls entries from a list of
# RSS feeds, formats them "nicely" as HTML, and dumps that HTML to stdout.
#
# Copyright © 2025 Jeff Clough
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation. No representations are made about the suitability of this
# software for any purpose. It is provided "as is" without express or
# implied warranty.

import calendar
import re
import time
from datetime import datetime, timedelta

import feedparser

# A list of feeds to get. Feeds will be scraped in the order listed here.
# We unconditionally grab the whole feed every time we run, so if you're
# doing testing, it's more polite to download the feed to a file, then
# use those filenames here instead of fingerblasting the server every time.
# (See the documentation for feedparser for more information.)
feeds = [
    # ProPublica
    'https://www.propublica.org/feeds/propublica/main',
    # Citation Needed
    'https://www.citationneeded.news/rss/',
    # EFF Updates
    'https://www.eff.org/rss/updates.xml',
    # Open Secrets
    'https://www.opensecrets.org/news/feed',
    # TechDirt
    'https://www.techdirt.com/feed/',
    # Sentencing Law and Policy (Dave Winer)
    'https://sentencing.typepad.com/sentencing_law_and_policy/index.rdf',
    # Fix The News
    'https://fixthenews.com/rss/',
    # It's Going Down
    'https://itsgoingdown.org/feed/',
    # SCOTUS Blog
    'https://www.scotusblog.com/feed/',
    # The Intercept
    'https://theintercept.com/feed/?lang=en',
    # AlterNet
    'https://www.alternet.org/feeds/feed.rss',
    # The American Prospect
    'https://prospect.org/api/rss/all.rss',
    # Common Dreams
    'https://www.commondreams.org/feeds/news.rss',
    # Daily KOS
    'https://feeds.dailykos.com/',
    # Democracy Now
    'https://www.democracynow.org/democracynow.rss'
]

# The maximum number of days old an article can be before it's ignored.
# Said differently, the script will output anything published more
# recently than this number of days.
max_days_old = 2

# These next few variables constitute the bulk of our HTML and styling.
# You probably only want to change the "html_style" variable.
# I tried to make the HTML source code clean.
# The CSS for the generated page. Dropped verbatim into the <style>
# element of html_header below.
html_style = """
html {
  background-color: #ddd;
  color: #333;
  max-width: 80ch;
  margin: auto;
  font-family: sans-serif;
}
img {
  max-width: 75ch;
}
h2 a {
  color: #ddd;
  font-weight: bold;
}
h2 a:visited, a:hover, a:focus {
  color: #ccc;
}
h2 a:hover, a:focus {
  background-color: #ddd;
}
h2 a:active {
  color: #ddd;
}
main p {
  line-height: 1.6em;
}
article {
  margin-top: 1rem;
  margin-bottom: 1rem;
  border-style: solid;
  border-width: 1px;
  border-color: #aaa;
}
article p {
  padding-left: 1rem;
  padding-right: 1rem;
}
article h2 {
  background-color: #4c59e8;
  margin: 0rem;
  padding-left: 1rem;
  padding-right: 1rem;
}
article p.dateline {
  margin: 0rem;
  background-color: #4ca7e8;
  font-size: 0.75em;
}
"""

# Slightly misnamed, this includes everything from the DOCTYPE tag, up
# through the start of our list of entries. Note: this includes the style
# stuff we just defined above.
#
# NOTE(review): the HTML tags in this literal were lost to extraction
# garbling and have been reconstructed from the comments and CSS above;
# confirm the markup against the original script's output.
html_header = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Today's News</title>
<style>
""" + html_style + """
</style>
</head>
<body>
<header>
<h1>Today's News</h1>
</header>
<main>
"""

# Includes everything which should appear after our list of entries,
# including the footer, closing body, and closing HTML tag.
html_footer = """</main>
<footer>
</footer>
</body>
</html>
"""

# This is the end of the user-configurable bits. Past this point, here
# be dragons. (Not really, the code isn't that complicated.)
" tags. Any number of newlines are replaced with a # single "

" construct (we use a regex to compress them). We also # strip leading and trailing whitespace including newlines. def p_ify(text): p_text = "

" for character in re.sub('\n{1,}', '\n', text.strip()): if character == "\n": p_text += "

" else: p_text += character p_text += "

" return p_text # This is used to not include "old" entries, where "old" is defined in the # "days=X" parameter of the call to timedelta. We set this way back up at # the top, remember? old = datetime.now() - timedelta(days=max_days_old) # Initialize an empty list for our entries. We throw everything into here # and sort it out later. entries = [] # Here's where we go through each feed and add the entries in each to our # list. Very brute force. Much not giving a shit about bandwidth. This is # why I recommended that you use local copies of the feeds when you test # changes. for feed in feeds: data = feedparser.parse(feed) for entry in data.entries: entries.append(entry) # We want to sort our entries in reverse-chronological order. # We do that in two steps. First, we run an in-place sort using the info # we get in each entry's "published_parsed". # We do that by converting the time.time_struct there into an ISO date # string and sorting it alphabetically. Then, we reverse the list. # It's ugly, but it works. entries.sort(key = lambda x: time.strftime("%Y-%m-%d %H:%M:%S", x.published_parsed)) entries.reverse() # Here's where we begin to build our HTML page. We throw everything at # stdout because we can. # Output the header. print(html_header) # Output the entries. # They're already sorted, so we just check to see if each entry is too old, # and if not, we output it wrapped in HTML. There are much more efficient # ways to do this, but we're lazy and don't care. # Of note, we use our p_ify function to fix up the descriptions. for entry in entries: if datetime.fromtimestamp(time.mktime(entry.published_parsed)) > old: print("
") print("

" + entry.title + "

") print("") print(p_ify(entry.description)) print("
") # We're done with the entries, output the footer and leave. print(html_footer)