GitHub, Python, Reddit and more
23 Sep 2013

Ever find yourself wondering what the name of that song you listened to a week ago on YouTube was? This happens to me all the time. I browse the /r/electronicmusic subreddit almost every day to find new music to listen to. I would upvote songs and then, a week later, find myself digging through my history for the ones I liked, and it was always a hassle to weed through all my upvoted content just to find songs to listen to.
Knowing Reddit offered an API, I figured there was probably a way to automate reading through my history and listing all the songs I liked, saving myself hours of manual labor (I am very lazy… err, I mean efficient).
I wanted to accomplish three things with this mini project:
- Write a Python script
- Get myself on GitHub
- Build a consistently updated list of good music to listen to
Overview
This Python script calls Reddit’s API, parses a user’s ‘liked’ history for a given subreddit, and writes a list of HTML links to a file.
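For each matching story the script writes a plain anchor tag, preceded by a timestamp line, so the output file ends up looking something like this (the titles and video IDs below are made up):

Last updated at: 09-23-2013 10:15:42<br/><br/>
<a href='http://www.youtube.com/watch?v=XXXXXXXXXXX'>Some Artist - Some Track [Future Garage]</a><br/>
<a href='http://www.youtube.com/watch?v=YYYYYYYYYYY'>Another Artist - Another Track</a><br/>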
Usage
- Download the redditlikedlist.py and rll.cfg files to a directory of your choice.
- Populate the fields in the rll.cfg file (see the Configuration File section below for a description of each field).
- Execute the Python script:
$> python redditlikedlist.py rll.cfg
The first time the script runs it starts with your most recently liked link and works backwards into the past. Reddit will only let you page through your most recent 1000 links. After the first run the script stores the name of the most recent link in the ‘beforelinkname’ field of the configuration file; on subsequent executions the script starts parsing from this link and works forward to the present.
The current version of the script is posted below, but please see the [GitHub repository](https://github.com/ProfessionalAmateur/Reddit-Liked-List-Builder) for the most up-to-date version.
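To make the waypoint behavior concrete, here are the two flavors of request the script ends up building from its URL template (the username and the t3_… link name are made-up examples):

https://ssl.reddit.com/user/BobaFett37/liked.json?limit=100&after=t3_1mxxxx   (first run: page backwards through history)
https://ssl.reddit.com/user/BobaFett37/liked.json?limit=100&before=t3_1mxxxx  (later runs: fetch anything newer than the saved waypoint)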
Configuration File
[PATHS]
outputfile = (The destination of the output file. Example: /home/John/musiclist.html)
subreddit = (The subreddit you want to parse from. Example: electronicmusic)

[WAYPOINT]
beforelinkname = (Leave blank; the script will populate this.)

[CREDENTIALS]
username = (Your Reddit username; you must have a valid Reddit account to use this script.)
password = (Your Reddit password.)
useragent = (Reddit requires a unique user agent for all calls to its API; it is recommended you incorporate your username in the agent. Example: BobaFett37's Liked List Parse)
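For reference, a filled-in rll.cfg might look like this (all values below are placeholders, not a real account):

[PATHS]
outputfile = /home/John/musiclist.html
subreddit = electronicmusic

[WAYPOINT]
beforelinkname =

[CREDENTIALS]
username = BobaFett37
password = hunter2
useragent = BobaFett37's Liked List Parse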
Python Script
Python 3+ is required to run this script. I have another, mostly working version for Python 2.6.6 on GitHub.
# This script will log in to Reddit, return all liked stories for a given user,
# parse all the subreddit likes and build an output file for a website listing.
#
import time
import datetime
import urllib.request, urllib.parse, urllib.error
import http.cookiejar
import json
import configparser
import logging
import tempfile
import os
import argparse

# Variables
hdr = {}
before_flag = False
link_value = ''
liked_url = 'https://ssl.reddit.com/user/<username>/liked.json?limit=100&<direction>=<link_name>'
cj = http.cookiejar.CookieJar()
cfg_file = ''
final_file_location = ''
username = ''
password = ''
subreddit = ''
iCounter = 0
tmpfile = tempfile.TemporaryFile()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))

# Parse input
def parse_input_params():
    global cfg_file
    parser = argparse.ArgumentParser(description='Music list built from liked stories within a subreddit')
    parser.add_argument("config_file", help="Configuration file location")
    args = parser.parse_args()
    cfg_file = args.config_file

# Load config file
def get_config():
    config = configparser.ConfigParser()
    try:
        config.read(cfg_file)
        return config
    except Exception as e:
        logging.error(e)

# Retrieve values from config file
def load_config_values(config):
    try:
        global final_file_location
        global username
        global password
        global subreddit
        global hdr
        global before_flag
        global link_value
        final_file_location = config.get('PATHS', 'outputFile')
        subreddit = config.get('PATHS', 'subreddit')
        link_value = config.get('WAYPOINT', 'beforeLinkName')
        if link_value:
            before_flag = True
        username = config.get('CREDENTIALS', 'username')
        password = config.get('CREDENTIALS', 'password')
        hdr['User-Agent'] = config.get('CREDENTIALS', 'useragent')
    except Exception as e:
        logging.error(e)

# Reddit login function
def login(username, passwd):
    values = {'user': username, 'api_type': 'json', 'passwd': passwd}
    login_url = urllib.request.Request('https://ssl.reddit.com/api/login/', headers=hdr)
    data = urllib.parse.urlencode(values)
    data_bytes = data.encode('utf-8')
    try:
        response = opener.open(login_url, data_bytes).read()
    except Exception as e:
        logging.error(e)

def process_reddit_data():
    global link_value
    global tmpfile
    global iCounter
    try:
        while link_value is not None:
            time.sleep(3)  # stay well under Reddit's one-call-per-two-seconds limit
            liked_json = retrieve_liked(username)
            if not before_flag:
                link_value = json.loads(liked_json)["data"]["after"]
            else:
                link_value = json.loads(liked_json)["data"]["before"]
            liked_json = json.loads(liked_json)["data"]["children"]
            for titles in liked_json:
                iCounter += 1
                if iCounter == 1:
                    # Remember the newest link so the next run can start from here
                    write_config_values(titles["data"]["name"])
                if titles["data"]["subreddit"] == subreddit and titles["data"]["media"] is not None:
                    tmpfile.write(bytes('<a href=\'' + titles["data"]["url"] + '\'>' + titles["data"]["title"] + '</a><br/>\n', 'utf-8'))
    except Exception as e:
        logging.error(e)

# Fetch liked content for a user
def retrieve_liked(username):
    try:
        if before_flag:
            direction = 'before'
        else:
            direction = 'after'
        repl = {'<username>': username, '<link_name>': link_value, '<direction>': direction}
        url = replace_all(liked_url, repl)
        url = urllib.request.Request(url, headers=hdr)
        r = opener.open(url).read()
        response = r.decode('utf-8')
        return response
    except Exception as e:
        logging.error(e)

# Write/update config file
def write_config_values(before_link):
    try:
        configVal.set('WAYPOINT', 'beforeLinkName', before_link)
        f = open(cfg_file, 'w')
        configVal.write(f)
        f.close()
    except Exception as e:
        logging.error(e)
def updated_timestamp():
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%m-%d-%Y %H:%M:%S')
    return "Last updated at: " + st + "<br/><br/>\n"

def write_output():
    global tmpfile
    try:
        if os.path.exists(final_file_location):
            # Final output file already exists; append the old entries after the new ones.
            f2 = open(final_file_location, 'r')
            next(f2)  # skip the old "Last updated at" line
            for line in f2:
                tmpfile.write(bytes(line, 'utf-8'))
            f2.close()
        tmpfile.seek(0)
        f = open(final_file_location, 'wb')
        f.write(bytes(updated_timestamp(), 'utf-8'))
        for line in tmpfile:
            f.write(line)
        f.close()
        tmpfile.close()
    except Exception as e:
        logging.error(e)

# Generic replace-text-using-dict function
def replace_all(text, dic):
    for i, j in dic.items():
        text = text.replace(i, j)
    return text

###########################################################
# Main Processing
###########################################################
parse_input_params()
configVal = get_config()
load_config_values(configVal)

# Call login and retrieve liked content. Each call must be separated by at least 2 seconds.
login(username, password)
process_reddit_data()
write_output()
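Finally, to actually get the “consistently updated list” from my goals above, the script needs to run on a schedule. One way to do that (assuming a Linux machine with cron; the paths below are examples) is a crontab entry like:

# Run the liked-list builder every morning at 7:00
0 7 * * * python /home/John/redditlikedlist.py /home/John/rll.cfg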