From df964b872290fe70c6f2b385b8eea7ebd279bebd Mon Sep 17 00:00:00 2001 From: Benjamin Sigonneau Date: Sat, 29 May 2021 00:52:25 +0200 Subject: [PATCH] Initial commit --- .gitignore | 2 + README.md | 61 +++++++++++++ balises.ini.example | 11 +++ balises.py | 181 +++++++++++++++++++++++++++++++++++++++ balises.service | 18 ++++ views/base.tpl | 15 ++++ views/search_form.tpl | 1 + views/search_results.tpl | 7 ++ 8 files changed, 296 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 balises.ini.example create mode 100755 balises.py create mode 100644 balises.service create mode 100644 views/base.tpl create mode 100644 views/search_form.tpl create mode 100644 views/search_results.tpl diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6acfbb8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__ +balises.ini diff --git a/README.md b/README.md new file mode 100644 index 0000000..7f90454 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +Radio Balises is a local radio that broadcasts in the Lorient area in +France. The website of the radio offers a list of the last 10 songs, no +more. There is no way to search for a song that was broadcast on a +particular date, except for those last 10 songs. + +The goals for this toy project are: + +* parse the 'last 10 songs' page and store them in a database ; +* offer a simple web interface to search songs played on a particular + date, if they are stored in the database. + +Ideally, the 'last 10 songs' list should be regularly retrieved, so that +the database is complete enough to be useful. Then, I may be able to +find out what was that song that I heard 3 days ago while I was driving. 
+ + +# Requirements + +This project is written in Python and uses the following libraries: + +* requests: to retrieve the 'last 10 songs' page; +* beautiful soup (with the html5lib parser): to parse that page; +* peewee: a simple ORM, to store and query the data in a SQL database; +* bottle: a micro web framework. + +Those libraries are all packaged in Debian and can be installed with: + +``` +apt install python3-requests python3-bs4 python3-html5lib python3-peewee python3-bottle +``` + +It also needs a MySQL server (although switching to another database +should be easy). + + +# Installation + +We suppose the repository is cloned under `/opt/balises`. + +Create a `balises.ini` config file, and fill in MySQL credentials. You +can use `balises.ini.example` as a starting point. + +To update the database every 15 minutes, the following line can be +installed as a cronjob: + +``` +*/15 * * * * /opt/balises/balises.py update +``` + +To launch the server automatically, you can install and use the systemd +service file: + +``` +ln -s /opt/balises/balises.service /etc/systemd/system/balises.service +systemctl enable balises.service +systemctl start balises.service +``` + +The server listens to port 9980 on localhost by default, unless +otherwise specified in the config file. Setting up a reverse proxy and +ssl is left as an exercise to the reader. 
+
diff --git a/balises.ini.example b/balises.ini.example
new file mode 100644
index 0000000..d603fc1
--- /dev/null
+++ b/balises.ini.example
@@ -0,0 +1,11 @@
+[mysql]
+database = balises
+user = please change me
+password = please change me
+host = localhost
+port = 3306
+
+[server]
+host = localhost
+port = 9980
+debug = false
\ No newline at end of file
diff --git a/balises.py b/balises.py
new file mode 100755
index 0000000..f6bfe6d
--- /dev/null
+++ b/balises.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python3
+"""Radio Balises playlist archiver: scraper, CLI and web search page."""
+
+from argparse import ArgumentParser
+from configparser import ConfigParser
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import bs4
+import requests
+
+from peewee import *
+from bottle import hook, request, route, run, view
+
+# ------------------------------------------------------------
+# Configuration and database models
+#
+# The config file next to this script is read first, then the one in the
+# current directory, whose values win when both exist.  Looking next to
+# the script keeps the documented cron job working even when it is not
+# started from the repository directory.
+conf = ConfigParser()
+conf.read([Path(__file__).resolve().parent / 'balises.ini', 'balises.ini'])
+
+db = MySQLDatabase(conf['mysql'].get('database'),
+                   user=conf['mysql'].get('user'),
+                   password=conf['mysql'].get('password'),
+                   host=conf['mysql'].get('host', 'localhost'),
+                   port=conf['mysql'].getint('port', 3306))
+
+
+class BaseModel(Model):
+    """Base model binding its subclasses to the configured database."""
+
+    class Meta:
+        database = db
+
+
+class Song(BaseModel):
+    """A song, unique on its (artist, title) pair."""
+
+    id = AutoField()
+    artist = CharField(default='')
+    title = CharField(default='')
+
+    class Meta:
+        indexes = (
+            (('artist', 'title'), True),  # Unique on artist + title
+        )
+
+
+class AirCast(BaseModel):
+    """One broadcast of a song at a given date and time."""
+
+    id = AutoField()
+    date = DateTimeField()
+    song = ForeignKeyField(Song, backref='dates')
+
+
+# ----------------------------------------------------------------------
+# Util functions
+
+# Shared layout for the lines printed by get_last_ten and print_aircast
+line_template = '{:<10} | {:<25} {:<40}'
+
+
+def http_get(url, timeout=30):
+    """Fetch url and return the response.
+
+    timeout is in seconds and keeps a stalled server from blocking the
+    caller forever.  Raises RuntimeError if the status code is not 200.
+    """
+    response = requests.get(url, timeout=timeout)
+    if response.status_code != 200:
+        print('Uh, oh, unable to fetch', url)
+        print('Http status code:', response.status_code)
+        raise RuntimeError('Download error')
+    return response
+
+
+# ----------------------------------------------------------------------
+# Get song informations
+
+last_ten_url = 'https://radiobalises.com/Play-list/last10.html'
+
+
+def get_last_ten():
+    """Scrape the 'last 10 songs' page and store the aircasts it lists.
+
+    Songs and aircasts already in the database are not duplicated.
+    Entries whose text is not of the form 'artist - title' are skipped.
+    """
+    response = http_get(last_ten_url)
+    soup = bs4.BeautifulSoup(response.content, 'html5lib')
+    dates = soup.select('p.rldj-cell span.post-date')
+    for elem in dates:
+        dt = datetime.fromisoformat(elem.text)
+        parts = [x.strip() for x in elem.previous.previous.split(' - ', 1)]
+        if len(parts) != 2:
+            print('Skipping entry without artist/title:', parts[0])
+            continue
+        artist, title = parts
+        song, _ = Song.get_or_create(artist=artist, title=title)
+        # get_or_create does not play nice with the unique constraint on
+        # the date, so we use a simple try/except instead
+        try:
+            AirCast.get(date=dt, song=song)
+        except DoesNotExist:
+            AirCast.create(date=dt, song=song)
+
+        print(line_template.format(str(dt), artist, title))
+
+
+# ----------------------------------------------------------------------
+# Search song
+
+def search_song(query):
+    """Return the aircasts within 30 minutes of query, oldest first.
+
+    query must be a datetime.
+    """
+    delta = timedelta(minutes=30)
+    matches = AirCast.select().order_by(AirCast.date).where(
+        (AirCast.date > query - delta) &
+        (AirCast.date < query + delta))
+    return list(matches)
+
+
+def print_aircast(x):
+    """Print the date, artist and title of aircast x on one line."""
+    print(line_template.format(str(x.date), x.song.artist, x.song.title))
+
+
+# ----------------------------------------------------------------------
+# Web application
+#
+# Very simple, just a page with an input for a date/time, a query button and a
+# list of results
+
+@route('/', method='GET')
+@view('search_form')
+def main_page():
+    """Render the search form pre-filled with the current date and time."""
+    now = datetime.now()
+    # Zero-padded HH:MM (09:05, not 9:5) so that the value can be parsed
+    # back by datetime.fromisoformat in results_page
+    return dict(title='', date=str(now.date()), time=now.strftime('%H:%M'))
+
+
+@route('/', method='POST')
+@view('search_results')
+def results_page():
+    """Render the songs aired around the submitted date and time."""
+    date = request.forms.date
+    time = request.forms.time
+    try:
+        dt = datetime.fromisoformat('{} {}'.format(date, time))
+    except ValueError:
+        # Missing or malformed input: show an empty list rather than
+        # answering with a server error
+        results = []
+    else:
+        results = search_song(dt)
+    return dict(results=results, date=date, time=time)
+
+
+@hook('before_request')
+def connect_to_db():
+    """Open the database connection before a request is handled."""
+    db.connect()
+
+
+@hook('after_request')
+def close_db_connection():
+    """Close the database connection after a request is handled."""
+    db.close()
+
+
+# ----------------------------------------------------------------------
+# Argument parsing
+# use a decorator to simplify argparse usage, as suggested by
+# https://mike.depalatis.net/blog/simplifying-argparse.html
+
+cli = ArgumentParser(description='Balises')
+subparsers = cli.add_subparsers(dest="subcommand")
+
+
+def subcommand(args=(), parent=subparsers):
+    """Register the decorated function as a subcommand of parent.
+
+    The subcommand is named after the function, described by its
+    docstring, and accepts the arguments in args (built by argument()).
+    """
+    def decorator(func):
+        parser = parent.add_parser(func.__name__, description=func.__doc__)
+        for arg in args:
+            parser.add_argument(*arg[0], **arg[1])
+        parser.set_defaults(func=func)
+        # Return func so the decorated name is not rebound to None
+        return func
+    return decorator
+
+
+def argument(*name_or_flags, **kwargs):
+    """Pack add_argument() parameters in the form subcommand() expects."""
+    return ([*name_or_flags], kwargs)
+
+
+@subcommand([argument('query', help='Search query')])
+def search(args):
+    """Print the songs aired around an ISO date and time."""
+    results = search_song(datetime.fromisoformat(args.query))
+    for res in results:
+        print_aircast(res)
+
+
+@subcommand()
+def update(args):
+    """Fetch the 'last 10 songs' page and store it in the database."""
+    get_last_ten()
+
+
+@subcommand()
+def serve(args):
+    """Run the web interface."""
+    run(host=conf['server'].get('host', 'localhost'),
+        port=conf['server'].getint('port', 9980),
+        debug=conf['server'].getboolean('debug', False))
+
+
+# ----------------------------------------------------------------------
+# Main
+
+def main():
+    """Parse the command line and run the selected subcommand."""
+    args = cli.parse_args()
+    if args.subcommand is None:
+        cli.print_help()
+    else:
+        args.func(args)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/balises.service b/balises.service
new file mode 100644
index 0000000..1a27f36
--- /dev/null
+++ b/balises.service
@@ -0,0 +1,18 @@
+[Unit]
+Description=Radio Balises titles fetcher
+After=syslog.target
+After=network.target
+Requires=mysql.service
+#Requires=mariadb.service
+
+[Service]
+RestartSec=2s
+Type=simple
+User=balises
+Group=balises
+WorkingDirectory=/opt/balises
+ExecStart=/opt/balises/balises.py serve
+Restart=always
+
+[Install]
+WantedBy=multi-user.target
diff --git a/views/base.tpl b/views/base.tpl
new file mode 100644
index 0000000..3113445
--- /dev/null
+++ b/views/base.tpl
@@ -0,0 +1,15 @@
+
+
+ {{title or 'No title'}}
+
+
+
+ + + + + +
+ {{!base}} + + diff --git a/views/search_form.tpl b/views/search_form.tpl new file mode 100644 index 0000000..ca73eea --- /dev/null +++ b/views/search_form.tpl @@ -0,0 +1 @@ +% rebase('base.tpl', title='Radio Balises') diff --git a/views/search_results.tpl b/views/search_results.tpl new file mode 100644 index 0000000..4f6fb1c --- /dev/null +++ b/views/search_results.tpl @@ -0,0 +1,7 @@ +% rebase('base.tpl', title='Radio Balises - Résultats') +

Liste des chansons

+ \ No newline at end of file