!111: tests: aircox.management (#114)

!111

Co-authored-by: bkfox <thomas bkfox net>
Reviewed-on: rc/aircox#114
Thomas Kairos
2023-06-30 16:39:55 +02:00
parent faecdf5495
commit f9ad81ddac
27 changed files with 1534 additions and 625 deletions

View File

@@ -0,0 +1,8 @@
# aircox.controllers
This module provides the following controller classes:
- `log_archiver.LogArchiver`: dumps and loads gzip archives of Log models.
- `sound_file.SoundFile`: handles synchronisation between the filesystem and the database for a sound file.
- `sound_monitor.SoundMonitor`: monitors the filesystem for changes to audio files and synchronises the database.
- `sound_stats.SoundStats` (+ `SoxStats`): gets audio statistics of an audio file using Sox.
- `diffusions.Diffusions`: generates, updates and cleans diffusions.
- `playlist_import.PlaylistImport`: imports playlists from CSV files.
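
A minimal usage sketch, assuming the modules live under `aircox.controllers` as this README's title suggests; the file paths below are placeholders and the querysets come from the existing models:

```python
from aircox.models import Log
from aircox.controllers.log_archiver import LogArchiver
from aircox.controllers.sound_file import SoundFile
from aircox.controllers.playlist_import import PlaylistImport

# dump all logs to gzip archives, keeping the database rows
LogArchiver().archive(Log.objects.all(), keep=True)

# synchronise one audio file with the database (returns the Sound instance)
sound = SoundFile("/srv/media/programs/my_show/archives/20230630_show.ogg").sync()

# import a CSV playlist and attach its tracks to that sound
PlaylistImport("/srv/media/programs/my_show/archives/20230630_show.csv", sound=sound).run()
```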

View File

View File

@@ -0,0 +1,112 @@
import gzip
import os
import yaml
from django.utils.functional import cached_property
from aircox.conf import settings
from aircox.models import Diffusion, Sound, Track, Log
__all__ = ("LogArchiver",)
class LogArchiver:
"""Commodity class used to manage archives of logs."""
@cached_property
def fields(self):
return Log._meta.get_fields()
@staticmethod
def get_path(station, date):
return os.path.join(
settings.LOGS_ARCHIVES_DIR_ABS,
"{}_{}.log.gz".format(date.strftime("%Y%m%d"), station.pk),
)
def archive(self, qs, keep=False):
"""Archive logs of the given queryset.
Delete archived logs if not `keep`. Return the count of archived
logs
"""
if not qs.exists():
return 0
os.makedirs(settings.LOGS_ARCHIVES_DIR_ABS, exist_ok=True)
count = qs.count()
logs = self.sort_logs(qs)
        # Note: since we use YAML, we can simply append new logs when the
        # file already exists <3
        for (station, date), station_logs in logs.items():
            path = self.get_path(station, date)
            # FIXME: remove binary mode
            with gzip.open(path, "ab") as archive:
                data = yaml.dump(
                    [self.serialize(line) for line in station_logs]
).encode("utf8")
archive.write(data)
if not keep:
qs.delete()
return count
@staticmethod
def sort_logs(qs):
"""Sort logs by station and date and return a dict of `{
(station,date): [logs] }`."""
qs = qs.order_by("date")
logs = {}
for log in qs:
key = (log.station, log.date.date())
logs.setdefault(key, []).append(log)
return logs
def serialize(self, log):
"""Serialize log."""
return {i.attname: getattr(log, i.attname) for i in self.fields}
def load(self, station, date):
"""Load an archive returning logs in a list."""
path = self.get_path(station, date)
if not os.path.exists(path):
return []
return self.load_file(path)
def load_file(self, path):
with gzip.open(path, "rb") as archive:
data = archive.read()
        # an explicit Loader is required by recent PyYAML (>= 6)
        logs = yaml.load(data, Loader=yaml.FullLoader)
# we need to preload diffusions, sounds and tracks
rels = {
"diffusion": self.get_relations(logs, Diffusion, "diffusion"),
"sound": self.get_relations(logs, Sound, "sound"),
"track": self.get_relations(logs, Track, "track"),
}
def rel_obj(log, attr):
rel_id = log.get(attr + "_id")
            # .get() tolerates ids whose related object no longer exists
            return rels[attr].get(rel_id) if rel_id else None
return [
Log(
diffusion=rel_obj(log, "diffusion"),
sound=rel_obj(log, "sound"),
track=rel_obj(log, "track"),
                # related objects are passed above; drop the raw *_id keys so
                # the model does not receive both `field` and `field_id`
                **{
                    k: v
                    for k, v in log.items()
                    if k not in ("diffusion_id", "sound_id", "track_id")
                }
)
for log in logs
]
@staticmethod
def get_relations(logs, model, attr):
"""From a list of dict representing logs, retrieve related objects of
the given type."""
attr_id = attr + "_id"
pks = {log[attr_id] for log in logs if attr_id in log}
return {rel.pk: rel for rel in model.objects.filter(pk__in=pks)}
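
A hedged example of driving `LogArchiver`, assuming `Station` is importable from `aircox.models` and at least one station exists; the date and module path are placeholders:

```python
from datetime import date
from aircox.models import Log, Station
from aircox.controllers.log_archiver import LogArchiver  # assumed module path

station = Station.objects.first()
archiver = LogArchiver()

# write logs into LOGS_ARCHIVES_DIR_ABS/<yyyymmdd>_<station.pk>.log.gz
count = archiver.archive(Log.objects.filter(station=station), keep=True)

# read one day's archive back as unsaved Log instances
logs = archiver.load(station, date(2023, 6, 30))
```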

View File

@@ -0,0 +1,117 @@
import csv
import logging
import os
from aircox.conf import settings
from aircox.models import Track
__all__ = ("PlaylistImport",)
logger = logging.getLogger("aircox.commands")
class PlaylistImport:
"""Import one or more playlist for the given sound. Attach it to the
provided sound.
Playlists are in CSV format, where columns are separated with a
'{settings.IMPORT_PLAYLIST_CSV_DELIMITER}'. Text quote is
{settings.IMPORT_PLAYLIST_CSV_TEXT_QUOTE}.
If 'minutes' or 'seconds' are given, position will be expressed as timed
position, instead of position in playlist.
"""
path = None
data = None
tracks = None
track_kwargs = {}
def __init__(self, path=None, **track_kwargs):
self.path = path
self.track_kwargs = track_kwargs
def reset(self):
self.data = None
self.tracks = None
def run(self):
self.read()
if self.track_kwargs.get("sound") is not None:
self.make_playlist()
def read(self):
if not os.path.exists(self.path):
return True
with open(self.path, "r") as file:
logger.info("start reading csv " + self.path)
self.data = list(
csv.DictReader(
(
row
for row in file
if not (
row.startswith("#") or row.startswith("\ufeff#")
)
and row.strip()
),
fieldnames=settings.IMPORT_PLAYLIST_CSV_COLS,
delimiter=settings.IMPORT_PLAYLIST_CSV_DELIMITER,
quotechar=settings.IMPORT_PLAYLIST_CSV_TEXT_QUOTE,
)
)
def make_playlist(self):
"""Make a playlist from the read data, and return it.
If save is true, save it into the database
"""
if self.track_kwargs.get("sound") is None:
logger.error(
"related track's sound is missing. Skip import of "
+ self.path
+ "."
)
return
maps = settings.IMPORT_PLAYLIST_CSV_COLS
tracks = []
logger.info("parse csv file " + self.path)
has_timestamp = ("minutes" or "seconds") in maps
for index, line in enumerate(self.data):
if ("title" or "artist") not in line:
return
try:
timestamp = (
int(line.get("minutes") or 0) * 60
+ int(line.get("seconds") or 0)
if has_timestamp
else None
)
track, created = Track.objects.get_or_create(
title=line.get("title"),
artist=line.get("artist"),
position=index,
**self.track_kwargs
)
track.timestamp = timestamp
track.info = line.get("info")
tags = line.get("tags")
if tags:
track.tags.add(*tags.lower().split(","))
except Exception as err:
logger.warning(
"an error occured for track {index}, it may not "
"have been saved: {err}".format(index=index, err=err)
)
continue
track.save()
tracks.append(track)
self.tracks = tracks
return tracks
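
For illustration, a hedged sketch of an import run; the CSV layout in the comment assumes `IMPORT_PLAYLIST_CSV_COLS` is something like `("artist", "title", "minutes", "seconds", "tags", "info")`, and the path, delimiter and sound are placeholders:

```python
from aircox.models import Sound
from aircox.controllers.playlist_import import PlaylistImport  # assumed module path

# /srv/media/programs/my_show/archives/20230630_show.csv could contain lines such as:
#   Some Artist;Some Title;12;30;jazz,live;recorded live at the studio
sound = Sound.objects.first()
importer = PlaylistImport(
    "/srv/media/programs/my_show/archives/20230630_show.csv", sound=sound
)
importer.run()          # read() then make_playlist()
print(importer.tracks)  # Track instances attached to the sound
```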

View File

@@ -0,0 +1,236 @@
#! /usr/bin/env python3
"""Provide SoundFile which is used to link between database and file system.
File name
=========
It tries to parse the file name to get the date of the diffusion of an
episode and associate the file with it. We use the following format:
yyyymmdd[_n][_][name]
Where:
'yyyy' the year of the episode's diffusion;
'mm' the month of the episode's diffusion;
'dd' the day of the episode's diffusion;
'n' the number of the episode (if multiple episodes);
'name' the title of the sound;
Sound Quality
=============
To check quality of files, call the command sound_quality_check using the
parameters given by the setting SOUND_QUALITY. This script requires
Sox (and soxi).
"""
import logging
import os
import re
from datetime import date
import mutagen
from django.conf import settings as conf
from django.utils import timezone as tz
from django.utils.translation import gettext as _
from aircox import utils
from aircox.models import Program, Sound, Track
from .playlist_import import PlaylistImport
logger = logging.getLogger("aircox.commands")
class SoundFile:
"""Handle synchronisation between sounds on files and database."""
path = None
info = None
path_info = None
sound = None
def __init__(self, path):
self.path = path
@property
def sound_path(self):
"""Relative path name."""
return self.path.replace(conf.MEDIA_ROOT + "/", "")
@property
def episode(self):
return self.sound and self.sound.episode
def sync(
self,
sound=None,
program=None,
deleted=False,
keep_deleted=False,
**kwargs
):
"""Update related sound model and save it."""
if deleted:
return self._on_delete(self.path, keep_deleted)
# FIXME: sound.program as not null
if not program:
program = Program.get_from_path(self.path)
logger.debug('program from path "%s" -> %s', self.path, program)
kwargs["program_id"] = program.pk
if sound:
created = False
else:
sound, created = Sound.objects.get_or_create(
file=self.sound_path, defaults=kwargs
)
self.sound = sound
self.path_info = self.read_path(self.path)
sound.program = program
if created or sound.check_on_file():
sound.name = self.path_info.get("name")
self.info = self.read_file_info()
if self.info is not None:
sound.duration = utils.seconds_to_time(self.info.info.length)
# check for episode
if sound.episode is None and "year" in self.path_info:
sound.episode = self.find_episode(sound, self.path_info)
sound.save()
# check for playlist
self.find_playlist(sound)
return sound
    def _on_delete(self, path, keep_deleted):
        # TODO: remove from db on delete
        if keep_deleted:
            sound = Sound.objects.path(self.path).first()
            if sound:
                sound.type = sound.TYPE_REMOVED
                sound.check_on_file()
                sound.save()
                return sound
        else:
            Sound.objects.path(self.path).delete()
def read_path(self, path):
"""Parse path name returning dictionary of extracted info. It can
contain:
- `year`, `month`, `day`: diffusion date
- `hour`, `minute`: diffusion time
- `n`: sound arbitrary number (used for sound ordering)
- `name`: cleaned name extracted or file name (without extension)
"""
basename = os.path.basename(path)
basename = os.path.splitext(basename)[0]
reg_match = self._path_re.search(basename)
if reg_match:
info = reg_match.groupdict()
for k in ("year", "month", "day", "hour", "minute", "n"):
if info.get(k) is not None:
info[k] = int(info[k])
name = info.get("name")
info["name"] = name and self._into_name(name) or basename
else:
info = {"name": basename}
return info
_path_re = re.compile(
"^(?P<year>[0-9]{4})(?P<month>[0-9]{2})(?P<day>[0-9]{2})"
"(_(?P<hour>[0-9]{2})h(?P<minute>[0-9]{2}))?"
"(_(?P<n>[0-9]+))?"
"_?[ -]*(?P<name>.*)$"
)
def _into_name(self, name):
name = name.replace("_", " ")
return " ".join(r.capitalize() for r in name.split(" "))
def read_file_info(self):
"""Read file information and metadata."""
try:
if os.path.exists(self.path):
return mutagen.File(self.path)
except Exception:
pass
return None
def find_episode(self, sound, path_info):
"""For a given program, check if there is an initial diffusion to
associate to, using the date info we have. Update self.sound and save
it consequently.
We only allow initial diffusion since there should be no rerun.
"""
program, pi = sound.program, path_info
if "year" not in pi or not sound or sound.episode:
return None
year, month, day = pi.get("year"), pi.get("month"), pi.get("day")
if pi.get("hour") is not None:
at = tz.datetime(
year, month, day, pi.get("hour", 0), pi.get("minute", 0)
)
at = tz.make_aware(at)
else:
at = date(year, month, day)
diffusion = program.diffusion_set.at(at).first()
if not diffusion:
return None
logger.debug("%s <--> %s", sound.file.name, str(diffusion.episode))
return diffusion.episode
def find_playlist(self, sound=None, use_meta=True):
"""Find a playlist file corresponding to the sound path, such as:
my_sound.ogg => my_sound.csv.
Use sound's file metadata if no corresponding playlist has been
found and `use_meta` is True.
"""
if sound is None:
sound = self.sound
if sound.track_set.count() > 1:
return
# import playlist
        # use the sound passed as argument (it may differ from self.sound)
        path_noext, ext = os.path.splitext(sound.file.path)
path = path_noext + ".csv"
if os.path.exists(path):
PlaylistImport(path, sound=sound).run()
# use metadata
elif use_meta:
            if self.info is None:
                # read_file_info() does not set self.info itself; store its result
                self.info = self.read_file_info()
if self.info and self.info.tags:
tags = self.info.tags
title, artist, album, year = tuple(
t and ", ".join(t)
for t in (
tags.get(k)
for k in ("title", "artist", "album", "year")
)
)
title = (
title
or (self.path_info and self.path_info.get("name"))
or os.path.basename(path_noext)
)
info = (
"{} ({})".format(album, year)
if album and year
else album or year or ""
)
track = Track(
sound=sound,
position=int(tags.get("tracknumber", 0)),
title=title,
artist=artist or _("unknown"),
info=info,
)
track.save()
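
A hedged sketch of the filename convention in action, using a placeholder path under a program's archive directory (`aircox.controllers.sound_file` is the assumed module path):

```python
from aircox.controllers.sound_file import SoundFile

sf = SoundFile(
    "/srv/media/programs/my_show/archives/20230630_14h00_1_summer_special.ogg"
)
print(sf.read_path(sf.path))
# -> {'year': 2023, 'month': 6, 'day': 30, 'hour': 14, 'minute': 0,
#     'n': 1, 'name': 'Summer Special'}

# create or update the Sound row, attach the matching episode when one is
# diffused at that date, then look for a sibling .csv playlist or file tags
sound = sf.sync()
```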

View File

@@ -0,0 +1,321 @@
#! /usr/bin/env python3
"""Monitor sound files; For each program, check for:
- new files;
- deleted files;
- differences between files and sound;
- quality of the files;
It tries to parse the file name to get the date of the diffusion of an
episode and associate the file with it; WNotifye the following format:
yyyymmdd[_n][_][name]
Where:
'yyyy' the year Notifyhe episode's diffusion;
'mm' the month of the episode's difNotifyon;
'dd' the day of the episode's diffusion;
'n' the number of the episode (if multiple episodes);
'name' the title of the sNotify;
To check quality of files, call the command sound_quality_check using the
parameters given by the setting SOUND_QUALITY. This script requires
Sox (and soxi).
"""
import atexit
from concurrent import futures
import logging
import time
import os
# from datetime import datetime, timedelta
from django.utils.timezone import datetime, timedelta
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
from aircox.conf import settings
from aircox.models import Sound, Program
from .sound_file import SoundFile
# FIXME: logger should be different in used classes (e.g. "aircox.commands")
# defaulting to logging.
logger = logging.getLogger("aircox.commands")
__all__ = (
"Task",
"CreateTask",
"DeleteTask",
"MoveTask",
"ModifiedTask",
"MonitorHandler",
)
class Task:
"""Base class used to execute a specific task on file change event.
Handlers are sent to a multithread pool.
"""
future = None
"""Future that promised the handler's call."""
log_msg = None
"""Log message to display on event happens."""
timestamp = None
"""Last ping timestamp (the event happened)."""
def __init__(self, logger=logging):
self.ping()
def ping(self):
""""""
self.timestamp = datetime.now()
def __call__(self, event, path=None, logger=logging, **kw):
sound_file = SoundFile(path or event.src_path)
if self.log_msg:
msg = self.log_msg.format(event=event, sound_file=sound_file)
logger.info(msg)
sound_file.sync(**kw)
return sound_file
class CreateTask(Task):
log_msg = "Sound file created: {sound_file.path}"
class DeleteTask(Task):
log_msg = "Sound file deleted: {sound_file.path}"
def __call__(self, *args, **kwargs):
kwargs["deleted"] = True
return super().__call__(*args, **kwargs)
class MoveTask(Task):
log_msg = "Sound file moved: {event.src_path} -> {event.dest_path}"
def __call__(self, event, **kw):
sound = Sound.objects.filter(file=event.src_path).first()
if sound:
kw["sound"] = sound
kw["path"] = event.src_path
else:
kw["path"] = event.dest_path
return super().__call__(event, **kw)
class ModifiedTask(Task):
timeout_delta = timedelta(seconds=30)
log_msg = "Sound file updated: {sound_file.path}"
def wait(self):
# multiple call of this handler can be done consecutively, we block
# its thread using timeout
# Note: this method may be subject to some race conflicts, but this
# should not be big a real issue.
timeout = self.timestamp + self.timeout_delta
while datetime.now() < timeout:
time.sleep(self.timeout_delta.total_seconds())
timeout = self.timestamp + self.timeout_delta
def __call__(self, event, **kw):
self.wait()
return super().__call__(event, **kw)
class MonitorHandler(PatternMatchingEventHandler):
"""MonitorHandler is used as a Watchdog event handler.
It uses a multithread pool in order to execute tasks on events. If a
job already exists for this file and event, it pings existing job
without creating a new one.
"""
pool = None
jobs = None
def __init__(self, subdir, pool, jobs=None, **sync_kw):
"""
:param str subdir: sub-directory in program dirs to monitor \
(SOUND_ARCHIVES_SUBDIR or SOUND_EXCERPTS_SUBDIR);
:param concurrent.futures.Executor pool: pool executing jobs on file
change;
:param **sync_kw: kwargs passed to `SoundFile.sync`;
"""
self.subdir = subdir
self.pool = pool
self.jobs = jobs or {}
self.sync_kw = sync_kw
patterns = [
"*/{}/*{}".format(self.subdir, ext)
for ext in settings.SOUND_FILE_EXT
]
super().__init__(patterns=patterns, ignore_directories=True)
def on_created(self, event):
self._submit(CreateTask(), event, "new", **self.sync_kw)
def on_deleted(self, event):
self._submit(DeleteTask(), event, "del")
def on_moved(self, event):
self._submit(MoveTask(), event, "mv", **self.sync_kw)
def on_modified(self, event):
self._submit(ModifiedTask(), event, "up", **self.sync_kw)
def _submit(self, handler, event, job_key_prefix, **kwargs):
"""Send handler job to pool if not already running.
Return tuple with running job and boolean indicating if its a
new one.
"""
key = job_key_prefix + ":" + event.src_path
job = self.jobs.get(key)
if job and not job.future.done():
job.ping()
return job, False
handler.future = self.pool.submit(handler, event, **kwargs)
self.jobs[key] = handler
def done(r):
if self.jobs.get(key) is handler:
del self.jobs[key]
handler.future.add_done_callback(done)
return handler, True
class SoundMonitor:
"""Monitor for filesystem changes in order to synchronise database and
analyse files of a provided program."""
def report(self, program=None, component=None, *content, logger=logging):
content = " ".join([str(c) for c in content])
logger.info(
f"{program}: {content}"
if not component
else f"{program}, {component}: {content}"
)
def scan(self, logger=logging):
"""For all programs, scan dirs.
Return scanned directories.
"""
logger.info("scan all programs...")
programs = Program.objects.filter()
dirs = []
for program in programs:
logger.info(f"#{program.id} {program.title}")
self.scan_for_program(
program,
settings.SOUND_ARCHIVES_SUBDIR,
logger=logger,
type=Sound.TYPE_ARCHIVE,
)
self.scan_for_program(
program,
settings.SOUND_EXCERPTS_SUBDIR,
logger=logger,
type=Sound.TYPE_EXCERPT,
)
dirs.append(program.abspath)
return dirs
def scan_for_program(
self, program, subdir, logger=logging, **sound_kwargs
):
"""Scan a given directory that is associated to the given program, and
update sounds information."""
logger.info("- %s/", subdir)
if not program.ensure_dir(subdir):
return
subdir = os.path.join(program.abspath, subdir)
sounds = []
# sounds in directory
for path in os.listdir(subdir):
path = os.path.join(subdir, path)
if not path.endswith(settings.SOUND_FILE_EXT):
continue
sound_file = SoundFile(path)
sound_file.sync(program=program, **sound_kwargs)
sounds.append(sound_file.sound.pk)
# sounds in db & unchecked
sounds = Sound.objects.filter(file__startswith=subdir).exclude(
pk__in=sounds
)
self.check_sounds(sounds, program=program)
def check_sounds(self, qs, **sync_kwargs):
"""Only check for the sound existence or update."""
# check files
for sound in qs:
if sound.check_on_file():
SoundFile(sound.file.path).sync(sound=sound, **sync_kwargs)
_running = False
    def monitor(self, logger=logging):
        """Run in monitor mode."""
        if self._running:
            raise RuntimeError("already running")
with futures.ThreadPoolExecutor() as pool:
archives_handler = MonitorHandler(
settings.SOUND_ARCHIVES_SUBDIR,
pool,
type=Sound.TYPE_ARCHIVE,
logger=logger,
)
excerpts_handler = MonitorHandler(
settings.SOUND_EXCERPTS_SUBDIR,
pool,
type=Sound.TYPE_EXCERPT,
logger=logger,
)
observer = Observer()
observer.schedule(
archives_handler,
settings.PROGRAMS_DIR_ABS,
recursive=True,
)
observer.schedule(
excerpts_handler,
settings.PROGRAMS_DIR_ABS,
recursive=True,
)
observer.start()
def leave():
observer.stop()
observer.join()
atexit.register(leave)
self._running = True
while self._running:
time.sleep(1)
leave()
atexit.unregister(leave)
def stop(self):
"""Stop monitor() loop."""
self._running = False
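
A hedged sketch of a one-shot scan and of running the watcher from a thread; the threading is illustrative and `aircox.controllers.sound_monitor` is the assumed module path:

```python
import threading
from aircox.controllers.sound_monitor import SoundMonitor

monitor = SoundMonitor()
monitor.scan()  # one-shot: sync archives and excerpts of every program

# watch PROGRAMS_DIR_ABS until stop() is called
thread = threading.Thread(target=monitor.monitor, daemon=True)
thread.start()
# ... later, e.g. from a management command's signal handler:
monitor.stop()
thread.join()
```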

View File

@@ -0,0 +1,130 @@
"""Provide sound analysis class using Sox."""
import logging
import re
import subprocess
logger = logging.getLogger("aircox.commands")
__all__ = ("SoxStats", "SoundStats")
class SoxStats:
"""Run Sox process and parse output."""
attributes = [
"DC offset",
"Min level",
"Max level",
"Pk lev dB",
"RMS lev dB",
"RMS Pk dB",
"RMS Tr dB",
"Flat factor",
"Length s",
]
values = None
def __init__(self, path=None, **kwargs):
"""If path is given, call analyse with path and kwargs."""
if path:
self.analyse(path, **kwargs)
def analyse(self, path, at=None, length=None):
"""If at and length are given use them as excerpt to analyse."""
args = ["sox", path, "-n"]
if at is not None and length is not None:
args += ["trim", str(at), str(length)]
args.append("stats")
p = subprocess.Popen(
args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
# sox outputs to stderr (my god WHYYYY)
out_, out = p.communicate()
self.values = self.parse(str(out, encoding="utf-8"))
def parse(self, output):
"""Parse sox output, settubg values from it."""
values = {}
for attr in self.attributes:
value = re.search(attr + r"\s+(?P<value>\S+)", output)
value = value and value.groupdict()
if value:
try:
value = float(value.get("value"))
except ValueError:
value = None
values[attr] = value
values["length"] = values.pop("Length s", None)
return values
def get(self, attr):
return self.values.get(attr)
class SoundStats:
path = None # file path
sample_length = 120 # default sample length in seconds
stats = None # list of samples statistics
bad = None # list of bad samples
good = None # list of good samples
def __init__(self, path, sample_length=None):
self.path = path
if sample_length is not None:
self.sample_length = sample_length
def get_file_stats(self):
return self.stats and self.stats[0] or None
def analyse(self):
logger.debug("complete file analysis")
self.stats = [SoxStats(self.path)]
position = 0
length = self.stats[0].get("length")
        logger.debug("file stats: %s", self.stats[0].values)
if not self.sample_length:
return
logger.debug("start samples analysis...")
while position < length:
stats = SoxStats(self.path, at=position, length=self.sample_length)
self.stats.append(stats)
position += self.sample_length
def check(self, name, min_val, max_val):
self.good = [
index
for index, stats in enumerate(self.stats)
if min_val <= stats.get(name) <= max_val
]
self.bad = [
index
for index, stats in enumerate(self.stats)
if index not in self.good
]
self.resume()
def resume(self):
if self.good:
logger.debug(
self.path + " -> good: \033[92m%s\033[0m",
", ".join(self._view(self.good)),
)
if self.bad:
logger.debug(
self.path + " -> bad: \033[91m%s\033[0m",
", ".join(self._view(self.bad)),
)
def _view(self, array):
return [
"file"
if index == 0
else "sample {} (at {} seconds)".format(
index, (index - 1) * self.sample_length
)
for index in array
]
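
A hedged usage sketch; the path and the RMS bounds are placeholders (real thresholds would come from the SOUND_QUALITY setting), and `aircox.controllers.sound_stats` is the assumed module path:

```python
from aircox.controllers.sound_stats import SoundStats

stats = SoundStats(
    "/srv/media/programs/my_show/archives/20230630_show.ogg", sample_length=120
)
stats.analyse()                          # whole file first, then 120 s samples
stats.check("RMS lev dB", -18.0, 0.0)    # fills stats.good / stats.bad indexes
print("good samples:", stats.good)
print("bad samples:", stats.bad)
```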