!111: tests: aircox.management (#114)

!111

Co-authored-by: bkfox <thomas bkfox net>
Reviewed-on: rc/aircox#114
Thomas Kairos
2023-06-30 16:39:55 +02:00
parent faecdf5495
commit f9ad81ddac
27 changed files with 1534 additions and 625 deletions

View File

@@ -0,0 +1,8 @@
# aircox.controllers
This module provides the following controller classes:
- `log_archiver.LogArchiver`: dumps and loads gzip archives of Log models.
- `sound_file.SoundFile`: handles synchronisation between the filesystem and the database for a sound file.
- `sound_monitor.SoundMonitor`: monitors the filesystem for changes to audio files and synchronises the database.
- `sound_stats.SoundStats` (+ `SoxStats`): gets audio statistics of an audio file using Sox.
- `diffusions.Diffusions`: generates, updates and cleans diffusions.
- `playlist_import.PlaylistImport`: imports playlists from CSV files.
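
A minimal usage sketch, assuming the modules live under `aircox.controllers` as this README's title suggests; the file paths below are placeholders and the querysets come from the existing models:

```python
from aircox.models import Log
from aircox.controllers.log_archiver import LogArchiver
from aircox.controllers.sound_file import SoundFile
from aircox.controllers.playlist_import import PlaylistImport

# dump all logs to gzip archives, keeping the database rows
LogArchiver().archive(Log.objects.all(), keep=True)

# synchronise one audio file with the database (returns the Sound instance)
sound = SoundFile("/srv/media/programs/my_show/archives/20230630_show.ogg").sync()

# import a CSV playlist and attach its tracks to that sound
PlaylistImport("/srv/media/programs/my_show/archives/20230630_show.csv", sound=sound).run()
```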

View File

View File

@@ -0,0 +1,112 @@
import gzip
import os
import yaml
from django.utils.functional import cached_property
from aircox.conf import settings
from aircox.models import Diffusion, Sound, Track, Log
__all__ = ("LogArchiver",)
class LogArchiver:
"""Commodity class used to manage archives of logs."""
@cached_property
def fields(self):
return Log._meta.get_fields()
@staticmethod
def get_path(station, date):
return os.path.join(
settings.LOGS_ARCHIVES_DIR_ABS,
"{}_{}.log.gz".format(date.strftime("%Y%m%d"), station.pk),
)
def archive(self, qs, keep=False):
"""Archive logs of the given queryset.
Delete archived logs if not `keep`. Return the count of archived
logs
"""
if not qs.exists():
return 0
os.makedirs(settings.LOGS_ARCHIVES_DIR_ABS, exist_ok=True)
count = qs.count()
logs = self.sort_logs(qs)
        # Note: since we use YAML, we can simply append new logs when the
        # file already exists <3
        for (station, date), station_logs in logs.items():
            path = self.get_path(station, date)
            # FIXME: remove binary mode
            with gzip.open(path, "ab") as archive:
                data = yaml.dump(
                    [self.serialize(line) for line in station_logs]
).encode("utf8")
archive.write(data)
if not keep:
qs.delete()
return count
@staticmethod
def sort_logs(qs):
"""Sort logs by station and date and return a dict of `{
(station,date): [logs] }`."""
qs = qs.order_by("date")
logs = {}
for log in qs:
key = (log.station, log.date.date())
logs.setdefault(key, []).append(log)
return logs
def serialize(self, log):
"""Serialize log."""
return {i.attname: getattr(log, i.attname) for i in self.fields}
def load(self, station, date):
"""Load an archive returning logs in a list."""
path = self.get_path(station, date)
if not os.path.exists(path):
return []
return self.load_file(path)
def load_file(self, path):
with gzip.open(path, "rb") as archive:
data = archive.read()
        # an explicit Loader is required by recent PyYAML (>= 6)
        logs = yaml.load(data, Loader=yaml.FullLoader)
# we need to preload diffusions, sounds and tracks
rels = {
"diffusion": self.get_relations(logs, Diffusion, "diffusion"),
"sound": self.get_relations(logs, Sound, "sound"),
"track": self.get_relations(logs, Track, "track"),
}
def rel_obj(log, attr):
rel_id = log.get(attr + "_id")
            # .get() tolerates ids whose related object no longer exists
            return rels[attr].get(rel_id) if rel_id else None
return [
Log(
diffusion=rel_obj(log, "diffusion"),
sound=rel_obj(log, "sound"),
track=rel_obj(log, "track"),
                # related objects are passed above; drop the raw *_id keys so
                # the model does not receive both `field` and `field_id`
                **{
                    k: v
                    for k, v in log.items()
                    if k not in ("diffusion_id", "sound_id", "track_id")
                }
)
for log in logs
]
@staticmethod
def get_relations(logs, model, attr):
"""From a list of dict representing logs, retrieve related objects of
the given type."""
attr_id = attr + "_id"
pks = {log[attr_id] for log in logs if attr_id in log}
return {rel.pk: rel for rel in model.objects.filter(pk__in=pks)}
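
A hedged example of driving `LogArchiver`, assuming `Station` is importable from `aircox.models` and at least one station exists; the date and module path are placeholders:

```python
from datetime import date
from aircox.models import Log, Station
from aircox.controllers.log_archiver import LogArchiver  # assumed module path

station = Station.objects.first()
archiver = LogArchiver()

# write logs into LOGS_ARCHIVES_DIR_ABS/<yyyymmdd>_<station.pk>.log.gz
count = archiver.archive(Log.objects.filter(station=station), keep=True)

# read one day's archive back as unsaved Log instances
logs = archiver.load(station, date(2023, 6, 30))
```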

View File

@@ -0,0 +1,117 @@
import csv
import logging
import os
from aircox.conf import settings
from aircox.models import Track
__all__ = ("PlaylistImport",)
logger = logging.getLogger("aircox.commands")
class PlaylistImport:
"""Import one or more playlist for the given sound. Attach it to the
provided sound.
Playlists are in CSV format, where columns are separated with a
'{settings.IMPORT_PLAYLIST_CSV_DELIMITER}'. Text quote is
{settings.IMPORT_PLAYLIST_CSV_TEXT_QUOTE}.
If 'minutes' or 'seconds' are given, position will be expressed as timed
position, instead of position in playlist.
"""
path = None
data = None
tracks = None
track_kwargs = {}
def __init__(self, path=None, **track_kwargs):
self.path = path
self.track_kwargs = track_kwargs
def reset(self):
self.data = None
self.tracks = None
def run(self):
self.read()
if self.track_kwargs.get("sound") is not None:
self.make_playlist()
def read(self):
if not os.path.exists(self.path):
return True
with open(self.path, "r") as file:
logger.info("start reading csv " + self.path)
self.data = list(
csv.DictReader(
(
row
for row in file
if not (
row.startswith("#") or row.startswith("\ufeff#")
)
and row.strip()
),
fieldnames=settings.IMPORT_PLAYLIST_CSV_COLS,
delimiter=settings.IMPORT_PLAYLIST_CSV_DELIMITER,
quotechar=settings.IMPORT_PLAYLIST_CSV_TEXT_QUOTE,
)
)
def make_playlist(self):
"""Make a playlist from the read data, and return it.
If save is true, save it into the database
"""
if self.track_kwargs.get("sound") is None:
logger.error(
"related track's sound is missing. Skip import of "
+ self.path
+ "."
)
return
maps = settings.IMPORT_PLAYLIST_CSV_COLS
tracks = []
logger.info("parse csv file " + self.path)
has_timestamp = ("minutes" or "seconds") in maps
for index, line in enumerate(self.data):
if ("title" or "artist") not in line:
return
try:
timestamp = (
int(line.get("minutes") or 0) * 60
+ int(line.get("seconds") or 0)
if has_timestamp
else None
)
track, created = Track.objects.get_or_create(
title=line.get("title"),
artist=line.get("artist"),
position=index,
**self.track_kwargs
)
track.timestamp = timestamp
track.info = line.get("info")
tags = line.get("tags")
if tags:
track.tags.add(*tags.lower().split(","))
except Exception as err:
logger.warning(
"an error occured for track {index}, it may not "
"have been saved: {err}".format(index=index, err=err)
)
continue
track.save()
tracks.append(track)
self.tracks = tracks
return tracks
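
For illustration, a hedged sketch of an import run; the CSV layout in the comment assumes `IMPORT_PLAYLIST_CSV_COLS` is something like `("artist", "title", "minutes", "seconds", "tags", "info")`, and the path, delimiter and sound are placeholders:

```python
from aircox.models import Sound
from aircox.controllers.playlist_import import PlaylistImport  # assumed module path

# /srv/media/programs/my_show/archives/20230630_show.csv could contain lines such as:
#   Some Artist;Some Title;12;30;jazz,live;recorded live at the studio
sound = Sound.objects.first()
importer = PlaylistImport(
    "/srv/media/programs/my_show/archives/20230630_show.csv", sound=sound
)
importer.run()          # read() then make_playlist()
print(importer.tracks)  # Track instances attached to the sound
```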

View File

@@ -0,0 +1,236 @@
#! /usr/bin/env python3
"""Provide SoundFile which is used to link between database and file system.
File name
=========
It tries to parse the file name to get the date of the diffusion of an
episode and associate the file with it. We use the following format:
yyyymmdd[_n][_][name]
Where:
'yyyy' the year of the episode's diffusion;
'mm' the month of the episode's diffusion;
'dd' the day of the episode's diffusion;
'n' the number of the episode (if multiple episodes);
'name' the title of the sound;
Sound Quality
=============
To check quality of files, call the command sound_quality_check using the
parameters given by the setting SOUND_QUALITY. This script requires
Sox (and soxi).
"""
import logging
import os
import re
from datetime import date
import mutagen
from django.conf import settings as conf
from django.utils import timezone as tz
from django.utils.translation import gettext as _
from aircox import utils
from aircox.models import Program, Sound, Track
from .playlist_import import PlaylistImport
logger = logging.getLogger("aircox.commands")
class SoundFile:
"""Handle synchronisation between sounds on files and database."""
path = None
info = None
path_info = None
sound = None
def __init__(self, path):
self.path = path
@property
def sound_path(self):
"""Relative path name."""
return self.path.replace(conf.MEDIA_ROOT + "/", "")
@property
def episode(self):
return self.sound and self.sound.episode
def sync(
self,
sound=None,
program=None,
deleted=False,
keep_deleted=False,
**kwargs
):
"""Update related sound model and save it."""
if deleted:
return self._on_delete(self.path, keep_deleted)
# FIXME: sound.program as not null
if not program:
program = Program.get_from_path(self.path)
logger.debug('program from path "%s" -> %s', self.path, program)
kwargs["program_id"] = program.pk
if sound:
created = False
else:
sound, created = Sound.objects.get_or_create(
file=self.sound_path, defaults=kwargs
)
self.sound = sound
self.path_info = self.read_path(self.path)
sound.program = program
if created or sound.check_on_file():
sound.name = self.path_info.get("name")
self.info = self.read_file_info()
if self.info is not None:
sound.duration = utils.seconds_to_time(self.info.info.length)
# check for episode
if sound.episode is None and "year" in self.path_info:
sound.episode = self.find_episode(sound, self.path_info)
sound.save()
# check for playlist
self.find_playlist(sound)
return sound
    def _on_delete(self, path, keep_deleted):
        # TODO: remove from db on delete
        if keep_deleted:
            sound = Sound.objects.path(self.path).first()
            if sound:
                sound.type = sound.TYPE_REMOVED
                sound.check_on_file()
                sound.save()
                return sound
        else:
            Sound.objects.path(self.path).delete()
def read_path(self, path):
"""Parse path name returning dictionary of extracted info. It can
contain:
- `year`, `month`, `day`: diffusion date
- `hour`, `minute`: diffusion time
- `n`: sound arbitrary number (used for sound ordering)
- `name`: cleaned name extracted or file name (without extension)
"""
basename = os.path.basename(path)
basename = os.path.splitext(basename)[0]
reg_match = self._path_re.search(basename)
if reg_match:
info = reg_match.groupdict()
for k in ("year", "month", "day", "hour", "minute", "n"):
if info.get(k) is not None:
info[k] = int(info[k])
name = info.get("name")
info["name"] = name and self._into_name(name) or basename
else:
info = {"name": basename}
return info
_path_re = re.compile(
"^(?P<year>[0-9]{4})(?P<month>[0-9]{2})(?P<day>[0-9]{2})"
"(_(?P<hour>[0-9]{2})h(?P<minute>[0-9]{2}))?"
"(_(?P<n>[0-9]+))?"
"_?[ -]*(?P<name>.*)$"
)
def _into_name(self, name):
name = name.replace("_", " ")
return " ".join(r.capitalize() for r in name.split(" "))
def read_file_info(self):
"""Read file information and metadata."""
try:
if os.path.exists(self.path):
return mutagen.File(self.path)
except Exception:
pass
return None
def find_episode(self, sound, path_info):
"""For a given program, check if there is an initial diffusion to
associate to, using the date info we have. Update self.sound and save
it consequently.
We only allow initial diffusion since there should be no rerun.
"""
program, pi = sound.program, path_info
if "year" not in pi or not sound or sound.episode:
return None
year, month, day = pi.get("year"), pi.get("month"), pi.get("day")
if pi.get("hour") is not None:
at = tz.datetime(
year, month, day, pi.get("hour", 0), pi.get("minute", 0)
)
at = tz.make_aware(at)
else:
at = date(year, month, day)
diffusion = program.diffusion_set.at(at).first()
if not diffusion:
return None
logger.debug("%s <--> %s", sound.file.name, str(diffusion.episode))
return diffusion.episode
def find_playlist(self, sound=None, use_meta=True):
"""Find a playlist file corresponding to the sound path, such as:
my_sound.ogg => my_sound.csv.
Use sound's file metadata if no corresponding playlist has been
found and `use_meta` is True.
"""
if sound is None:
sound = self.sound
if sound.track_set.count() > 1:
return
# import playlist
        # use the sound passed as argument (it may differ from self.sound)
        path_noext, ext = os.path.splitext(sound.file.path)
path = path_noext + ".csv"
if os.path.exists(path):
PlaylistImport(path, sound=sound).run()
# use metadata
elif use_meta:
            if self.info is None:
                # read_file_info() does not set self.info itself; store its result
                self.info = self.read_file_info()
if self.info and self.info.tags:
tags = self.info.tags
title, artist, album, year = tuple(
t and ", ".join(t)
for t in (
tags.get(k)
for k in ("title", "artist", "album", "year")
)
)
title = (
title
or (self.path_info and self.path_info.get("name"))
or os.path.basename(path_noext)
)
info = (
"{} ({})".format(album, year)
if album and year
else album or year or ""
)
track = Track(
sound=sound,
position=int(tags.get("tracknumber", 0)),
title=title,
artist=artist or _("unknown"),
info=info,
)
track.save()
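
A hedged sketch of the filename convention in action, using a placeholder path under a program's archive directory (`aircox.controllers.sound_file` is the assumed module path):

```python
from aircox.controllers.sound_file import SoundFile

sf = SoundFile(
    "/srv/media/programs/my_show/archives/20230630_14h00_1_summer_special.ogg"
)
print(sf.read_path(sf.path))
# -> {'year': 2023, 'month': 6, 'day': 30, 'hour': 14, 'minute': 0,
#     'n': 1, 'name': 'Summer Special'}

# create or update the Sound row, attach the matching episode when one is
# diffused at that date, then look for a sibling .csv playlist or file tags
sound = sf.sync()
```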

View File

@@ -0,0 +1,321 @@
#! /usr/bin/env python3
"""Monitor sound files; For each program, check for:
- new files;
- deleted files;
- differences between files and sound;
- quality of the files;
It tries to parse the file name to get the date of the diffusion of an
episode and associate the file with it; WNotifye the following format:
yyyymmdd[_n][_][name]
Where:
'yyyy' the year Notifyhe episode's diffusion;
'mm' the month of the episode's difNotifyon;
'dd' the day of the episode's diffusion;
'n' the number of the episode (if multiple episodes);
'name' the title of the sNotify;
To check quality of files, call the command sound_quality_check using the
parameters given by the setting SOUND_QUALITY. This script requires
Sox (and soxi).
"""
import atexit
from concurrent import futures
import logging
import time
import os
# from datetime import datetime, timedelta
from django.utils.timezone import datetime, timedelta
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
from aircox.conf import settings
from aircox.models import Sound, Program
from .sound_file import SoundFile
# FIXME: logger should be different in used classes (e.g. "aircox.commands")
# defaulting to logging.
logger = logging.getLogger("aircox.commands")
__all__ = (
"Task",
"CreateTask",
"DeleteTask",
"MoveTask",
"ModifiedTask",
"MonitorHandler",
)
class Task:
"""Base class used to execute a specific task on file change event.
Handlers are sent to a multithread pool.
"""
future = None
"""Future that promised the handler's call."""
log_msg = None
"""Log message to display on event happens."""
timestamp = None
"""Last ping timestamp (the event happened)."""
def __init__(self, logger=logging):
self.ping()
def ping(self):
""""""
self.timestamp = datetime.now()
def __call__(self, event, path=None, logger=logging, **kw):
sound_file = SoundFile(path or event.src_path)
if self.log_msg:
msg = self.log_msg.format(event=event, sound_file=sound_file)
logger.info(msg)
sound_file.sync(**kw)
return sound_file
class CreateTask(Task):
log_msg = "Sound file created: {sound_file.path}"
class DeleteTask(Task):
log_msg = "Sound file deleted: {sound_file.path}"
def __call__(self, *args, **kwargs):
kwargs["deleted"] = True
return super().__call__(*args, **kwargs)
class MoveTask(Task):
log_msg = "Sound file moved: {event.src_path} -> {event.dest_path}"
def __call__(self, event, **kw):
sound = Sound.objects.filter(file=event.src_path).first()
if sound:
kw["sound"] = sound
kw["path"] = event.src_path
else:
kw["path"] = event.dest_path
return super().__call__(event, **kw)
class ModifiedTask(Task):
timeout_delta = timedelta(seconds=30)
log_msg = "Sound file updated: {sound_file.path}"
def wait(self):
# multiple call of this handler can be done consecutively, we block
# its thread using timeout
# Note: this method may be subject to some race conflicts, but this
# should not be big a real issue.
timeout = self.timestamp + self.timeout_delta
while datetime.now() < timeout:
time.sleep(self.timeout_delta.total_seconds())
timeout = self.timestamp + self.timeout_delta
def __call__(self, event, **kw):
self.wait()
return super().__call__(event, **kw)
class MonitorHandler(PatternMatchingEventHandler):
"""MonitorHandler is used as a Watchdog event handler.
It uses a multithread pool in order to execute tasks on events. If a
job already exists for this file and event, it pings existing job
without creating a new one.
"""
pool = None
jobs = None
def __init__(self, subdir, pool, jobs=None, **sync_kw):
"""
:param str subdir: sub-directory in program dirs to monitor \
(SOUND_ARCHIVES_SUBDIR or SOUND_EXCERPTS_SUBDIR);
:param concurrent.futures.Executor pool: pool executing jobs on file
change;
:param **sync_kw: kwargs passed to `SoundFile.sync`;
"""
self.subdir = subdir
self.pool = pool
self.jobs = jobs or {}
self.sync_kw = sync_kw
patterns = [
"*/{}/*{}".format(self.subdir, ext)
for ext in settings.SOUND_FILE_EXT
]
super().__init__(patterns=patterns, ignore_directories=True)
def on_created(self, event):
self._submit(CreateTask(), event, "new", **self.sync_kw)
def on_deleted(self, event):
self._submit(DeleteTask(), event, "del")
def on_moved(self, event):
self._submit(MoveTask(), event, "mv", **self.sync_kw)
def on_modified(self, event):
self._submit(ModifiedTask(), event, "up", **self.sync_kw)
def _submit(self, handler, event, job_key_prefix, **kwargs):
"""Send handler job to pool if not already running.
Return tuple with running job and boolean indicating if its a
new one.
"""
key = job_key_prefix + ":" + event.src_path
job = self.jobs.get(key)
if job and not job.future.done():
job.ping()
return job, False
handler.future = self.pool.submit(handler, event, **kwargs)
self.jobs[key] = handler
def done(r):
if self.jobs.get(key) is handler:
del self.jobs[key]
handler.future.add_done_callback(done)
return handler, True
class SoundMonitor:
"""Monitor for filesystem changes in order to synchronise database and
analyse files of a provided program."""
def report(self, program=None, component=None, *content, logger=logging):
content = " ".join([str(c) for c in content])
logger.info(
f"{program}: {content}"
if not component
else f"{program}, {component}: {content}"
)
def scan(self, logger=logging):
"""For all programs, scan dirs.
Return scanned directories.
"""
logger.info("scan all programs...")
programs = Program.objects.filter()
dirs = []
for program in programs:
logger.info(f"#{program.id} {program.title}")
self.scan_for_program(
program,
settings.SOUND_ARCHIVES_SUBDIR,
logger=logger,
type=Sound.TYPE_ARCHIVE,
)
self.scan_for_program(
program,
settings.SOUND_EXCERPTS_SUBDIR,
logger=logger,
type=Sound.TYPE_EXCERPT,
)
dirs.append(program.abspath)
return dirs
def scan_for_program(
self, program, subdir, logger=logging, **sound_kwargs
):
"""Scan a given directory that is associated to the given program, and
update sounds information."""
logger.info("- %s/", subdir)
if not program.ensure_dir(subdir):
return
subdir = os.path.join(program.abspath, subdir)
sounds = []
# sounds in directory
for path in os.listdir(subdir):
path = os.path.join(subdir, path)
if not path.endswith(settings.SOUND_FILE_EXT):
continue
sound_file = SoundFile(path)
sound_file.sync(program=program, **sound_kwargs)
sounds.append(sound_file.sound.pk)
# sounds in db & unchecked
sounds = Sound.objects.filter(file__startswith=subdir).exclude(
pk__in=sounds
)
self.check_sounds(sounds, program=program)
def check_sounds(self, qs, **sync_kwargs):
"""Only check for the sound existence or update."""
# check files
for sound in qs:
if sound.check_on_file():
SoundFile(sound.file.path).sync(sound=sound, **sync_kwargs)
_running = False
    def monitor(self, logger=logging):
        """Run in monitor mode."""
        if self._running:
            raise RuntimeError("already running")
with futures.ThreadPoolExecutor() as pool:
archives_handler = MonitorHandler(
settings.SOUND_ARCHIVES_SUBDIR,
pool,
type=Sound.TYPE_ARCHIVE,
logger=logger,
)
excerpts_handler = MonitorHandler(
settings.SOUND_EXCERPTS_SUBDIR,
pool,
type=Sound.TYPE_EXCERPT,
logger=logger,
)
observer = Observer()
observer.schedule(
archives_handler,
settings.PROGRAMS_DIR_ABS,
recursive=True,
)
observer.schedule(
excerpts_handler,
settings.PROGRAMS_DIR_ABS,
recursive=True,
)
observer.start()
def leave():
observer.stop()
observer.join()
atexit.register(leave)
self._running = True
while self._running:
time.sleep(1)
leave()
atexit.unregister(leave)
def stop(self):
"""Stop monitor() loop."""
self._running = False
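
A hedged sketch of a one-shot scan and of running the watcher from a thread; the threading is illustrative and `aircox.controllers.sound_monitor` is the assumed module path:

```python
import threading
from aircox.controllers.sound_monitor import SoundMonitor

monitor = SoundMonitor()
monitor.scan()  # one-shot: sync archives and excerpts of every program

# watch PROGRAMS_DIR_ABS until stop() is called
thread = threading.Thread(target=monitor.monitor, daemon=True)
thread.start()
# ... later, e.g. from a management command's signal handler:
monitor.stop()
thread.join()
```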

View File

@@ -0,0 +1,130 @@
"""Provide sound analysis class using Sox."""
import logging
import re
import subprocess
logger = logging.getLogger("aircox.commands")
__all__ = ("SoxStats", "SoundStats")
class SoxStats:
"""Run Sox process and parse output."""
attributes = [
"DC offset",
"Min level",
"Max level",
"Pk lev dB",
"RMS lev dB",
"RMS Pk dB",
"RMS Tr dB",
"Flat factor",
"Length s",
]
values = None
def __init__(self, path=None, **kwargs):
"""If path is given, call analyse with path and kwargs."""
if path:
self.analyse(path, **kwargs)
def analyse(self, path, at=None, length=None):
"""If at and length are given use them as excerpt to analyse."""
args = ["sox", path, "-n"]
if at is not None and length is not None:
args += ["trim", str(at), str(length)]
args.append("stats")
p = subprocess.Popen(
args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
# sox outputs to stderr (my god WHYYYY)
out_, out = p.communicate()
self.values = self.parse(str(out, encoding="utf-8"))
def parse(self, output):
"""Parse sox output, settubg values from it."""
values = {}
for attr in self.attributes:
value = re.search(attr + r"\s+(?P<value>\S+)", output)
value = value and value.groupdict()
if value:
try:
value = float(value.get("value"))
except ValueError:
value = None
values[attr] = value
values["length"] = values.pop("Length s", None)
return values
def get(self, attr):
return self.values.get(attr)
class SoundStats:
path = None # file path
sample_length = 120 # default sample length in seconds
stats = None # list of samples statistics
bad = None # list of bad samples
good = None # list of good samples
def __init__(self, path, sample_length=None):
self.path = path
if sample_length is not None:
self.sample_length = sample_length
def get_file_stats(self):
return self.stats and self.stats[0] or None
def analyse(self):
logger.debug("complete file analysis")
self.stats = [SoxStats(self.path)]
position = 0
length = self.stats[0].get("length")
        logger.debug("file stats: %s", self.stats[0].values)
if not self.sample_length:
return
logger.debug("start samples analysis...")
while position < length:
stats = SoxStats(self.path, at=position, length=self.sample_length)
self.stats.append(stats)
position += self.sample_length
def check(self, name, min_val, max_val):
self.good = [
index
for index, stats in enumerate(self.stats)
if min_val <= stats.get(name) <= max_val
]
self.bad = [
index
for index, stats in enumerate(self.stats)
if index not in self.good
]
self.resume()
def resume(self):
if self.good:
logger.debug(
self.path + " -> good: \033[92m%s\033[0m",
", ".join(self._view(self.good)),
)
if self.bad:
logger.debug(
self.path + " -> bad: \033[91m%s\033[0m",
", ".join(self._view(self.bad)),
)
def _view(self, array):
return [
"file"
if index == 0
else "sample {} (at {} seconds)".format(
index, (index - 1) * self.sample_length
)
for index in array
]
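
A hedged usage sketch; the path and the RMS bounds are placeholders (real thresholds would come from the SOUND_QUALITY setting), and `aircox.controllers.sound_stats` is the assumed module path:

```python
from aircox.controllers.sound_stats import SoundStats

stats = SoundStats(
    "/srv/media/programs/my_show/archives/20230630_show.ogg", sample_length=120
)
stats.analyse()                          # whole file first, then 120 s samples
stats.check("RMS lev dB", -18.0, 0.0)    # fills stats.good / stats.bad indexes
print("good samples:", stats.good)
print("bad samples:", stats.bad)
```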