From 6773481fe6fe6fa8982f421c27a98a298ccc1bc4 Mon Sep 17 00:00:00 2001
From: bkfox <thomas bkfox net>
Date: Mon, 21 Sep 2020 15:19:06 +0200
Subject: [PATCH] async sound monitor & clean-up

---
 aircox/management/commands/sounds_monitor.py  | 332 ++++++++----------
 .../commands/sounds_quality_check.py          |   6 -
 aircox/models/program.py                      |  13 +-
 aircox/models/sound.py                        |  35 --
 aircox/utils.py                               |   6 +-
 5 files changed, 145 insertions(+), 247 deletions(-)
diff --git a/aircox/management/commands/sounds_monitor.py b/aircox/management/commands/sounds_monitor.py
index afcaba4..e48be3b 100755
--- a/aircox/management/commands/sounds_monitor.py
+++ b/aircox/management/commands/sounds_monitor.py
@@ -1,4 +1,9 @@
 #! /usr/bin/env python3
+
+# TODO:
+# - quality check
+# - Sound model => program field as not null
+
 """
 Monitor sound files; For each program, check for:
 - new files;
@@ -23,23 +28,24 @@ parameters given by the setting AIRCOX_SOUND_QUALITY. This script requires
 Sox (and soxi).
 """
 from argparse import RawTextHelpFormatter
+import concurrent.futures as futures
 import datetime
 import atexit
 import logging
 import os
 import re
-import subprocess
 import time
 
+import mutagen
 from watchdog.observers import Observer
 from watchdog.events import PatternMatchingEventHandler, FileModifiedEvent
 
-from django.conf import settings as main_settings
 from django.core.management.base import BaseCommand, CommandError
 from django.utils import timezone as tz
+from django.utils.translation import gettext as _
 
 from aircox import settings, utils
-from aircox.models import Diffusion, Program, Sound
+from aircox.models import Diffusion, Program, Sound, Track
 from .import_playlist import PlaylistImport
 
 logger = logging.getLogger('aircox.commands')
@@ -53,97 +59,71 @@ sound_path_re = re.compile(
 )
 
 
-class SoundInfo:
-    name = ''
+class SoundFile:
+    path = None
+    info = None
+    path_info = None
     sound = None
 
-    year = None
-    month = None
-    day = None
-    hour = None
-    minute = None
-    n = None
-    duration = None
-
-    @property
-    def path(self):
-        return self._path
-
-    @path.setter
-    def path(self, value):
-        """
-        Parse file name to get info on the assumption it has the correct
-        format (given in Command.help)
-        """
-        name = os.path.splitext(os.path.basename(value))[0]
-        match = sound_path_re.search(name)
-        match = match.groupdict() if match and match.groupdict() else \
-                {'name': name}
-
-        self._path = value
-        self.name = match['name'].replace('_', ' ').capitalize()
-
-        for key in ('year', 'month', 'day', 'hour', 'minute'):
-            value = match.get(key)
-            setattr(self, key, int(value) if value is not None else None)
-
-        self.n = match.get('n')
-
-    def __init__(self, path='', sound=None):
+    def __init__(self, path):
         self.path = path
-        self.sound = sound
 
-    def get_duration(self):
-        p = subprocess.Popen(['soxi', '-D', self.path],
-                             stdout=subprocess.PIPE,
-                             stderr=subprocess.PIPE)
-        out, err = p.communicate()
-        if not err:
-            duration = utils.seconds_to_time(int(float(out)))
-            self.duration = duration
-            return duration
-
-    def get_sound(self, save=True, **kwargs):
+    def sync(self, sound=None, program=None, deleted=False, **kwargs):
         """
-        Get or create a sound using self info.
-
-        If the sound is created/modified, get its duration and update it
-        (if save is True, sync to DB), and check for a playlist file.
+        Update related sound model and save it.
         """
-        sound, created = Sound.objects.get_or_create(
-            path=self.path, defaults=kwargs)
+        if deleted:
+            sound = Sound.objects.filter(path=self.path).first()
+            if sound:
+                sound.type = sound.TYPE_REMOVED
+                sound.check_on_file()
+                sound.save()
+                return sound
 
+        # FIXME: sound.program as not null
+        program = kwargs['program'] = Program.get_from_path(self.path)
+        sound, created = Sound.objects.get_or_create(path=self.path, defaults=kwargs) \
+                         if not sound else (sound, False)
+
+        sound.program = program
         if created or sound.check_on_file():
             logger.info('sound is new or have been modified -> %s', self.path)
-            sound.duration = self.get_duration()
-            sound.name = self.name
-            if save:
-                sound.save()
+            self.read_path()
+            self.read_file_info()
+            sound.duration = utils.seconds_to_time(self.info.info.length)
+            sound.name = self.path_info.get('name')
+
+        # check for episode
+        if sound.episode is None and self.read_path():
+            self.find_episode(program)
+
         self.sound = sound
+        sound.save()
+        self.find_playlist(sound)
         return sound
 
-    def find_playlist(self, sound, use_default=True):
+    def read_path(self):
         """
-        Find a playlist file corresponding to the sound path, such as:
-            my_sound.ogg => my_sound.csv
-
-        If use_default is True and there is no playlist find found,
-        use sound file's metadata.
+        Parse file name to get info on the assumption it has the correct
+        format (given in Command.help). Return True if path contains informations.
         """
-        if sound.track_set.count():
-            return
+        if self.path_info:
+            return 'year' in self.path_info
 
-        # import playlist
-        path = os.path.splitext(self.sound.path)[0] + '.csv'
-        if os.path.exists(path):
-            PlaylistImport(path, sound=sound).run()
-        # try metadata
-        elif use_default:
-            track = sound.file_metadata()
-            if track:
-                track.save()
+        name = os.path.splitext(os.path.basename(self.path))[0]
+        match = sound_path_re.search(name)
+        if match:
+            self.path_info = match.groupdict()
+            return True
+        else:
+            self.path_info = {'name': name}
+            return False
 
-    def find_episode(self, program, save=True):
+    def read_file_info(self):
+        """ Read file information and metadata. """
+        self.info = mutagen.File(self.path)
+
+    def find_episode(self, program):
         """
         For a given program, check if there is an initial diffusion
         to associate to, using the date info we have. Update self.sound
@@ -152,37 +132,74 @@ class SoundInfo:
         We only allow initial diffusion since there should be no
         rerun.
         """
-        if self.year is None or not self.sound or self.sound.episode:
-            return
+        pi = self.path_info
+        if 'year' not in pi or not self.sound or self.sound.episode:
+            return None
 
-        if self.hour is None:
-            date = datetime.date(self.year, self.month, self.day)
+        if 'hour' not in pi:
+            date = datetime.date(pi.get('year'), pi.get('month'), pi.get('day'))
         else:
-            date = tz.datetime(self.year, self.month, self.day,
-                               self.hour or 0, self.minute or 0)
+            date = tz.datetime(pi.get('year'), pi.get('month'), pi.get('day'),
+                               pi.get('hour') or 0, pi.get('minute') or 0)
             date = tz.get_current_timezone().localize(date)
 
         diffusion = program.diffusion_set.initial().at(date).first()
         if not diffusion:
-            return
+            return None
 
         logger.info('%s <--> %s', self.sound.path, str(diffusion.episode))
         self.sound.episode = diffusion.episode
-        if save:
-            self.sound.save()
         return diffusion
 
+    def find_playlist(self, sound=None, use_meta=True):
+        """
+        Find a playlist file corresponding to the sound path, such as:
+            my_sound.ogg => my_sound.csv
+
+        Use sound's file metadata if no corresponding playlist has been
+        found and `use_meta` is True.
+        """
+        if sound is None:
+            sound = self.sound
+
+        if sound.track_set.count():
+            return
+
+        # import playlist
+        path = os.path.splitext(self.sound.path)[0] + '.csv'
+        if os.path.exists(path):
+            PlaylistImport(path, sound=sound).run()
+        # use metadata
+        elif use_meta:
+            if self.info is None:
+                self.read_file_info()
+            if self.info.tags:
+                tags = self.info.tags
+                info = '{} ({})'.format(tags.get('album'), tags.get('year')) \
+                    if ('album' and 'year' in tags) else tags.get('album') \
+                    if 'album' in tags else tags.get('year', '')
+
+                track = Track(sound=sound,
+                              position=int(tags.get('tracknumber', 0)),
+                              title=tags.get('title', self.path_info['name']),
+                              artist=tags.get('artist', _('unknown')),
+                              info=info)
+                track.save()
+
 
 class MonitorHandler(PatternMatchingEventHandler):
     """
     Event handler for watchdog, in order to be used in monitoring.
     """
+    pool = None
 
-    def __init__(self, subdir):
+    def __init__(self, subdir, pool):
         """
         subdir: AIRCOX_SOUND_ARCHIVES_SUBDIR or AIRCOX_SOUND_EXCERPTS_SUBDIR
         """
         self.subdir = subdir
+        self.pool = pool
+
         if self.subdir == settings.AIRCOX_SOUND_ARCHIVES_SUBDIR:
             self.sound_kwargs = {'type': Sound.TYPE_ARCHIVE}
         else:
@@ -197,43 +214,23 @@ class MonitorHandler(PatternMatchingEventHandler):
 
     def on_modified(self, event):
         logger.info('sound modified: %s', event.src_path)
-        program = Program.get_from_path(event.src_path)
-        if not program:
-            return
-
-        si = SoundInfo(event.src_path)
-        self.sound_kwargs['program'] = program
-        si.get_sound(save=True, **self.sound_kwargs)
-        if si.year is not None:
-            si.find_episode(program)
-        si.sound.save(True)
-
-    def on_deleted(self, event):
-        logger.info('sound deleted: %s', event.src_path)
-        sound = Sound.objects.filter(path=event.src_path)
-        if sound:
-            sound = sound[0]
-            sound.type = sound.TYPE_REMOVED
-            sound.save()
+        def updated(event, sound_kwargs):
+            SoundFile(event.src_path).sync(**sound_kwargs)
+        self.pool.submit(updated, event, self.sound_kwargs)
 
     def on_moved(self, event):
         logger.info('sound moved: %s -> %s', event.src_path, event.dest_path)
-        sound = Sound.objects.filter(path=event.src_path)
-        if not sound:
-            self.on_modified(
-                FileModifiedEvent(event.dest_path)
-            )
-            return
+        def moved(event, sound_kwargs):
+            sound = Sound.objects.filter(path=event.src_path)
+            sound_file = SoundFile(event.dest_path) if not sound else sound
+            sound_file.sync(**sound_kwargs)
+        self.pool.submit(moved, event, self.sound_kwargs)
 
-        sound = sound[0]
-        sound.path = event.dest_path
-        if not sound.diffusion:
-            program = Program.get_from_path(event.src_path)
-            if program:
-                si = SoundInfo(sound.path, sound=sound)
-                if si.year is not None:
-                    si.find_episode(program)
-        sound.save()
+    def on_deleted(self, event):
+        logger.info('sound deleted: %s', event.src_path)
+        def deleted(event):
+            SoundFile(event.src_path).sync(deleted=True)
+        self.pool.submit(deleted, event.src_path)
 
 
 class Command(BaseCommand):
@@ -276,8 +273,6 @@ class Command(BaseCommand):
         if not program.ensure_dir(subdir):
             return
 
-        sound_kwargs['program'] = program
-
         subdir = os.path.join(program.path, subdir)
         sounds = []
 
@@ -287,12 +282,9 @@ class Command(BaseCommand):
             if not path.endswith(settings.AIRCOX_SOUND_FILE_EXT):
                 continue
 
-            si = SoundInfo(path)
-            sound_kwargs['program'] = program
-            si.get_sound(save=True, **sound_kwargs)
-            si.find_episode(program, save=True)
-            si.find_playlist(si.sound)
-            sounds.append(si.sound.pk)
+            sound_file = SoundFile(path)
+            sound_file.sync(program=program, **sound_kwargs)
+            sounds.append(sound_file.sound.pk)
 
         # sounds in db & unchecked
         sounds = Sound.objects.filter(path__startswith=subdir). \
@@ -307,76 +299,28 @@ class Command(BaseCommand):
         # check files
         for sound in qs:
             if sound.check_on_file():
-                sound.save(check=False)
-
-    def check_quality(self, check=False):
-        """
-        Check all files where quality has been set to bad
-        """
-        import aircox.management.commands.sounds_quality_check as quality_check
-
-        # get available sound files
-        sounds = Sound.objects.filter(is_good_quality=False) \
-                      .exclude(type=Sound.TYPE_REMOVED)
-        if check:
-            self.check_sounds(sounds)
-
-        files = [
-            sound.path for sound in sounds
-            if os.path.exists(sound.path) and sound.is_good_quality is None
-        ]
-
-        # check quality
-        logger.info('quality check...',)
-        cmd = quality_check.Command()
-        cmd.handle(files=files,
-                   **settings.AIRCOX_SOUND_QUALITY)
-
-        # update stats
-        logger.info('update stats in database')
-
-        def update_stats(sound_info, sound):
-            stats = sound_info.get_file_stats()
-            if stats:
-                duration = int(stats.get('length'))
-                sound.duration = utils.seconds_to_time(duration)
-
-        for sound_info in cmd.good:
-            sound = Sound.objects.get(path=sound_info.path)
-            sound.is_good_quality = True
-            update_stats(sound_info, sound)
-            sound.save(check=False)
-
-        for sound_info in cmd.bad:
-            sound = Sound.objects.get(path=sound_info.path)
-            update_stats(sound_info, sound)
-            sound.save(check=False)
+                sound.sync(sound=sound)
 
     def monitor(self):
-        """
-        Run in monitor mode
-        """
-        archives_handler = MonitorHandler(
-            subdir=settings.AIRCOX_SOUND_ARCHIVES_SUBDIR
-        )
-        excerpts_handler = MonitorHandler(
-            subdir=settings.AIRCOX_SOUND_EXCERPTS_SUBDIR
-        )
+        """ Run in monitor mode """
+        with futures.ThreadPoolExecutor() as pool:
+            archives_handler = MonitorHandler(settings.AIRCOX_SOUND_ARCHIVES_SUBDIR, pool)
+            excerpts_handler = MonitorHandler(settings.AIRCOX_SOUND_EXCERPTS_SUBDIR, pool)
 
-        observer = Observer()
-        observer.schedule(archives_handler, settings.AIRCOX_PROGRAMS_DIR,
-                          recursive=True)
-        observer.schedule(excerpts_handler, settings.AIRCOX_PROGRAMS_DIR,
-                          recursive=True)
-        observer.start()
+            observer = Observer()
+            observer.schedule(archives_handler, settings.AIRCOX_PROGRAMS_DIR,
+                              recursive=True)
+            observer.schedule(excerpts_handler, settings.AIRCOX_PROGRAMS_DIR,
+                              recursive=True)
+            observer.start()
 
-        def leave():
-            observer.stop()
-            observer.join()
-        atexit.register(leave)
+            def leave():
+                observer.stop()
+                observer.join()
+            atexit.register(leave)
 
-        while True:
-            time.sleep(1)
+            while True:
+                time.sleep(1)
 
     def add_arguments(self, parser):
         parser.formatter_class = RawTextHelpFormatter
diff --git a/aircox/management/commands/sounds_quality_check.py b/aircox/management/commands/sounds_quality_check.py
index 774c5e9..b2469b0 100755
--- a/aircox/management/commands/sounds_quality_check.py
+++ b/aircox/management/commands/sounds_quality_check.py
@@ -60,12 +60,6 @@ class Stats:
         self.parse(str(out, encoding='utf-8'))
 
 
-#class SoundFile:
-#    path = None
-#    sample_rate = None
-#    length = None
-
-
 class Sound:
     path = None             # file path
     sample_length = 120     # default sample length in seconds
diff --git a/aircox/models/program.py b/aircox/models/program.py
index d0bd2ee..4a739e3 100644
--- a/aircox/models/program.py
+++ b/aircox/models/program.py
@@ -99,17 +99,8 @@ class Program(Page):
         while path[0] == '/':
             path = path[1:]
 
-        while path[-1] == '/':
-            path = path[:-2]
-
-        if '/' in path:
-            path = path[:path.index('/')]
-
-        path = path.split('_')
-        path = path[-1]
-        qs = cl.objects.filter(id=int(path))
-
-        return qs[0] if qs else None
+        path = path[:path.index('/')]
+        return cl.objects.filter(slug=path.replace('_','-')).first()
 
     def ensure_dir(self, subdir=None):
         """
diff --git a/aircox/models/sound.py b/aircox/models/sound.py
index 3c5885c..e7ea873 100644
--- a/aircox/models/sound.py
+++ b/aircox/models/sound.py
@@ -165,45 +165,11 @@ class Sound(models.Model):
 
         return os.path.exists(self.path)
 
-    def file_metadata(self):
-        """
-        Get metadata from sound file and return a Track object if succeed,
-        else None.
-        """
-        if not self.file_exists():
-            return None
-
-        import mutagen
-        try:
-            meta = mutagen.File(self.path)
-        except:
-            meta = {}
-
-        if meta is None:
-            meta = {}
-
-        def get_meta(key, cast=str):
-            value = meta.get(key)
-            return cast(value[0]) if value else None
-
-        info = '{} ({})'.format(get_meta('album'), get_meta('year')) \
-            if meta and ('album' and 'year' in meta) else \
-               get_meta('album') \
-            if 'album' else \
-               ('year' in meta) and get_meta('year') or ''
-
-        return Track(sound=self,
-                     position=get_meta('tracknumber', int) or 0,
-                     title=get_meta('title') or self.name,
-                     artist=get_meta('artist') or _('unknown'),
-                     info=info)
-
     def check_on_file(self):
         """
         Check sound file info again'st self, and update informations if
         needed (do not save). Return True if there was changes.
         """
-
         if not self.file_exists():
             if self.type == self.TYPE_REMOVED:
                 return
@@ -229,7 +195,6 @@ class Sound(models.Model):
             self.is_good_quality = None
             logger.info('sound %s: m_time has changed. Reset quality info',
                         self.path)
-
             return True
 
         return changed
diff --git a/aircox/utils.py b/aircox/utils.py
index c13fd61..14653c1 100755
--- a/aircox/utils.py
+++ b/aircox/utils.py
@@ -93,6 +93,10 @@ def seconds_to_time(seconds):
     """
     Seconds to datetime.time
     """
+    seconds, microseconds = divmod(seconds, 1)
     minutes, seconds = divmod(seconds, 60)
     hours, minutes = divmod(minutes, 60)
-    return datetime.time(hour=hours, minute=minutes, second=seconds)
+    return datetime.time(hour=int(hours), minute=int(minutes), second=int(seconds),
+                         microsecond=int(microseconds*100000))
+
+