import gzip
import os

import yaml
from django.utils.functional import cached_property

from aircox.conf import settings
from aircox.models import Diffusion, Sound, Track, Log


__all__ = ("LogArchiver",)


class LogArchiver:
    """Commodity class used to manage archives of logs.

    Archives are gzip-compressed YAML files, one per station and per day,
    stored under `settings.LOGS_ARCHIVES_DIR_ABS`.
    """

    @cached_property
    def fields(self):
        """Fields of the `Log` model, as given by `Log._meta.get_fields()`."""
        return Log._meta.get_fields()

    @staticmethod
    def get_path(station, date):
        """Return the archive path for the given station and date.

        The file name is `<YYYYMMDD>_<station pk>.log.gz`, located in
        `settings.LOGS_ARCHIVES_DIR_ABS`.
        """
        return os.path.join(
            settings.LOGS_ARCHIVES_DIR_ABS,
            "{}_{}.log.gz".format(date.strftime("%Y%m%d"), station.pk),
        )

    def archive(self, qs, keep=False):
        """Archive logs of the given queryset.

        Delete archived logs if not `keep`. Return the count of archived
        logs (0 when the queryset is empty, in which case nothing is
        written).
        """
        if not qs.exists():
            return 0

        os.makedirs(settings.LOGS_ARCHIVES_DIR_ABS, exist_ok=True)
        count = qs.count()
        grouped = self.sort_logs(qs)

        # Note: since we use Yaml, we can just append new logs when the
        # file already exists <3
        for (station, date), day_logs in grouped.items():
            path = self.get_path(station, date)
            # FIXME: remove binary mode
            with gzip.open(path, "ab") as archive:
                serialized = [self.serialize(line) for line in day_logs]
                archive.write(yaml.dump(serialized).encode("utf8"))

        if not keep:
            qs.delete()
        return count

    @staticmethod
    def sort_logs(qs):
        """Sort logs by station and date and return a dict of `{
        (station,date): [logs] }`."""
        qs = qs.order_by("date")
        logs = {}
        for log in qs:
            # `log.date` is a datetime: group on its calendar day.
            key = (log.station, log.date.date())
            logs.setdefault(key, []).append(log)
        return logs

    def serialize(self, log):
        """Serialize log into a dict of `{field attname: value}`."""
        return {i.attname: getattr(log, i.attname) for i in self.fields}

    def load(self, station, date):
        """Load an archive returning logs in a list.

        Return an empty list when no archive file exists for the given
        station and date.
        """
        path = self.get_path(station, date)
        if not os.path.exists(path):
            return []
        return self.load_file(path)

    def load_file(self, path):
        """Read the archive at `path` and return its logs as `Log` instances.

        Related diffusions, sounds and tracks are fetched beforehand and
        attached to the instances. An archive without any content yields
        an empty list.
        """
        with gzip.open(path, "rb") as archive:
            data = archive.read()

        # `safe_load` gives None for an empty document: normalise it so the
        # code below never iterates over None.
        logs = yaml.safe_load(data) or []
        if not logs:
            return []

        # we need to preload diffusions, sounds and tracks
        rels = {
            "diffusion": self.get_relations(logs, Diffusion, "diffusion"),
            "sound": self.get_relations(logs, Sound, "sound"),
            "track": self.get_relations(logs, Track, "track"),
        }

        def rel_obj(log, attr):
            rel_id = log.get(attr + "_id")
            return rels[attr][rel_id] if rel_id else None

        return [
            Log(
                diffusion=rel_obj(log, "diffusion"),
                sound=rel_obj(log, "sound"),
                track=rel_obj(log, "track"),
                **log
            )
            for log in logs
        ]

    @staticmethod
    def get_relations(logs, model, attr):
        """From a list of dict representing logs, retrieve related objects of
        the given type.

        Return a dict of `{pk: object}`.
        """
        attr_id = attr + "_id"
        # The `<attr>_id` key may be present without a value: skip those
        # entries instead of looking up a None pk.
        pks = {log[attr_id] for log in logs if log.get(attr_id) is not None}
        return {rel.pk: rel for rel in model.objects.filter(pk__in=pks)}