#!/usr/bin/env python3

import datetime
import itertools
import re
from collections import defaultdict
from typing import Any, AsyncGenerator, Iterable, Optional, Union


class BucketCache:
    """Cache data in buckets, with customizable bucket keys and fetch callback.

    Values are stored in ``self.data``, a ``defaultdict(list)`` keyed by
    integer bucket index.  A bucket index that is present in ``self.data``
    (even with an empty list) is treated as already fetched.

    Subclasses supply ``bucketkey``, ``bucket``, ``unbucket`` and ``fetch``.
    """

    def __init__(self, data: Optional[dict] = None):
        """Create the cache, optionally seeded from a {bucket index: values} mapping."""
        self.data = defaultdict(list, {} if data is None else data)

    def add(self, value) -> None:
        """Add one value to the bucket derived from its bucket key."""
        self.data[self.bucket(self.bucketkey(value))].append(value)

    def update(self, iterable: Iterable) -> None:
        """Add multiple values, one at a time."""
        for value in iterable:
            self.add(value)

    async def retrieve(self, key_start, key_stop, ctx=None) -> AsyncGenerator[Any, None]:
        """Retrieve range, calling fetch callback for fresh data if needed.

        Buckets between ``bucket(key_start)`` and ``bucket(key_stop)``
        (inclusive) that are not yet in the cache are grouped into runs of
        consecutive indices, and ``fetch`` is called once per run with the
        unbucketed first and last index and ``ctx``.  Afterwards every cached
        value whose bucket key lies in ``[key_start, key_stop]`` is yielded in
        bucket order.
        """
        bkt_start = self.bucket(key_start)
        bkt_stop = self.bucket(key_stop)
        # Snapshot the missing buckets up front: adding fetched values creates
        # buckets, which must not influence what is still considered missing.
        missing = [idx for idx in range(bkt_start, bkt_stop + 1) if idx not in self.data]
        # Within a run of consecutive indices, (index - position) is constant.
        for _, pairs in itertools.groupby(enumerate(missing), key=lambda pair: pair[1] - pair[0]):
            run = [idx for _, idx in pairs]
            first, last = run[0], run[-1]
            # Buffer the run before committing, so a fetch that fails midway
            # does not leave partially filled buckets that look cached.  Keep
            # only values belonging to the requested run: anything else would
            # duplicate data in buckets that are cached or fetched separately.
            fetched = [
                value
                async for value in self.fetch(self.unbucket(first), self.unbucket(last), ctx)
                if first <= self.bucket(self.bucketkey(value)) <= last
            ]
            for value in fetched:
                self.add(value)
            for idx in run:
                self.data[idx]  # create empty bucket if not exists: marks it as fetched
        for idx in range(bkt_start, bkt_stop + 1):
            for value in self.data[idx]:
                if key_start <= self.bucketkey(value) <= key_stop:
                    yield value

    @staticmethod
    def bucketkey(value):
        """Get bucket key from value"""
        raise NotImplementedError

    @staticmethod
    def bucket(key) -> int:
        """Get bucket index from bucket key"""
        raise NotImplementedError

    @staticmethod
    def unbucket(idx: int):
        """Get the key passed to ``fetch`` for a bucket index"""
        raise NotImplementedError

    @staticmethod
    async def fetch(start, stop, ctx=None) -> AsyncGenerator[Any, None]:
        """Fetch fresh values for the unbucketed range ``start``..``stop``"""
        raise NotImplementedError


class TimeSeriesCache(BucketCache):
    """Cache time series data in daily buckets"""

    # Day zero of the bucket numbering.
    _EPOCH = datetime.date(1970, 1, 1)

    @staticmethod
    def bucket(key: Union[datetime.datetime, datetime.date]) -> int:
        """Return the number of days between 1970-01-01 and the date of *key*."""
        # datetime is a subclass of date, so reduce it to its date part first;
        # subtracting a date from a datetime is not supported.
        day = key.date() if isinstance(key, datetime.datetime) else key
        return (day - TimeSeriesCache._EPOCH).days

    @staticmethod
    def unbucket(idx: int) -> datetime.date:
        """Return the date that lies *idx* days after 1970-01-01."""
        return TimeSeriesCache._EPOCH + datetime.timedelta(days=idx)
unbucket(idx: int) -> datetime.datetime: return datetime.date(1970, 1, 1) + datetime.timedelta(days=idx) re_dt_fileman = r'(?P(\d{3})(\d{2})(\d{2})\.(\d{2})(\d{2})(\d{2}))' # George Timson's format re_dt_today = r'(?PT)' # today re_dt_now = r'(?PN)' # now re_dt_mdy = r'(?P(\d{1,2})[^\w@?]+(\d{1,2})[^\w@?]+(\d{2}|\d{4})\s*)' # m/d/yy, m/d/yyyy re_dt_ymd = r'(?P(\d{4})[^\w@?]+(\d{1,2})[^\w@?]+(\d{1,2})\s*)' # yyyy/m/d re_dt_yyyymmdd = r'(?P(\d{4})(\d{2})(\d{2}))' # yyyymmdd re_dt_Mdy = r'(?P([A-Z]{3,})[^\w@?]+(\d{1,2})[^\w@?]+(\d{2}|\d{4})\s*)' # M/d/yy, M/d/yyyy re_dt_dMy = r'(?P((\d{1,2})[^\w@?]+[A-Z]{3,})[^\w@?]+(\d{2}|\d{4})\s*)' # d/M/yy, d/M/yyyy re_dt_md = r'(?P(\d{1,2})[^\w@?]+(\d{1,2})\s*)' # m/d re_dt_offset = r'(?P([-+]\d+)(H|W|M)?)' # +#U re_dt_time = r'(?:@?(?P