#!/usr/bin/python3
"""Path helpers for repis input files (see the Infile class)."""

import glob
import os
import sys

from nitpo import Nitpo


class Infile(Nitpo):
    """Simple class to extract repis.

    All helpers work on path strings. Throughout, ``fufi`` is a full file
    path, ``fudi`` a full directory path and ``bana`` a base name.
    Configuration is read through ``self.conf`` and ``self.has_conf``,
    both inherited from ``Nitpo`` (not visible in this file).
    """

    # Extensions accepted by is_known_type, checked after removing '.gz'.
    KNOWN_EXTS = ('.xml', '.txt', '.json', '.mail')

    # Configuration entries that must be present, as (section, key) pairs.
    REQUIRED_CONF = (('ext', 'repis'), ('folders', 'repis'),
                     ('chars', 'repisep'))

    def __init__(self, do_verbose=False):
        """Validate the required configuration and cache derived values.

        Args:
            do_verbose: stored on the instance as ``self.do_verbose``.

        Raises:
            SystemExit: with a non-zero status when a required
                configuration entry is missing; the reason is written
                to stderr first.
        """
        super().__init__()
        for section, key in self.REQUIRED_CONF:
            if not self.has_conf(section, key):
                print(f"infile needs [{section}][{key}] defined.",
                      file=sys.stderr)
                # A missing setting is a failure, so report it as one.
                sys.exit(1)
        self.len_ext = len(self.conf['ext']['repis'])
        self.repisep = self.conf['chars']['repisep']
        self.do_verbose = do_verbose

    def bana_chop_ext(self, fufi):
        """Return the base name of *fufi* without '.gz' and extension.

        A trailing '.gz' is removed first, then ``self.len_ext``
        characters are chopped off the end, whatever they are.
        """
        bana = os.path.basename(fufi)
        if bana.endswith('.gz'):
            bana = bana[:-3]
        return bana[:-self.len_ext]

    def get_repcode(self, fufi):
        """Return the base name part before the first ``self.repisep``.

        If the separator does not occur, the whole base name is returned.
        """
        return os.path.basename(fufi).partition(self.repisep)[0]

    def get_prior(self, fufi):
        """Return the base name part after the separator, minus extension.

        The number of characters chopped is ``self.len_ext``, plus 3 for
        a trailing '.gz'; a plain '.txt' file chops exactly 4.
        """
        after_repcode = os.path.basename(fufi).partition(self.repisep)[2]
        count_chop = self.len_ext
        if after_repcode.endswith('.gz'):
            count_chop += 3
        elif after_repcode.endswith('.txt'):
            count_chop = 4
        # NOTE(review): '.json' and '.mail' files are chopped by len_ext,
        # which strips the full extension only if the lengths happen to
        # match -- confirm whether that is intended.
        prior = after_repcode[:-count_chop]
        return prior

    def is_repis(self, fufi):
        """Really: is this a repis file?

        Returns True when *fufi* starts with the configured repis folder
        and, ignoring a trailing '.gz', ends with the configured repis
        extension. A path outside the folder prints a note to stdout.
        """
        repis_folder = self.conf['folders']['repis']
        # check that the fufi is in [folders][repis]
        if not fufi.startswith(repis_folder):
            note = f"infile; {fufi} is not in the repis folder "
            note += f" {repis_folder}"
            print(note)
            return False
        test_fufi = fufi[:-3] if fufi.endswith('.gz') else fufi
        return test_fufi.endswith(self.conf['ext']['repis'])

    def get_issuedate(self, fufi):
        """Return the name of the directory that directly holds *fufi*."""
        return os.path.basename(os.path.dirname(fufi))

    def get_base(self, fufi):
        """Return '<issuedate>/<bana without extension>' for *fufi*."""
        issuedate = self.get_issuedate(fufi)
        bana = self.bana_chop_ext(fufi)
        return issuedate + '/' + bana

    def is_duplicate(self, fufi):
        """Return True if any file in *fufi*'s directory has its repcode.

        The check globs ``<dir>/*<repcode>*``, so *fufi* itself matches
        if it already exists on disk.
        """
        fudi = os.path.dirname(fufi)
        repcode = self.get_repcode(fufi)
        glob_string = f"{fudi}/*{repcode}*"
        return bool(glob.glob(glob_string))

    def list_by_prior(self, fudi, do_show=False):
        """For all types of infiles: list files in *fudi* sorted by prior.

        Files of unknown type are reported on stderr and left out of the
        result.

        Args:
            fudi: directory to scan (non-recursive ``<fudi>/*`` glob).
            do_show: when true, also print each returned path.

        Returns:
            The known-type file paths, ordered by ``get_prior``.
        """
        priors = {}
        for fufi in glob.glob(f"{fudi}/*"):
            if not self.is_known_type(fufi):
                print(f"infile: unkown file type {fufi}", file=sys.stderr)
                continue
            priors[fufi] = self.get_prior(fufi)
        if not priors:
            return []
        # Sort only the files that have a prior: skipped files would map
        # to None, which cannot be ordered against the string priors.
        fufi_list = sorted(priors, key=priors.get)
        if do_show:
            for fufi in fufi_list:
                print(fufi)
        return fufi_list

    def is_known_type(self, fufi):
        """Return True if *fufi* has a known extension, gzipped or not."""
        test_fufi = fufi
        if fufi.endswith('.gz'):
            # Drop the '.gz' suffix so the real extension is tested.
            test_fufi = fufi[:-3]
        return test_fufi.endswith(self.KNOWN_EXTS)