"""Turn report membership snapshots into per-address subscription profiles.

Vocabulary used throughout (inferred from usage -- confirm with the
project glossary):

* ``emad``    -- one line of a membership file, presumably an e-mail address
* ``repcode`` -- name of a report directory below ``MEMBER_FUDI``
* ``surk``    -- one membership period of an emad in a report
* ``fudi`` / ``fufi`` / ``bana`` -- directory path / file path / basename

Two entry points: ``member_data()`` scans the snapshot files and dumps the
collected history to ``MEMBERSHIP_FUFI``; ``xmls()`` loads that dump and
writes one XML profile per emad.
"""

import glob
import gzip
import os
import random
import subprocess
from datetime import datetime

import lxml.etree as etree
from lxml.builder import ElementMaker

import filing
from profile import Profile

p = Profile()
NS = p.const['ns']
E = ElementMaker(nsmap={None: NS})

MEMBER_FUDI = '/home/ernad/var/membership'
MEMBERSHIP_FUFI = '/tmp/membership.json.gz'
BAD_REPCODES = ('nep-all', 'nep-xxx', 'nep-zzz')

# A lone dead surk shorter than this many days is discarded by member_data().
SHORT_SURK_DAYS = 7


def diff(d1, d2):
    """Return the absolute number of days between two ``YYYY-MM-DD`` strings."""
    d1 = datetime.strptime(d1, "%Y-%m-%d")
    d2 = datetime.strptime(d2, "%Y-%m-%d")
    return abs((d2 - d1).days)


def member_data():
    """Collect the membership history of every report and dump it.

    The structure handed to ``filing.dump`` maps each emad to a dict with
    up to two keys::

        'live': {repcode: {'from': date}}
        'dead': {repcode: [{'from': date, 'until': date}, ...]}

    Before dumping, a dead entry is dropped when it is the only one for
    its repcode and lasted fewer than ``SHORT_SURK_DAYS`` days; empty
    'live' / 'dead' sections and emads left with neither are removed.
    """
    out = {}
    for report_fudi in glob.glob(MEMBER_FUDI + '/*'):
        repcode = os.path.basename(report_fudi)
        collect_for_report(repcode, out)

    # Drop short-lived dead surks, but only when the repcode has exactly
    # one of them; repeated memberships are kept regardless of length.
    for record in out.values():
        dead_by_report = record.get('dead')
        if dead_by_report is None:
            continue
        for repcode in list(dead_by_report):
            surks = dead_by_report[repcode]
            if len(surks) > 1:
                continue
            if diff(surks[0]['from'], surks[0]['until']) < SHORT_SURK_DAYS:
                del dead_by_report[repcode]

    # Remove sections that ended up empty, then emads with nothing left.
    # Iterate over a copy of the keys because entries of `out` are deleted.
    for emad in list(out):
        record = out[emad]
        for state in ('live', 'dead'):
            if state in record and not record[state]:
                del record[state]
        if 'live' not in record and 'dead' not in record:
            del out[emad]

    filing.dump(out, MEMBERSHIP_FUFI)


def collect_for_report(repcode, out):
    """Feed every snapshot file of one report into ``out``.

    Files are visited as ``MEMBER_FUDI/<repcode>/<subdir>/<file>`` with
    both levels sorted by name, so ``read`` sees them in name order.
    Reports listed in ``BAD_REPCODES`` are skipped entirely.
    """
    if repcode in BAD_REPCODES:
        return
    report_fudi = MEMBER_FUDI + '/' + repcode
    for year_fudi in sorted(glob.glob(report_fudi + '/*')):
        for date_file in sorted(glob.glob(year_fudi + '/*')):
            read(date_file, out, repcode)


def read(fufi, out, repcode):
    """Merge one membership snapshot file into ``out``.

    Every emad listed in the file gets a live surk for ``repcode`` (the
    'from' date is set only the first time it is seen).  Every emad that
    currently has a live surk for ``repcode`` but is not listed in the
    file has that surk closed: it moves to the 'dead' list with this
    file's date as 'until'.

    Args:
        fufi: path of the snapshot; read through gzip when it ends in
            ``.gz``, as plain text otherwise.
        out: the accumulating history dict, modified in place.
        repcode: the report the snapshot belongs to.
    """
    bana = os.path.basename(fufi)
    # Characters 8..17 of the file name are used as the snapshot date;
    # diff() later parses these values as YYYY-MM-DD.
    date = bana[8:18]

    # emads listed in this file
    emads = set()
    open_file = gzip.GzipFile if fufi.endswith('.gz') else open
    with open_file(fufi, 'r') as the_file:
        # NOTE(review): reading stops at the first line that is empty
        # after rstrip(), not only at end of file -- confirm that
        # snapshot files never contain blank lines.
        while (line := the_file.readline().rstrip()):
            if isinstance(line, bytes):
                # gzip yields bytes: try the default codec, then cp1252.
                try:
                    emad = line.decode()
                except UnicodeDecodeError:
                    emad = line.decode('cp1252')
            else:
                emad = line
            emads.add(emad)
            if emad not in out:
                out[emad] = {'live': {}}
            surk = out[emad]['live'].setdefault(repcode, {})
            if 'from' not in surk:
                surk['from'] = date

    # Close the live surk of everybody who is no longer listed.
    for emad, record in out.items():
        live_surk = record['live'].get(repcode)
        if live_surk is None or emad in emads:
            continue
        ended = {'from': live_surk['from'], 'until': date}
        record.setdefault('dead', {}).setdefault(repcode, []).append(ended)
        del record['live'][repcode]


def xmls():
    """Write one XML profile file per emad found in the membership dump.

    Emads are processed in random order.  Each profile is written to the
    path given by ``Profile.fufi_from_emad``; a profile without any live
    surk is then compressed in place with ``/bin/gzip``.
    """
    p = Profile()
    d = filing.load(MEMBERSHIP_FUFI)
    emads = list(d)
    random.shuffle(emads)
    for emad in emads:
        out_fufi = p.fufi_from_emad(emad)
        filing.prepare(out_fufi)
        profile_doc = xml(p, emad, d[emad])
        is_dead = is_it_dead(profile_doc)
        out_string = etree.tostring(profile_doc, pretty_print=True).decode()
        with open(out_fufi, 'w') as out_file:
            out_file.write(out_string)
        if is_dead:
            # Argument list instead of a shell string: the path is derived
            # from address data and must not be interpreted by a shell.
            subprocess.run(['/bin/gzip', out_fufi])


def xml(p, emad, d):
    """Build the profile document for one emad.

    Args:
        p: a ``Profile``; only ``p.const['ns']`` is read.
        emad: value of the root element's ``emad`` attribute.
        d: the emad's record with optional 'live' and 'dead' sections.

    Returns:
        An ``etree.ElementTree`` whose root is a ``profile`` element.
    """
    NS = p.const['ns']
    NSMAP = {None: NS}
    profile_ele = etree.Element('profile', nsmap=NSMAP)
    profile_ele.set('emad', emad)
    if 'live' in d:
        live(profile_ele, d['live'], NSMAP)
    if 'dead' in d:
        dead(profile_ele, d['dead'], NSMAP)
    return etree.ElementTree(profile_ele)


def dead(profile_ele, d_dead, NSMAP):
    """Append a ``dead`` element holding one ``surk`` per ended membership.

    A repcode may carry several surks; each becomes its own element with
    'from' and 'until' attributes.  Repcodes in ``BAD_REPCODES`` are
    skipped.
    """
    dead_ele = etree.SubElement(profile_ele, 'dead', nsmap=NSMAP)
    for repcode in d_dead:
        if repcode in BAD_REPCODES:
            continue
        for surk in d_dead[repcode]:
            surk_ele = etree.SubElement(dead_ele, 'surk', nsmap=NSMAP)
            surk_ele.set('repcode', repcode)
            surk_ele.set('spro', 'v')
            surk_ele.set('from', surk['from'])
            surk_ele.set('until', surk['until'])


def live(profile_ele, d_live, NSMAP):
    """Append a ``live`` element holding one ``surk`` per current membership.

    Repcodes in ``BAD_REPCODES`` are skipped.
    """
    live_ele = etree.SubElement(profile_ele, 'live', nsmap=NSMAP)
    for repcode in d_live:
        if repcode in BAD_REPCODES:
            continue
        surk_ele = etree.SubElement(live_ele, 'surk', nsmap=NSMAP)
        surk_ele.set('repcode', repcode)
        surk_ele.set('spro', 'v')
        surk_ele.set('from', d_live[repcode]['from'])


def is_it_dead(doc):
    """Return True when the profile has no element below ``live``.

    Not namespace-aware: the path uses unprefixed names and the 'n'
    prefix registered for the query is never referenced, so it is meant
    for the documents built by ``xml()`` in this module.
    """
    live_count = len(doc.xpath('live/*', namespaces={'n': NS}))
    return live_count == 0