"""Build per-address membership histories and write them out as XML profiles.

Terminology used throughout (presumed from usage -- TODO confirm):
``emad`` is a member's e-mail address, ``repcode`` a report code, ``surk`` a
single membership span, ``fudi``/``fufi`` a full directory / full file path.
"""

import glob
import gzip
import os
import random
from datetime import datetime

import lxml.etree as etree
# from lxml.builder import E

import filer
from profile import Profile

MEMBER_FUDI = '/home/ernad/var/membership'
MEMBERSHIP_FUFI = '/tmp/membership.json.gz'


def diff(d1, d2):
    """Return the absolute number of days between two ``YYYY-MM-DD`` strings.

    Raises:
        ValueError: if either string does not match ``%Y-%m-%d``.
    """
    first = datetime.strptime(d1, "%Y-%m-%d")
    second = datetime.strptime(d2, "%Y-%m-%d")
    return abs((second - first).days)


def _prune_membership(out):
    """Drop short-lived dead surks and any entries left empty, in place.

    A dead repcode is removed when it has exactly one recorded span and that
    span lasted fewer than 7 days.  Afterwards empty ``'live'`` / ``'dead'``
    mappings are removed, and addresses left with neither are removed from
    ``out`` altogether.
    """
    # Iterate over a snapshot of the keys: addresses may be deleted below.
    for emad in list(out):
        entry = out[emad]
        dead_surks = entry.get('dead', {})
        for repcode in list(dead_surks):
            spans = dead_surks[repcode]
            if len(spans) != 1:
                # Repcodes with several spans are always kept.
                continue
            if diff(spans[0]['from'], spans[0]['until']) < 7:
                del dead_surks[repcode]
        for state in ('live', 'dead'):
            if state in entry and not entry[state]:
                del entry[state]
        if 'live' not in entry and 'dead' not in entry:
            del out[emad]


def member_data():
    """Collect membership for every report directory and dump the result.

    Every entry of ``MEMBER_FUDI`` is treated as a report whose directory
    name is its repcode.  The collected data is pruned (see
    ``_prune_membership``) and passed to ``filer.dump`` together with
    ``MEMBERSHIP_FUFI``.
    """
    out = {}
    for report_fudi in glob.glob(MEMBER_FUDI + '/*'):
        repcode = os.path.basename(report_fudi)
        collect_for_report(repcode, out)
    _prune_membership(out)
    filer.dump(out, MEMBERSHIP_FUFI)


def collect_for_report(repcode, out):
    """Feed every dated file of one report into ``out``.

    Files are visited in sorted order, first by year directory and then by
    file name, so that ``read`` sees them in name order.

    Args:
        repcode: name of the report directory below ``MEMBER_FUDI``.
        out: membership mapping, updated in place by ``read``.
    """
    report_fudi = MEMBER_FUDI + '/' + repcode
    for year_fudi in sorted(glob.glob(report_fudi + '/*')):
        for date_file in sorted(glob.glob(year_fudi + '/*')):
            read(date_file, out, repcode)


def _decode_emad(raw):
    """Decode one raw line as UTF-8, falling back to cp1252."""
    try:
        return raw.decode()
    except UnicodeDecodeError:
        return raw.decode('cp1252')


def read(fufi, out, repcode):
    """Merge one membership snapshot file into ``out``.

    Each non-blank line of the file is taken as one address.  Addresses seen
    here get a live surk for ``repcode`` (keeping the earliest ``'from'``
    date).  Addresses that had a live surk for ``repcode`` but are missing
    from this file get that surk moved to their ``'dead'`` list with
    ``'until'`` set to this file's date.

    Args:
        fufi: path of the snapshot; a ``.gz`` suffix selects gzip reading.
        out: mapping ``emad -> {'live': {repcode: {'from': date}},
            'dead': {repcode: [{'from': date, 'until': date}, ...]}}``,
            updated in place.
        repcode: report the snapshot belongs to.
    """
    bana = os.path.basename(fufi)
    # Characters 8..17 of the file name are used as the snapshot date
    # (presumably '<7-char repcode>_YYYY-MM-DD...' -- TODO confirm).
    date = bana[8:18]

    # Both variants are read as bytes so the same decoding fallback applies,
    # and the handle is closed even if decoding fails.
    if fufi.endswith('.gz'):
        the_file = gzip.GzipFile(fufi, 'r')
    else:
        the_file = open(fufi, 'rb')

    # Addresses present in this file.
    emads = set()
    with the_file:
        for raw in the_file:
            raw = raw.rstrip()
            if not raw:
                # Skip blank lines instead of treating the first one as
                # end-of-file, which would wrongly mark every later
                # address as gone.
                continue
            emad = _decode_emad(raw)
            emads.add(emad)
            if emad not in out:
                out[emad] = {'live': {}}
            surk = out[emad]['live'].setdefault(repcode, {})
            surk.setdefault('from', date)

    # Anything still live for this repcode but absent from the file has left.
    for emad, entry in out.items():
        live_surks = entry['live']
        if repcode not in live_surks or emad in emads:
            continue
        ended = live_surks.pop(repcode)
        dead_spans = entry.setdefault('dead', {}).setdefault(repcode, [])
        dead_spans.append({'from': ended['from'], 'until': date})


def xmls():
    """Build and write an XML profile for every address in the dump.

    The mapping is loaded with ``filer.load(MEMBERSHIP_FUFI)`` and the
    addresses are processed in random order.
    """
    p = Profile()
    d = filer.load(MEMBERSHIP_FUFI)
    emads = list(d)
    random.shuffle(emads)
    for emad in emads:
        out_fufi = p.fufi_from_emad(emad)
        filer.prepare(out_fufi)
        profile_doc = xml(p, emad, d[emad])
        # NOTE(review): the '.gz'-suffixed path computed here is never used
        # afterwards -- p.write() receives only the document and the address.
        is_dead = p.is_dead(profile_doc)
        if is_dead and not out_fufi.endswith('.gz'):
            out_fufi += '.gz'
        p.write(profile_doc, emad)


def xml(p, emad, d):
    """Return an ``ElementTree`` holding the profile of one address.

    Args:
        p: object exposing ``const['ns']``, used as the default namespace.
        emad: address, stored in the root's ``emad`` attribute.
        d: membership entry with optional ``'live'`` and ``'dead'`` keys.
    """
    NSMAP = {None: p.const['ns']}
    profile_ele = etree.Element('profile', nsmap=NSMAP)
    profile_ele.set('emad', emad)
    if 'live' in d:
        live(profile_ele, d['live'], NSMAP)
    if 'dead' in d:
        dead(profile_ele, d['dead'], NSMAP)
    return etree.ElementTree(profile_ele)


def dead(profile_ele, d_dead, NSMAP):
    """Append a ``<dead>`` child with one ``<surk>`` per ended span.

    Args:
        profile_ele: parent element.
        d_dead: mapping ``repcode -> list of {'from', 'until'}`` dicts.
        NSMAP: namespace map passed to every created element.
    """
    dead_ele = etree.SubElement(profile_ele, 'dead', nsmap=NSMAP)
    for repcode, spans in d_dead.items():
        # There may be several surks per repcode.
        for span in spans:
            surk_ele = etree.SubElement(dead_ele, 'surk', nsmap=NSMAP)
            surk_ele.set('repcode', repcode)
            surk_ele.set('spro', 'v')
            surk_ele.set('from', span['from'])
            surk_ele.set('until', span['until'])


def live(profile_ele, d_live, NSMAP):
    """Append a ``<live>`` child with one ``<surk>`` per current repcode.

    Args:
        profile_ele: parent element.
        d_live: mapping ``repcode -> {'from': date}``.
        NSMAP: namespace map passed to every created element.
    """
    live_ele = etree.SubElement(profile_ele, 'live', nsmap=NSMAP)
    for repcode, surk in d_live.items():
        surk_ele = etree.SubElement(live_ele, 'surk', nsmap=NSMAP)
        surk_ele.set('repcode', repcode)
        surk_ele.set('spro', 'v')
        surk_ele.set('from', surk['from'])