"""Collect bounce reports from an mbox and file them per subscriber."""

import datetime
import email.parser
import email.utils
import mailbox
import os
import re
import subprocess
import sys
import time

from lxml import etree
from lxml.builder import ElementMaker

import dating
import filing
from emailer import Emailer
from nitpo import Nitpo
from profile import Profile
from reports import Reports
from xpaths import Xpaths


class Nixer(Nitpo):
    """Read bounce messages and store them in per-address XML files.

    ``run()`` walks the configured mbox, extracts the bounced address
    ("subber") from every delivery-status / feedback-report part and
    writes the bounce text into a ``nixdo`` document below the bouncer
    folder.  ``lib()`` walks the stored documents afterwards.
    """

    # MIME types of the message parts that carry a bounce report.
    _REPORT_TYPES = ('message/delivery-status', 'message/feedback-report')

    def __init__(self, do_verbose=False):
        """Set up helpers, configuration, regexes and the bounce mailbox.

        :param do_verbose: print progress information when true.
        """
        super().__init__()
        self.do_verbose = do_verbose
        self.profile = Profile(do_verbose=do_verbose)
        self.reports = Reports(do_verbose=do_verbose)
        self.xpaths = Xpaths()
        self.emailer = Emailer(do_verbose=do_verbose)
        self.check_conf('folders', 'bouncer')
        self.check_conf('files', 'bouncebox')
        self.fudi = self.conf['folders']['bouncer']
        self.N = "{%s}" % self.const['ns']
        self.E = ElementMaker(nsmap={None: self.const['ns']})
        mbox_fufi = self.conf['files']['bouncebox']
        rfc = r'[Rr][Ff][Cc]822; *([^\n ]+)'
        self.re_firec = re.compile(f'Final-Recipient: {rfc}')
        self.re_orrec = re.compile(f'Original-Recipient: {rfc}')
        self.re_yahoo = re.compile('Original-Rcpt-To: ([^ ]+)\n')
        # wild goose chase: a "To: <address>" header anywhere in the text
        self.re_wilgu = re.compile('\nTo: +<([^> ]+)>')
        # bounce texts by subber
        self.dms = {}
        # subbers already reported as having no profile
        self.reported_subbers = {}
        self.box = mailbox.mbox(mbox_fufi)
        self.mailname = self.get_mailname()
        self.bounces = None
        self.emad = None
        self.kapro_ignore_in_days = None
        self.start_time = self.now()
        if self.has_conf('nixer', 'kapro_ignore_in_days'):
            # self.today is only needed (and only set) with this option
            self.today = datetime.datetime.today()
            self.kapro_ignore_in_days = \
                int(self.conf['nixer']['kapro_ignore_in_days'])
        self.TF = self.const['tf']
        # date of the current message, formatted with self.TF
        self.date = None
        # dates by digest of bounce
        self.dates = {}
        # full text of the current message, used by wild_goose_chase()
        self.fut = ''
        self.fufi = None
        # current doc in library mode, to avoid reparse
        self.doc = None

    def run(self):
        """Read the whole bounce box, then store what was found."""
        self.parsa = email.parser.BytesFeedParser()
        self.read_box()
        self.store()

    def read_box(self):
        """Feed every bounce-report part of every message to act_on_part().

        The date of the enclosing message is used for all of its parts.
        """
        for _key, msg in self.box.iteritems():
            # Reset per message, so that a message without a usable
            # date does not inherit the date of the previous one.
            self.date = self._message_date(msg)
            # full text, used for the wild goose chase
            self.fut = str(msg)
            for part in msg.walk():
                if part.get_content_type() in self._REPORT_TYPES:
                    self.act_on_part(part)

    def _message_date(self, msg):
        """Return the message date formatted with self.TF, or None."""
        raw = msg['date']
        if raw is None:
            return None
        # parsedate() returns None rather than raising on garbage
        parsed = email.utils.parsedate(str(raw))
        if parsed is None:
            return None
        try:
            stamp = time.mktime(parsed)
        except (OverflowError, ValueError):
            return None
        return datetime.datetime.fromtimestamp(stamp).strftime(self.TF)

    @staticmethod
    def _unbracket(address):
        """Strip one pair of enclosing angle brackets, if present."""
        if address.startswith('<') and address.endswith('>'):
            return address[1:-1]
        return address

    def act_on_part(self, part):
        """Find the subber a bounce part is about and remember the bounce.

        The address is looked up by Final-Recipient, then
        Original-Recipient, then Original-Rcpt-To.  If that address has
        no profile, wild_goose_chase() is tried on the full message.

        :param part: a message part holding a bounce report.
        :return: False when no usable subber is found, None otherwise.
        """
        text = str(part)
        match = self.re_firec.search(text)
        fallbacks = (('orrec', self.re_orrec), ('yahoo', self.re_yahoo))
        for label, regex in fallbacks:
            if match is not None:
                break
            if self.do_verbose:
                print(f"nixer: look by {label}")
            match = regex.search(text)
        if match is None:
            print(f"nixer does not see the subber in\n {text}",
                  file=sys.stderr)
            return False
        subber = self._unbracket(match.group(1))
        if self.profile.has(subber) == 0:
            # no profile for that address, go for the wild goose chase
            wilgu_subber = self.wild_goose_chase()
            if wilgu_subber is None:
                if subber not in self.reported_subbers:
                    print(f'nixer: emad {subber} has no profile.')
                    self.reported_subbers[subber] = 1
                return False
            if self.do_verbose:
                print(f'nixer: {subber} --> {wilgu_subber}'
                      ' by wild goose chase')
            subber = wilgu_subber
        digest = filing.digest(text)
        # This is incorrect if there are identical bounces with
        # different dates in the box; the first date seen is kept.
        # A missing date is not recorded, because None can not be
        # written as an XML attribute later on.
        if self.date is not None and digest not in self.dates:
            self.dates[digest] = self.date
        self.dms.setdefault(subber, []).append(text)
        return None

    def store(self):
        """Write the collected bounces of every subber to its file."""
        if self.do_verbose:
            print(self.dates)
        for emad in self.dms:
            self.store_for_subber(emad)

    def store_for_subber(self, emad):
        """Merge the collected bounces of one subber into its nixdo file.

        :param emad: address of the subber, a key of self.dms.
        :return: True when the file was written, False when nothing
            changed.  Exits the program when the existing file holds
            the same bounce id more than once.
        """
        fufi = self.get_fufi(emad)
        nixdo_ele, needs_save = self._open_nixdo(emad, fufi)
        added_baunzes = set()
        for part in reversed(self.dms[emad]):
            digest = filing.digest(part)
            if digest in added_baunzes:
                continue
            # check for existing bounces
            xp = f"//n:baunz[@id='{digest}']"
            found_baunz = self.xpaths.none_or_one(nixdo_ele, xp)
            if found_baunz is not None:
                # the bounce is there, don't create it, only refresh it
                if self._refresh_baunz(found_baunz, digest):
                    needs_save = True
                if self.do_verbose:
                    print(f"nixer: I have {digest}.")
                continue
            needs_save = True
            added_baunzes.add(digest)
            baunz_ele = etree.SubElement(nixdo_ele, self.N + 'baunz')
            if digest in self.dates:
                baunz_ele.attrib['date'] = self.dates[digest]
            baunz_ele.attrib['id'] = digest
            baunz_ele.attrib['put_in'] = self.start_time
            pretty_part = self.pretty(part)
            if self.do_verbose:
                print(f"nixer adds \n{pretty_part}")
            baunz_ele.text = pretty_part
        if not needs_save:
            if self.do_verbose:
                print(f"nixer: no save for {fufi}")
            return False
        filing.install_xml(nixdo_ele, fufi)
        return True

    def _open_nixdo(self, emad, fufi):
        """Return (root element, needs_save) for the nixdo file of emad."""
        if not os.path.isfile(fufi):
            nixdo_ele = self.E(self.N + 'nixdo')
            nixdo_ele.attrib['emad'] = emad
            # a fresh document always has to be written
            return nixdo_ele, True
        if self.do_verbose:
            print(f"nixer sees a bouncer in {fufi}")
        nixdo_ele = filing.parse_lax(fufi).getroot()
        self._exit_on_duplicate_ids(nixdo_ele, fufi)
        return nixdo_ele, False

    def _exit_on_duplicate_ids(self, nixdo_ele, fufi):
        """Open the file in emacs and exit if a bounce id is repeated."""
        for baunzid in self.xpaths.all(nixdo_ele, '//n:baunz/@id'):
            xp = f"//n:baunz[@id='{baunzid}']"
            if len(self.xpaths.all(nixdo_ele, xp)) <= 1:
                continue
            print(f"nixer can't have more than 1 {baunzid} in {fufi}")
            try:
                # argument list, so the path never passes through a shell
                subprocess.run(["emacs", fufi])
            except OSError as err:
                print(f"nixer can't start emacs: {err}", file=sys.stderr)
            sys.exit(1)

    def _refresh_baunz(self, baunz_ele, digest):
        """Update date/put_in of a stored bounce; True if it changed."""
        attrib = baunz_ele.attrib
        changed = False
        # Historical data may lack a date; later versions of this
        # code should always have set one.
        if digest in self.dates and 'date' not in attrib:
            attrib['date'] = self.dates[digest]
            changed = True
        if attrib.get('put_in') != self.start_time:
            attrib['put_in'] = self.start_time
            changed = True
        return changed

    def pretty(self, string):
        """Return the bounce text laid out for storage as element text.

        A newline is prepended, doubled newlines are collapsed once
        and a trailing blank is appended.
        """
        string = ("\n" + string).replace("\n\n", "\n")
        return string + ' '

    def get_fufi(self, emad):
        """Return the full path of the nixdo file for emad."""
        relfi = self.profile.relfi_from_emad(emad)
        return self.fudi + '/' + relfi

    def has_it_fufi(self, emad):
        """Return True if a nixdo file exists for emad."""
        return os.path.isfile(self.get_fufi(emad))

    def has_it_current_fufi(self, emad):
        """Return True if a nixdo file for emad exists and is recent.

        Without the 'kapro_ignore_in_days' setting any existing file
        counts.  Otherwise the file must not have been modified more
        than that many days before this object was created.
        """
        fufi = self.get_fufi(emad)
        if not os.path.isfile(fufi):
            return False
        if self.kapro_ignore_in_days is None:
            return True
        modified = datetime.datetime.fromtimestamp(os.path.getmtime(fufi))
        age = self.today - modified
        return age.days <= self.kapro_ignore_in_days

    def wild_goose_chase(self):
        """Try to find the subber from self.fut.

        :return: the address if exactly one distinct "To: <...>"
            address in the full message text has a profile, else None.
        """
        emads = self.re_wilgu.findall(self.fut)
        # findall() returns an empty list, never None
        if not emads:
            if self.do_verbose:
                print('nixer: wild goose chase yields no emads')
            return None
        found_emads = []
        for emad in emads:
            if self.profile.has(emad) != 1:
                continue
            if emad not in found_emads:
                found_emads.append(emad)
        if not found_emads:
            if self.do_verbose:
                print('nixer: wild goose chase yields no subbers')
            return None
        if len(found_emads) > 1:
            # ambiguous, refuse to guess
            if self.do_verbose:
                print(f"nixer: wild goose chase yields {found_emads}")
            return None
        return found_emads[0]

    def lib(self):
        """Library run through the stored nixdo files.

        Files whose profile is gone are deleted.  When the final
        recipient of the first bounce differs from the stored address
        (see check_final_vs_emad), the profile is moved to the final
        recipient and the bounce file is removed.
        """
        inp_fudi = self.conf['folders']['bouncer']
        for root, _dirs, banas in os.walk(inp_fudi):
            for bana in banas:
                if not bana.endswith(".xml"):
                    print(f"I skip {bana}")
                    continue
                self.fufi = root + '/' + bana
                self.doc = filing.parse_lax(self.fufi)
                if self.delete_bounce_without_profile():
                    continue
                self.list_bounces()
                # a string only when the final recipient is not similar
                firec = self.check_final_vs_emad()
                # for the moment this is the only action
                if not isinstance(firec, str):
                    continue
                print(self.emad + " -> " + firec)
                add_ele = self.doc.getroot()
                add_ele.attrib['fixed'] = 'firec_bounce'
                add_ele.attrib['inject'] = self.now()
                self.profile.move_emad(self.emad, firec, add_ele=add_ele)
                print(f"nixer removes {self.fufi}")
                os.remove(self.fufi)
        return None

    def delete_bounce_without_profile(self):
        """Delete the current bounce file if its profile file is missing.

        Sets self.emad from the current document as a side effect.

        :return: True if the file was deleted, else False.
        """
        emad = str(self.xpaths.one(self.doc, "@emad"))
        self.emad = emad
        profile_fufi = self.profile.fufi_from_emad(emad)
        if os.path.isfile(profile_fufi):
            return False
        print(f"nixer: no {profile_fufi}, I delete {self.fufi}")
        os.remove(self.fufi)
        self.fufi = None
        return True

    def check_final_vs_emad(self):
        """Compare the final recipient of the first bounce with self.emad.

        :return: None if the bounce names no final recipient; False if
            the recipient equals self.emad, is in a similar domain, or
            any bounce was generated locally; otherwise the final
            recipient as a string.
        """
        emad = self.emad
        bounce = str(self.xpaths.one_or_first(self.doc, 'n:baunz/text()'))
        finare = self.re_firec.search(bounce)
        if finare is None:
            print(f"nixer: no final recipient for {emad}")
            return None
        # same clean-up as in act_on_part()
        firec = self._unbracket(finare.group(1))
        if firec == emad:
            return False
        if self.is_similar_domain(emad, firec):
            return False
        if self.is_any_bounce_local():
            # local bounces must be excluded from emad change
            return False
        return firec

    def is_similar_domain(self, emad1, emad2):
        """Return True if two addresses share their trailing domain labels.

        With n the label count of the shorter domain, the last n - 1
        labels are compared, ignoring case; the leftmost of those n
        labels is left out.
        """
        parts_1 = emad1.partition('@')[2].lower().split('.')
        parts_2 = emad2.partition('@')[2].lower().split('.')
        parts_1.reverse()
        parts_2.reverse()
        # min length - 1, to leave out the first (leftmost) part
        tail = min(len(parts_1), len(parts_2)) - 1
        return parts_1[:tail] == parts_2[:tail]

    def get_mailname(self):
        """Return the stripped contents of /etc/mailname."""
        return filing.sread("/etc/mailname").strip()

    def list_bounces(self):
        """Load the bounce texts of the current doc into self.bounces."""
        self.bounces = list(self.xpaths.all(self.doc, 'n:baunz/text()'))
        return self.bounces

    def is_any_bounce_local(self):
        """Return True if any listed bounce was reported by this host.

        This prevents an address change such as
        user@wp.pl -> postmaster@wp.nl that a locally generated
        bounce would otherwise suggest.
        """
        test_local = 'Reporting-MTA: dns; ' + self.mailname
        return any(test_local in bounce for bounce in self.bounces or [])

    def load(self, emad):
        """Parse and return the nixdo document for emad, None if absent."""
        fufi = self.get_fufi(emad)
        if not os.path.isfile(fufi):
            if self.do_verbose:
                print(f"nixer sees no fufi for {emad}")
            return None
        parser = etree.XMLParser(remove_blank_text=True)
        return etree.parse(fufi, parser)

    def baufi(self, emad, baunz_doc, profile_doc, repcode):
        """Bounce feedback: prepare a mail from profile, bounces and report.

        :param emad: address of the subber.
        :param baunz_doc: not used; kept for existing callers.
        :param profile_doc: ignored, the profile is always reloaded.
        :param repcode: code of a report to append, or None.
        :return: the result of the emailer's prepare(), or None when a
            document is missing.
        """
        profile_doc = self.profile.load(emad)
        if profile_doc is None:
            # NOTE(review): assumes profile.load() signals a missing
            # profile with None, as load() here does -- confirm.
            print(f"I have no profile for {emad}")
            return None
        report_doc = None
        if repcode is not None:
            report_doc = self.reports.load(repcode)
        bouncer_doc = self.load(emad)
        if bouncer_doc is None:
            print(f"I have no bouncer for {emad}")
            return None
        profile_ele = profile_doc.getroot()
        profile_ele.append(bouncer_doc.getroot())
        if report_doc is not None:
            profile_ele.append(report_doc.getroot())
        maix = etree.ElementTree(profile_ele)
        base = emad + '.baufi'
        return self.emailer.prepare(maix, empro='baufi', base=base,
                                    do_repeat=True)