#!/usr/bin/python3
"""Maintain per-issue docids text files derived from repis files."""

import glob
import os
import sys

import filing
from nitpo import Nitpo
from sheets import Sheets
from infile import Infile


class Docids(Nitpo):
    """Simple class to extract docids.

    For each repis file, a text file is written under the configured
    docids folder by running the 'docids' sheet over it (via ``Sheets``).
    The resulting files hold one docid per line, as read back by
    ``get_docids``.
    """

    def __init__(self, do_verbose=False):
        """Read the folder configuration and set up the helpers.

        Both ``folders.docids`` and ``folders.repis`` must be configured;
        if either is missing, a message is printed to stderr and the
        process exits with a non-zero status.

        :param do_verbose: when true, progress messages are printed and
            the flag is passed on to ``Infile`` and ``Sheets``.
        """
        super().__init__()
        self.infile = Infile(do_verbose=do_verbose)
        if not self.has_conf('folders', 'docids'):
            print("docids needs a docids folder.", file=sys.stderr)
            # a missing folder is fatal, so report failure to the caller
            sys.exit(1)
        self.out_fudi = self.conf['folders']['docids']
        if not self.has_conf('folders', 'repis'):
            print("docids need a repis folder.", file=sys.stderr)
            sys.exit(1)
        self.repis_fudi = self.conf['folders']['repis']
        # self.repis = Repis()
        self.sheets = Sheets(do_verbose=do_verbose)
        self.do_verbose = do_verbose

    def update_all(self, fufi):
        """Do all files we have for the issuedate of fufi.

        :param fufi: a file name from which ``Infile.get_issuedate`` can
            extract the issuedate.
        """
        issuedate = self.infile.get_issuedate(fufi)
        self.update_issuedate(issuedate)

    def update_issuedate(self, issuedate):
        """Update the docids file of every entry in the issuedate's folder.

        The folder examined is ``<repis folder>/<issuedate>``. If it is not
        a directory, a message goes to stderr and nothing else happens.

        :param issuedate: name of the sub-folder of the repis folder.
        :return: always None.
        """
        fudi = self.repis_fudi + '/' + issuedate
        if not os.path.isdir(fudi):
            print(f"docids does not see {fudi}", file=sys.stderr)
            return None
        # update_fufi itself rejects entries that are not repis files
        for fufi in glob.glob(fudi + '/*'):
            self.update_fufi(fufi)
        return None

    def get_out_fufi(self, repis_fufi):
        """Return the docids file name that corresponds to a repis file.

        The result is ``<docids folder>/<issuedate>/<babana>.txt``, where
        the issuedate comes from ``Infile.get_issuedate`` and babana from
        ``Infile.bana_chop_ext`` (presumably the base name without its
        extension -- confirm in ``Infile``).

        :param repis_fufi: file name of the repis file.
        :return: the output file name as a string.
        """
        issuedate = self.infile.get_issuedate(repis_fufi)
        out_fufi = self.conf['folders']['docids'] + '/' + issuedate
        babana = self.infile.bana_chop_ext(repis_fufi)
        out_fufi += '/' + babana + '.txt'
        return out_fufi

    def update_fufi(self, repis_fufi):
        """Rebuild the docids file of one repis file when required.

        :param repis_fufi: file name of the repis file.
        :return: None when ``Infile.is_repis`` rejects the file or when
            ``filing.donere`` reports that no rebuild is needed; otherwise
            whatever ``Sheets.fufi_to_fufi`` returns.
        """
        if not self.infile.is_repis(repis_fufi):
            # raise Exception(fufi)
            print(f"docids sees junk {repis_fufi}", file=sys.stderr)
            return None
        if self.do_verbose:
            print(f"docis: fufi is {repis_fufi}")
        out_fufi = self.get_out_fufi(repis_fufi)
        # NOTE(review): donere presumably compares the target against its
        # sources make-style -- confirm in the filing module.
        if not filing.donere(out_fufi, [repis_fufi]):
            if self.do_verbose:
                print(f"docis: {out_fufi} needs no update over {repis_fufi}")
            return None
        return self.sheets.fufi_to_fufi('docids', repis_fufi, out_fufi)

    def of_repis(self, repis_fufi):
        """Return the docids of a repis file, updating its file first.

        When ``update_fufi`` returns data, that data is returned as is.
        Otherwise the docids are read from the existing docids file.

        :param repis_fufi: file name of the repis file.
        :return: the result of ``update_fufi`` if it is not None, else the
            list of lines from the docids file.
        """
        docids = self.update_fufi(repis_fufi)
        if docids is not None:
            return docids
        in_fufi = self.get_out_fufi(repis_fufi)
        return self.get_docids(in_fufi)

    def are_there_any(self, repis_fufi):
        """Tell whether the 'docids' sheet yields any output for the file.

        :param repis_fufi: file name of the repis file.
        :return: True if the stripped output of ``Sheets.fufi_to_string``
            is non-empty, False otherwise.
        """
        out = self.sheets.fufi_to_string('docids', repis_fufi).strip()
        return len(out) > 0

    # NOTE: list_fufis is disabled; it is kept here for reference only.
    # def list_fufis(self, issuedate, limit=None):
    #     out_fudi = self.out_fudi + '/' + issuedate
    #     glob_string = out_fudi + '/*.txt'
    #     fufis = sorted(filter(os.path.isfile, glob.glob(glob_string)))
    #     if limit is None:
    #         return fufis
    #     limited_out_fufis = []
    #     for fufi in fufis:
    #         prior = self.infile.get_prior(fufi)
    #         if prior >= limit:
    #             break
    #         limited_out_fufis.append(fufi)
    #     if self.do_verbose:
    #         print(f"limit is {limit}")
    #         print(str(limited_out_fufis))
    #     print(limited_out_fufis)
    #     return limited_out_fufis

    def get_docids(self, fufi):
        """Read a docids file into a list.

        :param fufi: name of a text file.
        :return: a list with one entry per line of the file, each with
            leading and trailing whitespace stripped.
        """
        # the context manager closes the handle even if reading fails
        with open(fufi, 'r') as file:
            return [line.strip() for line in file]