diff --git a/bin/HPSS.py b/bin/HPSS.py index 30ec84e..c383763 100644 --- a/bin/HPSS.py +++ b/bin/HPSS.py @@ -6,6 +6,40 @@ import json +class Thing(object): + pass + + +class UnknownType(object): + _ = None + + def __new__(cls): + if UnknownType._ is None: + UnknownType._ = object.__new__(cls) + return UnknownType._ + + def __repr__(self): + return "<|???|>" + +Unknown = UnknownType() + +#-------------------------------------------------------- +#-- Set up an alias as a way of easily describing that | +#-- HPSSpath objects are "pure" paths and do not exist | +#-- on the local machine's file system. | +#-------------------------------------------------------- +class iHPSS: + _ = None + + def __new__(cls): + if iHPSS._ is None: + iHPSS._ = object.__new__(cls) + return iHPSS._ + + def __matmul__(self, p): + return pathlib.PurePosixPath(p) + +HPSS = iHPSS() def asDateTime(x): @@ -19,14 +53,48 @@ def asDateTime(x): raise ValueError("cannot interpret value of type {} as instance of datetime.datetime".format( str(type(x)) )) +class fperms(tuple): + def __new__(cls, perms = None): + if (perms is None): + bits = "?"*10 + return cls(bits) + else: + return tuple.__new__(cls, (perms,)) + + def __str__(self): + return self[0] + + @property + def entry(self): + return self[0][0] + + def isdir(self): + return (self.entry == 'd') + + def isfile(self): + return (self.entry == '-') + + @property + def owner(self): + return self[0][1:4] + + @property + def group(self): + return self[0][4:7] + + @property + def other(self): + return self[0][7:10] + class fstat(tuple): def __new__(cls, path, size, **kwargs): - obj = { } - obj['path'] = pathlib.Path(path) + obj = kwargs.copy() + obj['path'] = HPSS @ path obj['size'] = int(size) + obj['perms'] = fperms(kwargs['perms']) if (kwargs.get('perms', None) is not None) else None - for kwarg in ('perms', 'owner', 'group', 'stored', 'xtype'): + for kwarg in ('owner', 'group', 'stored', 'xtype'): obj[kwarg] = kwargs.get(kwarg, None) obj['links'] = 
None if ('links' not in kwargs) else int(kwargs['links']) @@ -38,16 +106,16 @@ def __new__(cls, path, size, **kwargs): return tuple.__new__(cls, s) - path = property(operator.itemgetter(0)) - size = property(operator.itemgetter(1)) - perms = property(operator.itemgetter(2)) - links = property(operator.itemgetter(3)) - owner = property(operator.itemgetter(4)) - group = property(operator.itemgetter(5)) - stored = property(operator.itemgetter(6)) - created = property(operator.itemgetter(7)) - modified = property(operator.itemgetter(8)) - xtype = property(operator.itemgetter(9)) + path = property(operator.itemgetter(0)) + size = property(operator.itemgetter(1)) + perms = property(operator.itemgetter(2)) + links = property(operator.itemgetter(3)) + owner = property(operator.itemgetter(4)) + group = property(operator.itemgetter(5)) + stored = property(operator.itemgetter(6)) + created = property(operator.itemgetter(7)) + modified = property(operator.itemgetter(8)) + xtype = property(operator.itemgetter(9)) def __add__(self, other): me = self.toJDN() @@ -55,23 +123,11 @@ def __add__(self, other): me.update(you) return fstat.fromJDN( me ) - @property - def parent(self): - return self.path.parent - - @property - def folder(self): - return self.path.parent - def isdir(self): - return (self.perms[0] == 'd') + return self.perms.isdir() def isfile(self): - return (self.perms[0] != 'd') - - @property - def name(self): - return self.path.name + return self.perms.isfile() def toJSON(self, **kwargs): return json.dumps( self.toJDN(**kwargs), sort_keys = True, indent = 3 ) @@ -81,27 +137,96 @@ def toJDN(self, **kwargs): 'path': str(self.path), 'size': self.size } - for attr in ('perms', 'links', 'owner', 'group', 'stored', 'created', 'modified'): + jdn['perms'] = str(self.perms) if (self.perms is not None) else None + jdn['created'] = self.created.isoformat() if (self.created is not None) else None + jdn['modified'] = self.created.isoformat() if (self.modified is not None) else None 
+ + for attr in ('links', 'owner', 'group', 'stored'): jdn[attr] = getattr(self, attr) return jdn @classmethod def fromJDN(cls, jdn): path = pathlib.Path(jdn['path']) - size = int(jdn['size']) + size = int(jdn['size']) kwargs = { } for attr in ('perms', 'links', 'owner', 'group', 'stored', 'created', 'modified'): if attr in jdn: kwargs[attr] = jdn[attr] return cls(path, size, **kwargs) + @staticmethod + def from_ls_entry(text): + tokens = [token.strip( ) for token in text.split()] + + perms = tokens[0] + links = int(tokens[1]) + owner = tokens[2] + group = tokens[3] + path = tokens[-1] + + if len(tokens) == 14: + #-- this is a normal file + stored = tokens[6] + size = int(tokens[7]) + dow = tokens[8] + month3 = tokens[9] + dom = tokens[10] + hms = tokens[11] + year4 = tokens[12] + + elif len(tokens) == 12: + #-- this is likely a folder. + stored = None + size = int(tokens[5]) + dow = tokens[6] + month3 = tokens[7] + dom = tokens[8] + hms = tokens[9] + year4 = tokens[10] + + dts = " ".join([dow, month3, dom, hms, year4]) + modified = datetime.datetime.strptime(dts, "%c") + + return fstat(path, size, perms = perms, owner = owner, group = group, stored = stored, links = links, modified = modified) + + + + +class Skill: + def __init__(self, shell): + self.shell = shell + + def do(self, request): + return self.shell.do(request) + + +class AnnotationSkill(Skill): + def __getitem__(self, path): + note = "" + request = "ls -A {}".format( str(path) ) + proc = self.do(request) + lines = proc.stdout.decode('utf-8').split('\n') + if len(lines) >= 3: + aline = lines[2] + if aline.startswith('Annotation:'): + note = aline[11:].strip() + return note + + def __setitem__(self, path, note): + request = 'annotate -A "{}" {}'.format(note, str(path)) + proc = self.do(request) + def __delitem__(self, path): + request = 'annotate -e {}'.format(str(path)) + proc = self.do(request) class HSI: def __init__(self, xpath = None, **kwargs): self.xpath = pathlib.Path("/usr/local/bin/hsi") 
self.login = os.getlogin() + self.proc = None if xpath is not None: self.xpath = pathlib.Path(xpath) @@ -109,32 +234,35 @@ def __init__(self, xpath = None, **kwargs): if 'login' in kwargs: self.login = kwargs['login'] + self.annotations = AnnotationSkill(self) def do(self, command): - comargs = [str(self.xpath), "-l", "ndenny", "-q", "-P", "{}".format(command)] - proc = subprocess.run(comargs, capture_output = True, check = True) - return proc + comargs = [str(self.xpath), "-l", self.login, "-q", "-P", "{}".format(command)] + self.procd = subprocess.run(comargs, capture_output = True, check = True) + return self.procd - def stat(self, target): + def statx(self, target): """ Answers a slightly different set of stats for the given path. """ - if isinstance(target, pathlib.Path): - return self.stat( self.ls(target) ) - elif isinstance(target, str): - return self.stat( pathlib.Path(target) ) + if isinstance(target, (str, pathlib.Path)): + entries = self.ls( pathlib.Path(target) ) + return self.statx( entries[0] ) + elif isinstance(target, fstat): - proc = self.do("ls -d -P {}".format( str(path) )) + request = "ls -d -P {}".format( str(target.path) ) + proc = self.do(request) + lines = [line.strip() for line in proc.stdout.decode('utf-8').split('\n')] line = lines[0] tokens = [token.strip() for token in line.split()] size = 0 - kwargs = { - 'xtype': tokens[0] - } + obj = target.toJDN() - if kwargs['xtype'] in ('FILE', 'HARDLINK'): + obj['xtype'] = tokens[0] + + if obj['xtype'] in ('FILE', 'HARDLINK'): size = int(tokens[2]) created_mdy = tokens[9] created_time = tokens[10] @@ -142,68 +270,56 @@ def stat(self, target): modified_mdy = tokens[11] modified_time = tokens[12] - kwargs['created'] = datetime.datetime.strptime("{} {}".format(created_mdy, created_time), "%m/%d/%Y %H:%M:%S") - kwargs['modified'] = datetime.datetime.strptime("{} {}".format(modified_mdy, modified_time), "%m/%d/%Y %H:%M:%S") + obj['created'] = datetime.datetime.strptime("{} {}".format(created_mdy, 
created_time), "%m/%d/%Y %H:%M:%S") + obj['modified'] = datetime.datetime.strptime("{} {}".format(modified_mdy, modified_time), "%m/%d/%Y %H:%M:%S") + + for k,v in obj.items(): + print("{:40s} -> {}".format(k, str(v))) - return fstat(path, size, **kwargs) + return fstat.fromJDN(obj) + + raise ValueError + + def catalog(self, path = None): + """ + Answers a large list of stat like objects for all entries and sub-entries at the given path. + """ + request = "ls -NUDR" if (path is None) else "ls -NUDR {}".format(str(path)) + proc = self.do(request) + lines = [ ] + for line in proc.stdout.decode('utf-8').split('\n'): + line = line.strip() + if line: + lines.append(line) + entries = [fstat.from_ls_entry(line) for line in lines] + return entries def ls(self, path = None): """ Answers a list of stat-like objects for entries at the given path. """ - if path: - proc = self.do("ls -lUD {}".format( str(path) )) - else: - proc = self.do("ls -lUD") + #---------------------------------------------- + #-- Construct and execute the request via HSI | + #---------------------------------------------- + request = "ls -lUD" if (path is None) else "ls -lUD {}".format( str(path) ) + procd = self.do(request) + + #-------------------------- + #-- Parse the results ... | + #-------------------------- + lines = [line.strip() for line in procd.stdout.decode('utf-8').split('\n')] + lines = [line for line in lines if len(line) > 0] - entries = [ ] + entries = [fstat.parsed_from_ls_entry(line) for line in lines[1:]] + + return entries + + def lsx(self, path = None): + """ + answers a namespace listing with extended file stats. 
+ """ + return [self.statx(entry) for entry in self.ls(path)] - lines = [line.strip() for line in proc.stdout.decode('utf-8').split('\n')] - lines = [line for line in lines if len(line) > 0] - folder = pathlib.Path(lines[0][:-1]) - for line in lines[1:]: - tokens = [token.strip( ) for token in line.split()] - - perms = tokens[0] - links = int(tokens[1]) - owner = tokens[2] - group = tokens[3] - name = tokens[-1] - - #for i, token in enumerate(tokens): - # print("{:02d}: {}".format(i, token)) - if len(tokens) == 14: - #-- this is a normal file - stored = tokens[6] - size = int(tokens[7]) - dow = tokens[8] - month3 = tokens[9] - dom = tokens[10] - hms = tokens[11] - year4 = tokens[12] - - elif len(tokens) == 12: - #-- this is likely a folder. - stored = None - size = int(tokens[5]) - dow = tokens[6] - month3 = tokens[7] - dom = tokens[8] - hms = tokens[9] - year4 = tokens[10] - - dts = " ".join([dow, month3, dom, hms, year4]) - modified = datetime.datetime.strptime(dts, "%c") - - p = folder / name - basic = fstat(p, size, perms = perms, owner = owner, group = group, stored = stored, links = links, modified = modified) - detail = self.stat(p) - entry = detail + basic - - - entries.append( entry ) - - return (proc, entries) diff --git a/bin/fossil.py b/bin/fossil.py index 2d67aac..c2740ea 100644 --- a/bin/fossil.py +++ b/bin/fossil.py @@ -8,6 +8,42 @@ HSI = "/opt/hsi/bin/hsi" HTAR = "/opt/his/bin/htar" +""" +dataset +* path +* name (leave blank to extract default from path stem) +* redundancy (default 2) +* retainer (Fortress/HPSS login to use as owner of backup copy, leave blank for default) +""" + +#----------------------------------------------------------------------------------------------------------------------- +#-- Create an interface object using a specific Fortress/HPSS login and path to the backup job configuration workbook. 
| +#----------------------------------------------------------------------------------------------------------------------- +museum = Fossils(login = ndenny, conf = "idif.xlsx") #-- the default login can also be part of the conf workbook. + +#-------------------------------------------------------------------------------------------------------------- +#-- This is the "do everything" magic method that uses a lot of lower level methods to accomplish its goal.  | +#-------------------------------------------------------------------------------------------------------------- +museum.update() #-- automatically checks today's date vs. Fortress and backup schedule as described in "idif.xlsx" + +#-------------------------------------------------------------------------------------------------------------- +#-- This does a high-level backup for a single, named dataset. | +#-- The dataset is better defined in the conf workbook where we find, e.g. the path, etc. | +#-- Here, the .backup method does some work on deciding whether this should be a full or differential backup. | +#-- Full backups are stored in their own folder with all dependent differentials. | +#-- On Fortress, the dataset name is used as the folder name. | +#-- Specific backup files are named according to {slug}-F{date}-D{xxx}.tar, where ... | +#-- slug is the base32 encoding of the shake128/5 hash of the dataset's name. | +#-- backups are also annotated with provenance in more readable English text. | +#-------------------------------------------------------------------------------------------------------------- +report = museum.backup("QL2_3DEP_LiDAR_IN_2011_2013_l2") + +#-- This forces a differential backup of the given dataset RIGHT NOW. +#-- Without an optional override, default behavior is to use the most recent full backup as the basis for this differential. +report = museum.backup_differential("QL2_3DEP_LiDAR_IN_2011_2013_l2") + + + def backup(folder, level = 1, **kwargs):