From aedf1fbf5d850ed331c0f4462bcefaebe65cf90c Mon Sep 17 00:00:00 2001 From: Nathan Denny Date: Fri, 25 Oct 2024 13:27:07 -0400 Subject: [PATCH] adding SCURLer (sftp by libCURL) and new SFTP storage vault --- lab/fortress_example.py | 5 +- lib/Bastion/Model.py | 36 ++++- lib/Bastion/Movers/sCURL.py | 64 ++++++++- lib/Bastion/Vaults/BFD.py | 74 +++++++--- lib/Bastion/Vaults/HPSS.py | 2 +- lib/Bastion/Vaults/SFTP.py | 276 ++++++++++++++++++++++++++++++++++++ 6 files changed, 423 insertions(+), 34 deletions(-) create mode 100644 lib/Bastion/Vaults/SFTP.py diff --git a/lab/fortress_example.py b/lab/fortress_example.py index 8e98a93..9d5d93d 100644 --- a/lab/fortress_example.py +++ b/lab/fortress_example.py @@ -14,6 +14,7 @@ BLONDE = "3AQXEGFS024A03CMFZMT.tar" fortress = SCURLer(USER, 'sftp.fortress.rcac.purdue.edu', keyfile = KEYPATH, silent = False, verbose = True) -home = fortress['/home/{}'.format(USER)] -bastion = fortress['/home/{}/bastion'.format(USER)] +home = fortress / 'home' / '{}'.format(USER) +bastion = fortress / 'home' / '{}'.format(USER) / 'bastion' + diff --git a/lib/Bastion/Model.py b/lib/Bastion/Model.py index 68215a8..de0269f 100644 --- a/lib/Bastion/Model.py +++ b/lib/Bastion/Model.py @@ -129,12 +129,43 @@ def provision(self, *args): """ raise NotImplementedError + def pack(self, asset, basis = None, **kwargs): + """ + Given a local asset, I package (.tar, .zip, etc) the asset into my scratch (spool) space. + Without a given basis, I package everything (i.e. a full backup). + When a basis is given as either a datetime or an anchor (BLONDE), I do a differential backup. + I answer a tuple like (blonde, tag, repo, packaged) + Where... + * blonde is the BLONDE of the newly created archive + * tag is the relative path to the archive file + * spool is the absolute path to the folder holding the new archive file. + * package is the name of the archive file in the spool folder + """ + raise NotImplementedError + + def unpack(self, halo, root, **kwargs): + """ + Given a local packed archive object (e.g. .tar, .zip, etc.) at halo, + I unpack the archive into the given (local) root path. + """ + raise NotImplementedError + + def push(self, asset, basis = None, **kwargs): """ Given an asset, I push a backup of the asset to this vault. push(asset) - creates a full backup in this vault, creating a new base for differentials push(asset, basis) - creates a differential backup relative to the given basis. {basis} - can be a datetime or a BLONDE. + A typical implementation of .push() ... + 1. call .pack() method to create a local archive in my scratch (spool) space + 2. use .put() to transfer the local archive to the vault space + 3. perform a vault-specific transfer verification + 4. remove the local, scratch (spool) archive file. + Answers a tuple of (transferred, blonde, receipt), where... + * transferred - is the True/False indication answered by .put() + * blonde - is the string reprsentation of the blonde for the archive of the asset, + * receipt - is detailed, structured answer given by the .put() operation. """ raise NotImplementedError @@ -144,8 +175,9 @@ def pull(self, blonde, **kwargs): def put(self, halo, tag, **kwargs): """ Given path to a local file (aka Host Asset LOcation), - move the file from the local scope to this vault and store - the object at tag (the path relative to the root of this vault) + move the file from the local scope to this vault and store the object at tag (the path relative to the root of this vault) + Answers (True, receipt) or (False, receipt) where True indicates transfer success and False indicates transfer failure. + receipt is a (possibly nested) dictionary that is a more detailed (and specific to the actual transfer method) description of the transfer as executed. """ raise NotImplementedError diff --git a/lib/Bastion/Movers/sCURL.py b/lib/Bastion/Movers/sCURL.py index fa72e52..746bbff 100644 --- a/lib/Bastion/Movers/sCURL.py +++ b/lib/Bastion/Movers/sCURL.py @@ -79,6 +79,15 @@ def toJDN(self): def URL(self): return self.scurler.URL(self.rpath) + def __repr__(self): + return "[{}@{}:{}]".format(self.scurler.user, self.scurler.host, str(self.rpath)) + + def __truediv__(self, subpath): + subpath = pathlib.PurePosixPath(subpath) + if subpath.is_absolute(): + raise Exception("sub paths must be relative") + return Alien(self.scurler, self.rpath / subpath) + def __lshift__(self, lpath): """ put/upload operation @@ -100,7 +109,7 @@ def __iter__(self): Iterates over the contents of the folder, each item being a pathlib.Path object. """ - return iter(self.ls()) + return iter(self.lsall()) def ls(self): """ @@ -112,6 +121,11 @@ def ls(self): def lsall(self): return self.scurler.lsall(self.rpath) + @property + def is_dir(self): + #-- alias for is_folder + return self.is_folder + @property def is_folder(self): #-- First look at the permission bits. @@ -128,6 +142,45 @@ def is_folder(self): folderq = False return folderq + @property + def exists(self): + me = self.rpath.name + up = self.scurler / self.rpath.parent + try: + exists = any([(entry.rpath.name == me) for entry in up]) + except NotADirectoryError: + exists = False + return exists + + @property + def files(self): + """ + If I am a folder on the remote host, then I answer a list of the names of the files that I contain (without recursion). + If I am NOT a folder on the remote host, I answer an empty list. + """ + try: + entries = self.lsall() + except NotADirectoryError: + files = [ ] + else: + files = [entry.rpath.name for entry in entries if not entry.is_folder] + return files + + @property + def folders(self): + """ + If I am a folder on the remote host, then I answer a list of names for my subfolders (without recursion). + If I am NOT a folder on the remote host, I answer an empty list. + """ + try: + entries = self.lsall() + except NotADirectoryError: + folders = [ ] + else: + folders = [entry.rpath.name for entry in entries if entry.is_folder] + return folders + + def parsel(entry, rpath = None): """ @@ -189,7 +242,6 @@ def __init__(self, user, host, **kwargs): self.mkdirs = kwargs.get('mkdirs', True) #-- default to creating missing directories in upload paths. self.CURL = kwargs.get('curl', "/usr/bin/curl") - self.lastop = None #------------------------------------------------------------------------- @@ -349,8 +401,6 @@ def mkdir(self, rpath): else: return False - def __getitem__(self, rpath): - """ - Using array index semantics, I answer an "Alien" interface to the given remote path (rpath). - """ - return Alien(self, rpath) + def __truediv__(self, rpath): + root = pathlib.PurePosixPath("/") + return Alien(self, root / rpath) diff --git a/lib/Bastion/Vaults/BFD.py b/lib/Bastion/Vaults/BFD.py index b64c7bd..5086014 100644 --- a/lib/Bastion/Vaults/BFD.py +++ b/lib/Bastion/Vaults/BFD.py @@ -128,13 +128,12 @@ def provision(self, *args): else: raise ValueError - def push(self, asset, basis = None, **kwargs): + def pack(self, asset, basis = None, **kwargs): """ - Given an asset, I push a backup of the asset to this vault. - push(asset) - creates a full backup in this vault, creating a new base for differentials - push(asset, basis) - creates a differential backup relative to the given basis. - {asset} - an instance of Bastion.Model.isAsset - {basis} - can be a datetime or a BLONDE. + Given a local asset, I package (.tar, .zip, etc) the asset into my scratch (spool) space. + Without a given basis, I package everything (i.e. a full backup). + When a basis is given as either a datetime or an anchor (BLONDE), I do a differential backup. + I answer the BLONDE for the package, relative to the local scratch (spool) space. """ detail = 'F' whence = None @@ -148,14 +147,14 @@ def push(self, asset, basis = None, **kwargs): detail = 'D' if isinstance(basis, BLONDE): #-- I was given a BLONDE (a reference to a full backup) - anchor = basis - whence = anchor.when.earliest - genus = anchor.genus + anchor = basis + whence = anchor.when.earliest + genus = anchor.genus blonded = BLONDE.forDiffBackup(anchor) if isinstance(basis, datetime.datetime): - whence = basis - genus = "___" + whence = basis + genus = "___" blonded = BLONDE(asset.ARK, detail, genus) opts['since'] = whence @@ -163,31 +162,62 @@ def push(self, asset, basis = None, **kwargs): else: blonded = BLONDE.forFullBackup(asset.ARK) - tarp = "{}.tar".format(str(blonded)) + package = "{}.tar".format(str(blonded)) + tag = pathlib.PurePosixPath(ark.site) / ark.zone / ark.asset / package + spool = (self.scratch / tag).parent + + #-- assure that the scratch path exists and all of the subpaths that create the repo folder for this asset. + spool.mkdir(parents = True, exist_ok = True) + + #-- use the built-in python tar archiver, using "pax" (POSIX.1-2001) extensions. + pax((self.scratch / tag), asset.halo, **opts) + + #-- answer the BLONDE of the newly created package. + return (blonded, tag, spool, package) + - #-- assure that the scratch and bank paths exist. - (self.scratch / ark.site / ark.zone / ark.asset).mkdir(parents = True, exist_ok = True) - (self.bank / ark.site / ark.zone / ark.asset).mkdir(parents = True, exist_ok = True) + def push(self, asset, basis = None, **kwargs): + """ + Given an asset, I push a backup of the asset to this vault. + push(asset) - creates a full backup in this vault, creating a new base for differentials + push(asset, basis) - creates a differential backup relative to the given basis. + {asset} - an instance of Bastion.Model.isAsset + {basis} - can be a datetime or a BLONDE. + """ + blonde, tag, spool, package = self.pack(asset, basis, **kwargs) - tag = "{}/{}/{}/{}".format(ark.site, ark.zone, ark.asset, tarp) + #-- assure that the bank exists. + (self.bank / tag).parent.mkdir(parents = True, exist_ok = True) - pax(self.scratch / tag, asset.halo, **opts) + transferred, receipt = self.put(self.scratch / tag, tag) - self.put(self.scratch / tag, tag) + if transferred: + #-- clean up! + (self.scratch / tag).unlink() - #-- clean up! - (self.scratch / tag).unlink() + return (transferred, blonde, receipt) - return blonded def pull(self, ark, **kwargs): raise NotImplementedError + def put(self, halo, tag, **kwargs): - here = halo + here = halo there = self.bank / tag logger.debug("put source {} to {}".format(str(here), str(there))) + started = datetime.datetime.now() shutil.copy(here, there) + completed = datetime.datetime.now() + receipt = { + 'tag': str(tag), + 'source': str(here), + 'destination': str(there), + 'started': started.isoformat(), + 'completed': completed.isoformat() + } + return (True, receipt) + def get(self, tag, halo, **kwargs): shutil.copystat(self.bank / tag, halo) diff --git a/lib/Bastion/Vaults/HPSS.py b/lib/Bastion/Vaults/HPSS.py index 2a0cc09..5f18643 100644 --- a/lib/Bastion/Vaults/HPSS.py +++ b/lib/Bastion/Vaults/HPSS.py @@ -321,7 +321,7 @@ def lsx(self, path = None): class Vault(Bastion.Model.Vault): - PROTOCOL = 'HPSS' + PROTOCOL = 'HTAR' def __init__(self, name, **kwargs): Bastion.Model.Vault.__init__(self, name, **kwargs) diff --git a/lib/Bastion/Vaults/SFTP.py b/lib/Bastion/Vaults/SFTP.py new file mode 100644 index 0000000..7e48eb8 --- /dev/null +++ b/lib/Bastion/Vaults/SFTP.py @@ -0,0 +1,276 @@ +import os +import pathlib +import subprocess +import operator +import datetime +import json +import socket +import logging +import getpass +import shutil +import tarfile +import logging + +from Bastion.Common import Thing, Unknown +import Bastion.Model +from Bastion.Curator import Manifest, BLONDE, Snap +from Bastion.Utils import pax +from Bastion.Movers.sCURL import SCURLer + + +logger = logging.getLogger(__name__) + + +def asPath(x): + return pathlib.Path(x) + +def asPurePath(x): + return pathlib.PurePosixPath(x) + + +class Vault(Bastion.Model.Vault): + PROTOCOL = 'SFTP' + + def __init__(self, name, **kwargs): + Bastion.Model.Vault.__init__(self, name, **kwargs) + + self.scratch = pathlib.Path("/tmp") + self.host = None + self.login = getpass.getuser() + self.key = pathlib.Path("~/.ssh/id") + self.bank = None + + def configured(self, conf): + confkey = "vaults.{}".format(self.name) + if confkey in conf: + section = conf[confkey] + remote = section['remote'] + + #-- configuration relevant to local host. + local = section['local'] + self.scratch = local.get(asPath, "scratch", "/tmp") + + #-- Configuration relevant to remote (bank) host. + self.host = remote.get('host') + self.login = remote.get('login', getpass.getuser()) + self.key = remote.get(asPath, 'key', pathlib.Path("~/.ssh/id").expanduser()) + self.root = remote.get(asPurePath, 'root', "/") + + return self + + @property + def bank(self): + client = SCURLer(self.login, self.host, keyfile = self.key) + return (client / self.root) + +#--------------------------------------- +#-- BEGIN Bastion.Model.Vault PROTOCOL | +#↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ + @property + def ARKs(self): + """ + I answer a sorted collection of all assets (ARKs) held in this vault. + """ + arks = [ ] + for site in self.sites: + for zone in self.zones(site): + for asset in self.assets(site, zone): + arks.append( ARK(site, zone, asset) ) + return tuple(sorted(arks)) + + @property + def sites(self): + """ + I answer a sorted collection of the names of all sites known to this vault. + """ + #-- sites are top level elements relative to the root of the vault. + if self.bank.exists: + for entry in self.bank: + if entry.is_folder: + sites.append(entry.rpath.name) + return tuple(sorted(sites)) + + def zones(self, site): + """ + Given a site name, I answer a sorted collection of zone names that are known to this vault. + """ + #-- a zone will be a subdirectory (subfolder) of the given site. + #-- look for all of the subfolders of root / site + zones = [ ] + sroot = self.bank / RDN(site) + if sroot.exists: + if sroot.is_folder: + for entry in sroot: + if entry.is_folder: + zones.append(entry.rpath.name) + return tuple(sorted(zones)) + + def assets(self, site, zone): + #-- assets will be subdirectories (subfolders) of a given site, zone. + assets = [ ] + zroot = self.bank / RDN(site) / RDN(zone) + if zroot.exists: + if zroot.is_folder: + assets.append(entry.name) + return tuple(sorted(assets)) + + def manifest(self, *args): + """ + I answer a manifest of the named asset... + manifest(ark) + manifest(site, zone, asset) + """ + if len(args) == 1: + arg = args[0] + if isinstance(arg, Bastion.Model.isAsset): + ark = arg.ARK + else: + ark = arg + return self._manifest_ark( ark ) + elif len(args) == 3: + return self._manifest_site_zone_asset( args[0], args[1], args[2] ) + else: + raise ValueError + + def provision(self, *args): + """ + provision(ark) - ensures that the site, zone, and asset folders exist. + provision(site, zone, asset_name) - an alias for provision(ark) + """ + if len(args) == 1: + return self._provision_ark( args[0] ) + elif len(args) == 3: + return self._provision_site_zone_asset( args[0], args[1], args[2] ) + else: + raise ValueError + + def pack(self, asset, basis = None, **kwargs): + """ + Given a local asset, I package (.tar, .zip, etc) the asset into my scratch (spool) space. + Without a given basis, I package everything (i.e. a full backup). + When a basis is given as either a datetime or an anchor (BLONDE), I do a differential backup. + I answer the BLONDE for the package, relative to the local scratch (spool) space. + """ + detail = 'F' + whence = None + basis = None + blonded = None + tarp = None + ark = asset.ARK + opts = { } + + if basis: + detail = 'D' + if isinstance(basis, BLONDE): + #-- I was given a BLONDE (a reference to a full backup) + anchor = basis + whence = anchor.when.earliest + genus = anchor.genus + blonded = BLONDE.forDiffBackup(anchor) + + if isinstance(basis, datetime.datetime): + whence = basis + genus = "___" + blonded = BLONDE(asset.ARK, detail, genus) + + opts['since'] = whence + + else: + blonded = BLONDE.forFullBackup(asset.ARK) + + package = "{}.tar".format(str(blonded)) + tag = pathlib.PurePosixPath(ark.site) / ark.zone / ark.asset / package + spool = (self.scratch / tag).parent + + #-- assure that the scratch path exists and all of the subpaths that create the repo folder for this asset. + spool.mkdir(parents = True, exist_ok = True) + + #-- use the built-in python tar archiver, using "pax" (POSIX.1-2001) extensions. + pax((self.scratch / tag), asset.halo, **opts) + + #-- answer the BLONDE of the newly created package. + return (blonded, tag, spool, package) + + + def push(self, asset, basis = None, **kwargs): + """ + Given an asset, I push a backup of the asset to this vault. + push(asset) - creates a full backup in this vault, creating a new base for differentials + push(asset, basis) - creates a differential backup relative to the given basis. + {asset} - an instance of Bastion.Model.isAsset + {basis} - can be a datetime or a BLONDE. + """ + blonde, tag, spool, package = self.pack(asset, basis, **kwargs) + + #-- assure that the bank exists. + (self.bank / tag).parent.mkdir(parents = True, exist_ok = True) + + transferred, receipt = self.put(self.scratch / tag, tag) + + if transferred: + #-- clean up! + (self.scratch / tag).unlink() + + return (transferred, blonde, receipt) + + + def pull(self, ark, **kwargs): + raise NotImplementedError + + + def put(self, halo, tag, **kwargs): + here = halo + there = self.bank / tag + logger.debug("put source {} to {}".format(str(here), str(there))) + started = datetime.datetime.now() + shutil.copy(here, there) + completed = datetime.datetime.now() + receipt = { + 'tag': str(tag), + 'source': str(here), + 'destination': str(there), + 'started': started.isoformat(), + 'completed': completed.isoformat() + } + return (True, receipt) + + + def get(self, tag, halo, **kwargs): + shutil.copystat(self.bank / tag, halo) + + +#↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑ +#-- END Bastion.Model.Vault PROTOCOL | +#------------------------------------- + def _copy_blonde(self, blonde, **kwargs): + raise NotImplementedError + + def _empty_scratch(self, ark, **kwargs): + raise NotImplementedError + + def _manifest_ark(self, ark): + #-- The contents of {root}/{site}/{zone}/{asset} are backup blobs + #-- Each name in this folder is a "BLOND" (BLOb Name and Descriptor) + #-- The manifest a catalog of all of the backup objects for the asset. + cell = self.bank / ark.site / ark.zone / ark.asset + if cell.exists(): + blondes = [ ] + for item in cell.iterdir(): + if not item.is_dir(): + blondes.append( BLONDE.decode(item.stem) ) + manifest = Bastion.Curator.Manifest(ark, blondes) + + return manifest + + def _manifest_site_zone_asset(self, site, zone, asset): + return self._manifest_ark( ARK(site, zone, asset) ) + + def _provision_ark(self, ark): + repo = self.bank / ark.site / ark.zone / ark.asset + return repo.mkdir(parents = True, exist_ok = True) + + def _provision_site_zone_asset(self, site, zone, asset_name): + return self._provision_ark( ARK(site, zone, asset_name) ) + + +Vault.register()