diff --git a/lab/depot_example.py b/lab/depot_example.py new file mode 100644 index 0000000..ca4a595 --- /dev/null +++ b/lab/depot_example.py @@ -0,0 +1,40 @@ +import pathlib +import getpass +import os +import sys +import logging + +from Bastion.NetOps.sCURL import SCURLer + +logger = logging.getLogger(__name__) +logging.basicConfig(level = logging.DEBUG) + +SSHROOT = pathlib.Path("~/.ssh").expanduser() + +if len(sys.argv) > 1: + USER = sys.argv[1] +else: + USER = getpass.getuser() + +#-- default to no explicit key file when none of the candidate keys exist. +KEYPATH = None +for candidate in ['bastion_bot', 'id_ecdsa', 'id_rsa']: + keypath = SSHROOT / candidate + if keypath.exists(): + logger.debug("selected key at {} for connection".format(str(keypath))) + KEYPATH = keypath + break + +LOCAL = pathlib.Path("/mnt/BFD/bastion/bank") +REMOTE = pathlib.PurePosixPath("/depot/agdata/data/bastion") + +SITE = "rusina" +ZONE = "soundscapes" +ASSET = "HackathonData" +BLONDE = "3AQXEGFS024A03CMFZMT.tar" + +depot = SCURLer('data.rcac.purdue.edu', '/depot', user = USER, keyfile = KEYPATH, silent = False, verbose = True) +agdata = depot / 'agdata' / 'data' +bastion = agdata / 'bastion' + + diff --git a/lib/Bastion/Clerks/BFD.py b/lib/Bastion/Clerks/BFD.py new file mode 100644 index 0000000..0e2819e --- /dev/null +++ b/lib/Bastion/Clerks/BFD.py @@ -0,0 +1,107 @@ +import os +import pathlib +import subprocess +import operator +import datetime +import json +import socket +import logging +import getpass +import shutil +import tarfile +import logging + +from Bastion.Common import Thing, Unknown +from Bastion.Model import isAsset, isClerk +from Bastion.Curator import Manifest, BLONDE, Snap +from Bastion.Packers.TARs import pax + + +logger = logging.getLogger(__name__) + +class Clerk(isClerk): + def __init__(self, vault, **kwargs): + isClerk.__init__(self) + self.vault = vault + + @property + def bfd(self): + return self.vault.bfd + + @property + def bank(self): + return self.vault.bank + + @property + def scratch(self): + return self.vault.scratch + 
+#----------------------------------------- +#-- BEGIN Bastion.Model.isClerk PROTOCOL | +#↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ + @property + def sites(self): + """ + I answer a sorted collection of the names of all sites known to this vault. + """ + #-- sites are top level elements relative to the root of the vault. + sites = [ ] + for entry in self.root.iterdir(): + if entry.is_dir(): + sites.append(entry.name) + + return tuple(sorted(sites)) + + def zones(self, site): + """ + Given a site name, I answer a sorted collection of zone names that are known to this vault. + """ + #-- a zone will be a subdirectory (subfolder) of the given site. + #-- look for all of the subfolders of root / site + zones = [ ] + sroot = self.root / RDN(site) + if sroot.exists( ): + for entry in sroot.iterdir( ): + if entry.is_dir(): + zones.append(entry.name) + return tuple(sorted(zones)) + + def assets(self, site, zone): + #-- assets will be subdirectories (subfolders) of a given site, zone. + assets = [ ] + zroot = self.root / site / zone + if zroot.exists( ): + for entry in zroot.iterdir(): + assets.append(entry.name) + return tuple(sorted(assets)) + + def manifest(self, *args): + """ + I answer a manifest of the named asset... 
+ manifest(ark) + manifest(site, zone, asset) + """ + ark = None + if len(args) == 1: + arg = args[0] + if isinstance(arg, isAsset): + ark = arg.ARK + else: + ark = arg + elif len(args) == 3: + ark = ARK(*args) + else: + raise ValueError + + cell = self.bank / ark.site / ark.zone / ark.asset + if cell.exists(): + blondes = [ ] + for item in cell.iterdir(): + if not item.is_dir(): + blondes.append( BLONDE.decode(item.stem) ) + manifest = Manifest(ark, blondes) + + return manifest +#↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑ +#-- END Bastion.Model.isClerk PROTOCOL | +#--------------------------------------- diff --git a/lib/Bastion/Clerks/SFTP.py b/lib/Bastion/Clerks/SFTP.py new file mode 100644 index 0000000..8879fa6 --- /dev/null +++ b/lib/Bastion/Clerks/SFTP.py @@ -0,0 +1,98 @@ +import os +import pathlib +import subprocess +import operator +import datetime +import json +import socket +import logging +import getpass +import shutil +import tarfile +import logging + +from Bastion.Common import Thing, Unknown +from Bastion.Model import isAsset, isClerk +from Bastion.Curator import Manifest, BLONDE, Snap +from Bastion.Packers.TARs import pax +from Bastion.NetOps.sCURL import SCURLer + + +logger = logging.getLogger(__name__) + + +class Clerk(isClerk): + def __init__(self, vault, **kwargs): + isClerk.__init__(self) + self.vault = vault + self.scurler = SCURLer(self.vault.sfURL, keyfile = self.vault.keypath) + +#----------------------------------------- +#-- BEGIN Bastion.Model.isClerk PROTOCOL | +#↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ + @property + def sites(self): + """ + I answer a sorted collection of the names of all sites known to this vault. + """ + #-- sites are top level elements relative to the root of the vault. 
+ sites = [ ] + for alien in self.scurler.lsall(): + if alien.is_dir: + sites.append(alien.name) + + return tuple(sorted(sites)) + + def zones(self, site): + """ + Given a site name, I answer a sorted collection of zone names that are known to this vault. + """ + #-- a zone will be a subdirectory (subfolder) of the given site. + #-- look for all of the subfolders of root / site + zones = [ ] + sroot = self.scurler / RDN(site) + if sroot.exists: + for alien in sroot.lsall( ): + if alien.is_dir: + zones.append(alien.name) + return tuple(sorted(zones)) + + def assets(self, site, zone): + #-- assets will be subdirectories (subfolders) of a given site, zone. + assets = [ ] + zroot = self.scurler / site / zone + if zroot.exists: + for alien in zroot.lsall(): + assets.append(alien.name) + return tuple(sorted(assets)) + + def manifest(self, *args): + """ + I answer a manifest of the named asset... + manifest(ark) + manifest(site, zone, asset) + """ + ark = None + if len(args) == 1: + arg = args[0] + if isinstance(arg, isAsset): + ark = arg.ARK + else: + ark = arg + elif len(args) == 3: + ark = ARK(*args) + else: + raise ValueError + + cell = self.scurler / ark.site / ark.zone / ark.asset + if cell.exists: + blondes = [ ] + for alien in cell.lsall(): + if not alien.is_dir: + blondes.append( BLONDE.decode(alien.stem) ) + manifest = Manifest(ark, blondes) + + return manifest +#↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑ +#-- END Bastion.Model.isClerk PROTOCOL | +#--------------------------------------- diff --git a/lib/Bastion/Model.py b/lib/Bastion/Model.py index 74cbf4b..75fd6f3 100644 --- a/lib/Bastion/Model.py +++ b/lib/Bastion/Model.py @@ -103,12 +103,16 @@ class Vault(isVault): def __init__(self, name, **kwargs): self.name = name + #---------------------------------------------------------------------------- + #-- BEGIN CLERK DELEGATION | + #-- These methods are basically pass-throughs to the clerk's implementation | 
+ #↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ @property def sites(self): """ I am the set of all sites tracked by this vault. """ - raise NotImplementedError + return self.clerk.sites @property def ARKs(self): @@ -118,27 +122,35 @@ def zones(self, site): """ I answer the set of zones for a given site. """ - raise NotImplementedError + return self.clerk.zones(site) def assets(self, zone): """ I am the set of all assets tracked by this vault. """ - raise NotImplementedError + return self.clerk.assets(zone) def manifest(self, ark): """ Given an ARK, I answer the manifest of held objects (aka BLONDEs). """ - raise NotImplementedError + return self.clerk.manifest(ark) + #↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑ + #-- END CLERK DELEGATES | + #---------------------------------------------------------------------------- + + #----------------------------------------------------------- + #-- BEGIN MOVER DELEGATES | + #-- These methods are pass throughs to the attached mover. | + #↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ def provision(self, *args): """ provision(ark) - ensures that the site, zone, and asset folders exist. provision(site, zone, asset_name) - an alias for provision(ark) provision(asset) - given an instance of Asset, provision the necessary site, zone, and asset folders. """ - raise NotImplementedError + return self.mover.provision(*args) def pack(self, asset, basis = None, **kwargs): """ @@ -152,14 +164,14 @@ def pack(self, asset, basis = None, **kwargs): * spool is the absolute path to the folder holding the new archive file. * package is the name of the archive file in the spool folder """ - raise NotImplementedError + return self.mover.pack(asset, basis, **kwargs) def unpack(self, halo, root, **kwargs): """ Given a local packed archive object (e.g. .tar, .zip, etc.) at halo, I unpack the archive into the given (local) root path. 
""" - raise NotImplementedError + return self.mover.unpack(halo, root, **kwargs) def push(self, asset, basis = None, **kwargs): @@ -178,10 +190,10 @@ def push(self, asset, basis = None, **kwargs): * blonde - is the string reprsentation of the blonde for the archive of the asset, * receipt - is detailed, structured answer given by the .put() operation. """ - raise NotImplementedError + return self.mover.push(asset, basis, **kwargs) def pull(self, blonde, **kwargs): - raise NotImplementedError + return self.mover.pull(blonde, **kwargs) def put(self, halo, tag, **kwargs): """ @@ -190,14 +202,18 @@ def put(self, halo, tag, **kwargs): Answers (True, receipt) or (False, receipt) where True indicates transfer success and False indicates transfer failure. receipt is a (possibly nested) dictionary that is a more detailed (and specific to the actual transfer method) description of the transfer as executed. """ - raise NotImplementedError + return self.mover.put(halo, tag, **kwargs) def get(self, tag, halo, **kwargs): """ Given a tag (the path relative to the root of this vault), download the object and store it in the local file designated by halo. """ - raise NotImplementedError + return self.mover.get(tag, halo, **kwargs) + + #↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑ + #-- END MOVER DELEGATES | + #---------------------------------------------------------------------------- def configured(self, conf): raise NotImplementedError @@ -301,6 +317,125 @@ class isSite: pass +class isClerk: + """ + abstract class for metadata and file management specific to the capabilities of a given vault type. + """ + @property + def ARKs(self): + """ + I answer a sorted collection of all assets (ARKs) held in this vault. 
+ """ + arks = [ ] + for site in self.sites: + for zone in self.zones(site): + for asset in self.assets(site, zone): + arks.append( ARK(site, zone, asset) ) + return tuple(sorted(arks)) + + @property + def sites(self): + """ + I am the set of all sites tracked by this vault. + """ + raise NotImplementedError + + def zones(self, site): + """ + I answer the set of zones for a given site. + """ + raise NotImplementedError + + def assets(self, site, zone): + """ + I answer the set of assets tracked by this vault for a given site and zone. + """ + raise NotImplementedError + + def manifest(self, ark): + """ + Given an ARK, I answer the manifest of held objects (aka BLONDEs). + """ + raise NotImplementedError + + +class isMover: + """ + abstract class for file movement in to and out of a specific vault type. + """ + def provision(self, *args): + """ + provision(ark) - ensures that the site, zone, and asset folders exist. + provision(site, zone, asset_name) - an alias for provision(ark) + provision(asset) - given an instance of Asset, provision the necessary site, zone, and asset folders. + """ + raise NotImplementedError + + def pack(self, asset, basis = None, **kwargs): + """ + Given a local asset, I package (.tar, .zip, etc) the asset into my scratch (spool) space. + Without a given basis, I package everything (i.e. a full backup). + When a basis is given as either a datetime or an anchor (BLONDE), I do a differential backup. + I answer a tuple like (blonde, tag, repo, packaged) + Where... + * blonde is the BLONDE of the newly created archive + * tag is the relative path to the archive file + * spool is the absolute path to the folder holding the new archive file. + * package is the name of the archive file in the spool folder + """ + raise NotImplementedError + + def unpack(self, halo, root, **kwargs): + """ + Given a local packed archive object (e.g. .tar, .zip, etc.) at halo, + I unpack the archive into the given (local) root path. 
+ """ + raise NotImplementedError + + def push(self, asset, basis = None, **kwargs): + """ + Given an asset, I push a backup of the asset to this vault. + push(asset) - creates a full backup in this vault, creating a new base for differentials + push(asset, basis) - creates a differential backup relative to the given basis. + {basis} - can be a datetime or a BLONDE. + A typical implementation of .push() ... + 1. call .pack() method to create a local archive in my scratch (spool) space + 2. use .put() to transfer the local archive to the vault space + 3. perform a vault-specific transfer verification + 4. remove the local, scratch (spool) archive file. + Answers a tuple of (transferred, blonde, receipt), where... + * transferred - is the True/False indication answered by .put() + * blonde - is the string reprsentation of the blonde for the archive of the asset, + * receipt - is detailed, structured answer given by the .put() operation. + """ + raise NotImplementedError + + def pull(self, blonde, **kwargs): + raise NotImplementedError + + def put(self, halo, tag, **kwargs): + """ + Given path to a local file (aka Host Asset LOcation), + move the file from the local scope to this vault and store the object at tag (the path relative to the root of this vault) + Answers (True, receipt) or (False, receipt) where True indicates transfer success and False indicates transfer failure. + receipt is a (possibly nested) dictionary that is a more detailed (and specific to the actual transfer method) description of the transfer as executed. + """ + raise NotImplementedError + + def get(self, tag, halo, **kwargs): + """ + Given a tag (the path relative to the root of this vault), + download the object and store it in the local file designated by halo. + """ + raise NotImplementedError + + def remove(self, tag): + """ + Given a tag (the path relative to the root of this vault), + I remove the object with the given tag (i.e. path). 
+ """ + raise NotImplementedError + #-- Monkey patch the "entity" class for syntax sugar. def entity_isSite(self): return isinstance(self.subject, isSite) @@ -317,6 +452,12 @@ def entity_isARK(self): def entity_isVault(self): return isinstance(self.subject, isVault) +def entity_isClerk(self): + return isinstance(self.subject, isClerk) + +def entity_isMover(self): + return isinstance(self.subject, isMover) + entity.isAsset = property(entity_isAsset) entity.isZone = property(entity_isZone) entity.isSite = property(entity_isSite) diff --git a/lib/Bastion/Movers/BFD.py b/lib/Bastion/Movers/BFD.py new file mode 100644 index 0000000..6f8ab1b --- /dev/null +++ b/lib/Bastion/Movers/BFD.py @@ -0,0 +1,150 @@ +import os +import pathlib +import operator +import datetime +import logging +import getpass +import shutil +import logging + +from Bastion.Common import Thing, Unknown +from Bastion.Packers.TARs import pax +from Bastion.Model import isMover +from Bastion.Curator import Manifest, BLONDE, Snap + + +logger = logging.getLogger(__name__) + + +class Mover(isMover): + def __init__(self, vault, **kwargs): + isMover.__init__(self) + self.vault = vault + + @property + def bfd(self): + return self.vault.bfd + + @property + def bank(self): + return self.vault.bank + + @property + def scratch(self): + return self.vault.scratch + +#----------------------------------------- +#-- BEGIN Bastion.Model.isMover PROTOCOL | +#↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ + def provision(self, *args): + """ + provision(ark) - ensures that the site, zone, and asset folders exist. 
+ provision(site, zone, asset_name) - an alias for provision(ark) + """ + ark = None + if len(args) == 1: + ark = args[0] + elif len(args) == 3: + ark = ARK(args[0], args[1], args[2]) + else: + raise ValueError + + repo = self.bank / ark.site / ark.zone / ark.asset + return repo.mkdir(parents = True, exist_ok = True) + + + def pack(self, asset, basis = None, **kwargs): + """ + Given a local asset, I package (.tar, .zip, etc) the asset into my scratch (spool) space. + Without a given basis, I package everything (i.e. a full backup). + When a basis is given as either a datetime or an anchor (BLONDE), I do a differential backup. + I answer the BLONDE for the package, relative to the local scratch (spool) space. + """ + detail = 'F' + whence = None + genus = None + blonded = None + tarp = None + ark = asset.ARK + opts = { } + + if basis: + detail = 'D' + if isinstance(basis, BLONDE): + #-- I was given a BLONDE (a reference to a full backup) + anchor = basis + whence = anchor.when.earliest + genus = anchor.genus + blonded = BLONDE.forDiffBackup(anchor) + + if isinstance(basis, datetime.datetime): + whence = basis + genus = "___" + blonded = BLONDE(asset.ARK, detail, genus) + + opts['since'] = whence + + else: + blonded = BLONDE.forFullBackup(asset.ARK) + + package = "{}.tar".format(str(blonded)) + tag = pathlib.PurePosixPath(ark.site) / ark.zone / ark.asset / package + spool = (self.scratch / tag).parent + + #-- assure that the scratch path exists and all of the subpaths that create the repo folder for this asset. + spool.mkdir(parents = True, exist_ok = True) + + #-- use the built-in python tar archiver, using "pax" (POSIX.1-2001) extensions. + pax((self.scratch / tag), asset.halo, **opts) + + #-- answer the BLONDE of the newly created package. + return (blonded, tag, spool, package) + + + def push(self, asset, basis = None, **kwargs): + """ + Given an asset, I push a backup of the asset to this vault. 
+ push(asset) - creates a full backup in this vault, creating a new base for differentials + push(asset, basis) - creates a differential backup relative to the given basis. + {asset} - an instance of Bastion.Model.isAsset + {basis} - can be a datetime or a BLONDE. + """ + blonde, tag, spool, package = self.pack(asset, basis, **kwargs) + + #-- assure that the bank exists. + (self.bank / tag).parent.mkdir(parents = True, exist_ok = True) + + transferred, receipt = self.put(self.scratch / tag, tag) + + if transferred: + #-- clean up! + (self.scratch / tag).unlink() + + return (transferred, blonde, receipt) + + + def pull(self, ark, **kwargs): + raise NotImplementedError + + + def put(self, halo, tag, **kwargs): + here = halo + there = self.bank / tag + logger.debug("put source {} to {}".format(str(here), str(there))) + started = datetime.datetime.now() + shutil.copy(here, there) + completed = datetime.datetime.now() + receipt = { + 'tag': str(tag), + 'source': str(here), + 'destination': str(there), + 'started': started.isoformat(), + 'completed': completed.isoformat() + } + return (True, receipt) + + def get(self, tag, halo, **kwargs): + shutil.copy(self.bank / tag, halo) +#↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑ +#-- END Bastion.Model.isMover PROTOCOL | +#--------------------------------------- diff --git a/lib/Bastion/Movers/SFTP.py b/lib/Bastion/Movers/SFTP.py new file mode 100644 index 0000000..997daf1 --- /dev/null +++ b/lib/Bastion/Movers/SFTP.py @@ -0,0 +1,179 @@ +import os +import pathlib +import subprocess +import operator +import datetime +import json +import socket +import logging +import getpass +import shutil +import tarfile +import logging + +from Bastion.Common import Thing, Unknown +import Bastion.Model +from Bastion.Curator import Manifest, BLONDE, Snap + + +logger = logging.getLogger(__name__) + + +def asPath(x): + return pathlib.Path(x) + +def asPurePath(x): + return pathlib.PurePosixPath(x) + + +class Mover(Bastion.Model.isMover): + def 
__init__(self, vault, **kwargs): + Bastion.Model.isMover.__init__(self) + self.vault = vault + self.scurler = SCURLer(self.vault.sfURL, keyfile = self.vault.keypath) + + @property + def mover(self): + if getattr(self, '_mover', None) is None: + self._mover = Mover(self, self.host, self.login, self.keypath) + return self._mover + + @property + def clerk(self): + if getattr(self, '_clerk', None) is None: + self._clerk = Clerk(self, target, self.host, self.login, self.keypath) + return self._clerk + + @property + def bank(self): + client = SCURLer(self.login, self.host, self.root, keyfile = self.key) + +#----------------------------------------- +#-- BEGIN Bastion.Model.isMover PROTOCOL | +#↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ + def provision(self, *args): + """ + provision(ark) - ensures that the site, zone, and asset folders exist. + provision(site, zone, asset_name) - an alias for provision(ark) + """ + ark = None + if len(args) == 1: + ark = args[0] + elif len(args) == 3: + ark = ARK(args[0], args[1], args[2]) + else: + raise ValueError + + repo = self.scurler / ark.site / ark.zone / ark.asset + return repo.mkdir(parents = True, exist_ok = True) + + + def pack(self, asset, basis = None, **kwargs): + """ + Given a local asset, I package (.tar, .zip, etc) the asset into my scratch (spool) space. + Without a given basis, I package everything (i.e. a full backup). + When a basis is given as either a datetime or an anchor (BLONDE), I do a differential backup. 
+ I answer an instance of Bastion.Model.PackingReceipt + """ + detail = 'F' + whence = None + genus = None + blonded = None + tarp = None + ark = asset.ARK + opts = { } + + if basis: + detail = 'D' + if isinstance(basis, BLONDE): + #-- I was given a BLONDE (a reference to a full backup) + anchor = basis + whence = anchor.when.earliest + genus = anchor.genus + blonded = BLONDE.forDiffBackup(anchor) + + if isinstance(basis, datetime.datetime): + whence = basis + genus = "___" + blonded = BLONDE(asset.ARK, detail, genus) + + opts['since'] = whence + + else: + blonded = BLONDE.forFullBackup(asset.ARK) + + package = "{}.tar".format(str(blonded)) + tag = pathlib.PurePosixPath(ark.site) / ark.zone / ark.asset / package + spooled = (self.scratch / tag) + spool = spooled.parent + + #-- assure that the scratch path exists and all of the subpaths that create the repo folder for this asset. + spool.mkdir(parents = True, exist_ok = True) + + #-- use the built-in python tar archiver, using "pax" (POSIX.1-2001) extensions. + pax((self.scratch / tag), asset.halo, **opts) + + #-- start a receipt for packing the asset. + receipt = Bastion.Model.PackingReceipt(asset, blonded, spooled) + + #-- some optional information that might be of use... + receipt['size'] = spooled.stat().st_size + receipt['tag'] = str(tag) + + #-- Answer the receipt. + return receipt + + + def push(self, asset, basis = None, **kwargs): + """ + Given an asset, I push a backup of the asset to this vault. + push(asset) - creates a full backup in this vault, creating a new base for differentials + push(asset, basis) - creates a differential backup relative to the given basis. + {asset} - an instance of Bastion.Model.isAsset + {basis} - can be a datetime or a BLONDE. + """ + packed = self.pack(asset, basis, **kwargs) + + xferrd = self.put(packed.spooled, packed.opts['tag']) + + if transferred: + #-- clean up! 
+ (self.scratch / tag).unlink() + + return (transferred, blonde, receipt) + + def pull(self, ark, **kwargs): + raise NotImplementedError + + def put(self, halo, tag, **kwargs): + here = halo + there = self.bank / tag + logger.debug("put source {} to {}".format(str(here), str(there))) + started = datetime.datetime.now() + + completed = datetime.datetime.now() + receipt = { + 'tag': str(tag), + 'source': str(here), + 'URL': str(there), + 'started': started.isoformat(), + 'completed': completed.isoformat() + } + return (True, receipt) + + def get(self, tag, halo, **kwargs): + shutil.copystat(self.bank / tag, halo) + + +#↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑ +#-- END Bastion.Model.Vault PROTOCOL | +#------------------------------------- + def _provision_ark(self, ark): + repo = self.bank / ark.site / ark.zone / ark.asset + return repo.mkdir(parents = True, exist_ok = True) + + def _provision_site_zone_asset(self, site, zone, asset_name): + return self._provision_ark( ARK(site, zone, asset_name) ) + + +Vault.register() diff --git a/lib/Bastion/NetOps/__init__.py b/lib/Bastion/NetOps/__init__.py new file mode 100644 index 0000000..fcf049d --- /dev/null +++ b/lib/Bastion/NetOps/__init__.py @@ -0,0 +1,5 @@ +""" +Bastion.NetOps + +A module holding a collection of network transfer utilities, etc. 
+""" diff --git a/lib/Bastion/Movers/sCURL.py b/lib/Bastion/NetOps/sCURL.py similarity index 76% rename from lib/Bastion/Movers/sCURL.py rename to lib/Bastion/NetOps/sCURL.py index ffcd13b..5b6fdae 100644 --- a/lib/Bastion/Movers/sCURL.py +++ b/lib/Bastion/NetOps/sCURL.py @@ -7,12 +7,15 @@ import pathlib import datetime import sys +import getpass +import operator import logging logger = logging.getLogger(__name__) logging.basicConfig(level = logging.DEBUG) + nMONTH = { 'Jan': 1, 'Feb': 2, @@ -29,6 +32,51 @@ } +#class isRemotePosixFileSystem: +# def ls(self, *args, **kwargs): +# raise NotImplementedError +# +# def + + +class sfURL(tuple): + """ + secure file URL + a small class that can hold connection information + """ + def __new__(cls, *args, **kwargs): + #-- Can be called as... + #-- sfURL(instance_of_sfURL) + #-- sfURL(host, path, **kwargs) + if (len(args) == 1) and isinstance(args[0], cls): + return args[0] + + host = args[0] + path = pathlib.PurePosixPath("/") + port = kwargs.get('port', 22) + user = kwargs.get('user', getpass.getuser()) + + if len(args) > 1: + path = pathlib.PurePosixPath(args[1]) + + return tuple.__new__(cls, (host, port, user, path)) + + host = property(operator.itemgetter(0)) + port = property(operator.itemgetter(1)) + user = property(operator.itemgetter(2)) + path = property(operator.itemgetter(3)) + + def __str__(self): + if self.port != 22: + return "sftp://{}@{}:{}{}".format(self.user, self.host, self.port, str(self.path)) + else: + return "sftp://{}@{}{}".format(self.user, self.host, str(self.path)) + + def __truediv__(self, subpath): + path = self.path / subpath + return sfURL(self.host, path, user = self.user, port = self.port) + + class Alien: """ I am an accessor to a remotely hosted object (i.e. a resource path on a remote host). 
@@ -121,11 +169,49 @@ def ls(self): def lsall(self): return self.scurler.lsall(self.rpath) + def mkdir(self): + return self.scurler.mkdir(self.rpath) + +#----------------------------------------- +#-- BEGIN pathlib.Path emulation methods | +#↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ + def iterdir(self): + return self.scurler.lsall(self.rpath) + + @property + def name(self): + return self.rpath.name + + @property + def suffix(self): + return self.rpath.suffix + + @property + def stem(self): + return self.rpath.stem + + @property + def parent(self): + return self.rpath.parent + @property def is_dir(self): #-- alias for is_folder return self.is_folder + @property + def exists(self): + me = self.rpath.name + up = self.scurler / self.rpath.parent + try: + exists = any([(entry.rpath.name == me) for entry in up]) + except NotADirectoryError: + exists = False + return exists +#↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑ +#-- END pathlib.Path emulation methods | +#--------------------------------------- + @property def is_folder(self): #-- First look at the permission bits. @@ -142,16 +228,6 @@ def is_folder(self): folderq = False return folderq - @property - def exists(self): - me = self.rpath.name - up = self.scurler / self.rpath.parent - try: - exists = any([(entry.rpath.name == me) for entry in up]) - except NotADirectoryError: - exists = False - return exists - @property def files(self): """ @@ -189,9 +265,6 @@ def parsel(entry, rpath = None): drwxr-xr-x 2 ndenny student 2 Apr 17 14:12 Wolfram Mathematica """ fields = dict(enumerate(entry.split())) - print(">>>{}<<<".format(entry)) - for k in sorted(fields.keys()): - print("{} --> {}".format(k, fields[k])) permits = fields[0] owner = fields[2] group = fields[3] @@ -224,26 +297,27 @@ def parsel(entry, rpath = None): return parsed + class SCURLer: """ I am a client for executing single SFTP operations on a remote host via CURL. I execute all of my SFTP operations by executing CURL in a subshell. 
""" - def __init__(self, user, host, root = None, **kwargs): - self.host = host - self.user = user - self.root = pathlib.PurePosixPath("/") + def __init__(self, *args, **kwargs): + conf = sfURL(*args, **kwargs) - if root is not None: - self.root = pathlib.PurePosixPath(root) + self.host = conf.host + self.user = conf.user + self.root = conf.path + self.port = conf.port #-------------------------------------------------------------- #-- Configure while looking for additional keyword arguments. | #-------------------------------------------------------------- self.keypath = kwargs.get('keyfile', None) - self.silent = kwargs.get('silent', True) #-- default to silent mode - self.verbose = kwargs.get('verbose', False) #-- default to terse output mode - self.mkdirs = kwargs.get('mkdirs', True) #-- default to creating missing directories in upload paths. + self.silent = kwargs.get('silent', True) #-- default to silent mode + self.verbose = kwargs.get('verbose', False) #-- default to terse output mode + self.mkdirs = kwargs.get('mkdirs', True) #-- default to creating missing directories in upload paths. self.CURL = kwargs.get('curl', "/usr/bin/curl") self.lastop = None @@ -264,25 +338,27 @@ def __init__(self, user, host, root = None, **kwargs): self.basecom = tuple(basecom) def URL(self, reqpath = None): + return str( self.sfURL(reqpath) ) + + def sfURL(self, reqpath = None): """ Given a path (relative to my root) on the remote host (rpath), I construct an SFTP URL to the remote path. 
""" if reqpath is not None: reqpath = pathlib.PurePosixPath(reqpath) if reqpath.is_absolute(): - raise ValueError("rpath {} must be a relative path".format(str(reqpath))) + #-- Absolute paths are acceptable so long as they include the root path of the target + reqpath = reqpath.relative_to(self.root) rpath = self.root / reqpath else: rpath = self.root - return "sftp://{}@{}{}".format(self.user, self.host, str(rpath)) + return sfURL(self.host, rpath, user = self.user, port = self.port) - def ls(self, rpath): + def ls(self, rpath = None): """ Given an absolute path on the remote host (rpath), I execute a remote "ls" operation. """ - rpath = pathlib.PurePosixPath(rpath) - - lsurl = "{}/".format(self.URL(rpath)) + lsurl = "{}/".format( self.URL(rpath) ) lscom = list(self.basecom) lscom.append('--list-only') @@ -300,13 +376,11 @@ def ls(self, rpath): print(p.stderr) raise NotADirectoryError - def lsall(self, rpath): + def lsall(self, rpath = None): """ Given an absolute path on the remote host (rpath), I execute a remote "ls" operation. """ - rpath = pathlib.PurePosixPath(rpath) - - lsurl = "{}/".format(self.URL(rpath)) + lsurl = "{}/".format( self.URL(rpath) ) lscom = list(self.basecom) lscom.append(lsurl) @@ -357,8 +431,6 @@ def get(self, rpath, lpath = None): By default, the file is downloaded into the cwd as a file of the same name; however, I can also be given a full local path (lpath) as the location to download the file. """ - rpath = pathlib.PurePosixPath(rpath) - if lpath is None: target = pathlib.Path.cwd() / rpath.name else: @@ -388,15 +460,13 @@ def mkdir(self, rpath): NOTE: when uploading files to a remote host, folders are typically automatically created during the upload, making the method somewhat redundant or unnecessary for upload operations. 
""" - rpath = pathlib.PurePosixPath(rpath) - if not rpath.is_absolute(): - raise ValueError("remote path {} must be an absolute path to make".format(rpath)) + url = self.sfURL(rpath) mkcom = list(self.basecom) - mkcom.append(self.URL()) + mkcom.append(str(url)) mkcom.append('-Q') - mkcom.append('mkdir {}'.format(str(rpath))) - print(mkcom) + mkcom.append('mkdir {}'.format(str(url.path))) + logger.debug("mkdir {}".format(str(url.path))) p = subprocess.run(mkcom, capture_output = True) self.lastop = p @@ -406,5 +476,4 @@ def mkdir(self, rpath): return False def __truediv__(self, rpath): - root = pathlib.PurePosixPath("/") - return Alien(self, root / rpath) + return Alien(self, self.root / rpath) diff --git a/lib/Bastion/Packers/TARs.py b/lib/Bastion/Packers/TARs.py new file mode 100644 index 0000000..8609469 --- /dev/null +++ b/lib/Bastion/Packers/TARs.py @@ -0,0 +1,71 @@ +""" +Bastion.Packers.TARs + +I contain functions, helpers, etc. for doing tar (tape archive) work. +""" +import datetime +import pathlib +import tarfile +import logging + +from Bastion.Common import * + +logger = logging.getLogger(__name__) + + +class ifFileChanged: + """ + A class that can create callable instances to check + tarinfo metadata blocks for file modifications since a given data. + this is a filter for use with the python tarfile module and follows + the filter convention used where the filter modifies in-place the + given tarinfo object, or returns None to signal that the file should + be skipped. + e.g. 
+ filter = ifFileChanged( datetime.datetime(2024,1,1) ) + if filter(tarinfo): + #-- add to tar file + else: + #-- return None + """ + def __init__(self, when): + self.whence = when + + def __call__(self, tarinfo): + if tarinfo.type not in (tarfile.REGTYPE, tarfile.AREGTYPE): + return tarinfo + + then = datetime.datetime.fromtimestamp(tarinfo.mtime) + logger.debug("comparing file {} mod'd at {} to change limit at {}".format(tarinfo.name, then.isoformat(), self.whence.isoformat())) + if then > self.whence: + return tarinfo + else: + #-- returning None is rejecting this file for inclusion + #-- in the accumulating tar file. + return None + + +def pax(tarp, asset, **kwargs): + """ + pax uses the python tarfile module to create a tar file using the + extended POSIX.1-2001 (aka "PAX") format. + gnutar (as of the date of writing, 2024-09-04) can read PAX format, but still defaults to creating archives using its own modified USTAR format. + {tarp} is the halo (path) to the local file where the tar will be built. + {asset} can be a file path or an instance of Bastion.Model.isAsset (e.g. Bastion.Site.Asset) + differential backups can be done by supplying the "since" keyword set to the datetime which is the earliest allowed modification time for files to be admitted into the tar. + """ + if entity(asset).isAsset: + src = asset.halo + else: + src = pathlib.Path(asset) + + with tarfile.open(tarp, "w", format = tarfile.PAX_FORMAT) as tar: + if 'since' in kwargs: + when = kwargs['since'] + tar.add(src, filter = ifFileChanged(when)) + else: + tar.add(src) + + #-- if no exceptions were generated during the tar construction, + #-- then we get here and we can return a happy True! + return True diff --git a/lib/Bastion/Packers/__init__.py b/lib/Bastion/Packers/__init__.py new file mode 100644 index 0000000..573fc4e --- /dev/null +++ b/lib/Bastion/Packers/__init__.py @@ -0,0 +1,5 @@ +""" +Bastion.Packers + +I am a module containing tools to create archive objects, e.g. tar, zip, etc. 
+""" diff --git a/lib/Bastion/Utils.py b/lib/Bastion/Utils.py index 4f5694f..ef1d3a2 100644 --- a/lib/Bastion/Utils.py +++ b/lib/Bastion/Utils.py @@ -3,67 +3,6 @@ I mostly contain "helper" functions that are agnostic to the vault protocol, etc. """ -import datetime -import pathlib -import tarfile import logging -import Bastion.Model - logger = logging.getLogger(__name__) - -class ifFileChanged: - """ - A class that can create callable instances to check - tarinfo metadata blocks for file modifications since a given data. - this is a filter for use with the python tarfile module and follows - the filter convention used where the filter modifies in-place the - given tarinfo object, or returns None to signal that the file should - be skipped. - e.g. - filter = ifChangedSince( datetime.datetime(2024,1,1) ) - if filter(tarinfo): - #-- add to tar file - else: - #-- return None - """ - def __init__(self, when): - self.whence = when - - def __call__(self, tarinfo): - if tarinfo.type not in (tarfile.REGTYPE, tarfile.AREGTYPE): - return tarinfo - - then = datetime.datetime.fromtimestamp(tarinfo.mtime) - logger.debug("comparing file {} mod'd at {} to change limit at {}".format(tarinfo.name, then.isoformat(), self.whence.isoformat())) - if then > self.whence: - return tarinfo - else: - #-- returning None is rejecting this file for inclusion - #-- in the accumulating tar file. - return None - -def pax(tarp, asset, **kwargs): - """ - pax uses the python tarfile module to create a tar file using the - extended POSIX.1-2001 (aka "PAX") format. - gnutar (as of the date of writing, 2024-09-04) can read PAX format, but still defaults to creating archives using its own modified USTAR format. - {tarp} is the halo (path) to the local file where the tar will be built. - {asset} can be a file path or an instance of Bastion.Model.isAsset (e.g. 
Bastion.Site.Asset) - differential backups can be done by supplying the "since" keyword set the datetime which is the earliest allowed modification time for files to be admitted into the tar. - """ - if isinstance(asset, Bastion.Model.isAsset): - src = asset.halo - else: - src = pathlib.Path(asset) - - with tarfile.open(tarp, "w", format = tarfile.PAX_FORMAT) as tar: - if 'since' in kwargs: - when = kwargs['since'] - tar.add(src, filter = ifFileChanged(when)) - else: - tar.add(src) - - #-- if no exceptions were generated during the tar construction, - #-- then we get here and we can return a happy True! - return True diff --git a/lib/Bastion/Vaults/BFD.py b/lib/Bastion/Vaults/BFD.py index 877b441..6172fca 100644 --- a/lib/Bastion/Vaults/BFD.py +++ b/lib/Bastion/Vaults/BFD.py @@ -11,10 +11,11 @@ import tarfile import logging -from Bastion.Common import Thing, Unknown +from Bastion.Common import Thing, Unknown import Bastion.Model -from Bastion.Curator import Manifest, BLONDE, Snap -from Bastion.Utils import pax +from Bastion.Curator import Manifest, BLONDE, Snap +from Bastion.Clerks.BFD import Clerk +from Bastion.Movers.BFD import Mover logger = logging.getLogger(__name__) @@ -42,27 +43,17 @@ def configured(self, conf): if 'scratch.path' in section: self.scratch = pathlib.Path( section['scratch.path'] ) - if 'tarx' in section: - self.tarx = section['tarx'] - return self -#--------------------------------------- -#-- BEGIN Bastion.Model.Vault PROTOCOL | -#↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ - @property - def ARKs(self): - """ - I answer a sorted collection of all assets (ARKs) held in this vault. - """ - arks = [ ] - for site in self.sites: - for zone in self.zones(site): - for asset in self.assets(site, zone): - arks.append( ARK(site, zone, asset) ) - return tuple(sorted(arks)) + def changed(self, *args, **kwargs): + #-- React to internal events that can invalidate cached items, etc. 
+ for aspect in args: + if aspect in ('clerk', 'mover'): + setattr(self, '_{}'.format(aspect), None) + return self @property +<<<<<<< HEAD def sites(self): """ I answer a sorted collection of the names of all sites known to this vault. @@ -236,40 +227,22 @@ def put(self, halo, tag, **kwargs): def get(self, tag, halo, **kwargs): shutil.copystat(self.bank / tag, halo) +======= + def clerk(self): + if getattr(self, '_clerk', None) is None: + self._clerk = Clerk(self) + return self._clerk +>>>>>>> 2e61ff33808d7cd60a7e31a3942455dfa0d7b0ce + @property + def mover(self): + if getattr(self, '_mover', None) is None: + self._mover = Mover(self) + return self._mover #↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑ #-- END Bastion.Model.Vault PROTOCOL | #------------------------------------- - def _copy_blonde(self, blonde, **kwargs): - raise NotImplementedError - - def _empty_scratch(self, ark, **kwargs): - raise NotImplementedError - - def _manifest_ark(self, ark): - #-- The contents of {root}/{site}/{zone}/{asset} are backup blobs - #-- Each name in this folder is a "BLOND" (BLOb Name and Descriptor) - #-- The manifest a catalog of all of the backup objects for the asset. 
- cell = self.bank / ark.site / ark.zone / ark.asset - if cell.exists(): - blondes = [ ] - for item in cell.iterdir(): - if not item.is_dir(): - blondes.append( BLONDE.decode(item.stem) ) - manifest = Bastion.Curator.Manifest(ark, blondes) - - return manifest - - def _manifest_site_zone_asset(self, site, zone, asset): - return self._manifest_ark( ARK(site, zone, asset) ) - - def _provision_ark(self, ark): - repo = self.bank / ark.site / ark.zone / ark.asset - return repo.mkdir(parents = True, exist_ok = True) - - def _provision_site_zone_asset(self, site, zone, asset_name): - return self._provision_ark( ARK(site, zone, asset_name) ) Vault.register() diff --git a/lib/Bastion/Vaults/SFTP.py b/lib/Bastion/Vaults/SFTP.py index 9f30b0a..c6da1b1 100644 --- a/lib/Bastion/Vaults/SFTP.py +++ b/lib/Bastion/Vaults/SFTP.py @@ -11,11 +11,11 @@ import tarfile import logging -from Bastion.Common import Thing, Unknown -import Bastion.Model -from Bastion.Curator import Manifest, BLONDE, Snap -from Bastion.Utils import pax -from Bastion.Movers.sCURL import SCURLer +from Bastion.Common import Thing, Unknown +import Bastion.Model +from Bastion.Curator import Manifest, BLONDE, Snap +from Bastion.Movers.sCURL import Mover +from Bastion.Clerks.sCURL import Clerk logger = logging.getLogger(__name__) @@ -28,7 +28,6 @@ def asPurePath(x): return pathlib.PurePosixPath(x) - class Vault(Bastion.Model.Vault): PROTOCOL = 'SFTP' @@ -40,6 +39,8 @@ def __init__(self, name, **kwargs): self.login = getpass.getuser() self.keypath = pathlib.Path("~/.ssh/id") self.bank = None + self._mover = None #-- cached instance of Mover + self._clerk = None #-- cached instance of Clerk def configured(self, conf): confkey = "vaults.{}".format(self.name) @@ -59,6 +60,24 @@ def configured(self, conf): return self + def changed(self, *args, **kwargs): + for aspect in args: + if aspect in ('mover', 'clerk'): + setattr(self, '_{}'.format(aspect), None) + return self + + @property + def mover(self): + if getattr(self, 
'_mover', None) is None: + self._mover = Mover(self, self.host, self.login, self.keypath) + return self._mover + + @property + def clerk(self): + if getattr(self, '_clerk', None) is None: + self._clerk = Clerk(self, target, self.host, self.login, self.keypath) + return self._clerk + @property def bank(self): client = SCURLer(self.login, self.host, self.root, keyfile = self.key) @@ -66,72 +85,6 @@ def bank(self): #--------------------------------------- #-- BEGIN Bastion.Model.Vault PROTOCOL | #↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ - @property - def ARKs(self): - """ - I answer a sorted collection of all assets (ARKs) held in this vault. - """ - arks = [ ] - for site in self.sites: - for zone in self.zones(site): - for asset in self.assets(site, zone): - arks.append( ARK(site, zone, asset) ) - return tuple(sorted(arks)) - - @property - def sites(self): - """ - I answer a sorted collection of the names of all sites known to this vault. - """ - #-- sites are top level elements relative to the root of the vault. - if self.bank.exists: - for entry in self.bank: - if entry.is_folder: - sites.append(entry.rpath.name) - return tuple(sorted(sites)) - - def zones(self, site): - """ - Given a site name, I answer a sorted collection of zone names that are known to this vault. - """ - #-- a zone will be a subdirectory (subfolder) of the given site. - #-- look for all of the subfolders of root / site - zones = [ ] - sroot = self.bank / RDN(site) - if sroot.exists: - if sroot.is_folder: - for entry in sroot: - if entry.is_folder: - zones.append(entry.rpath.name) - return tuple(sorted(zones)) - - def assets(self, site, zone): - #-- assets will be subdirectories (subfolders) of a given site, zone. - assets = [ ] - zroot = self.bank / RDN(site) / RDN(zone) - if zroot.exists: - if zroot.is_folder: - assets.append(entry.name) - return tuple(sorted(assets)) - - def manifest(self, *args): - """ - I answer a manifest of the named asset... 
- manifest(ark) - manifest(site, zone, asset) - """ - if len(args) == 1: - arg = args[0] - if isinstance(arg, Bastion.Model.isAsset): - ark = arg.ARK - else: - ark = arg - return self._manifest_ark( ark ) - elif len(args) == 3: - return self._manifest_site_zone_asset( args[0], args[1], args[2] ) - else: - raise ValueError - def provision(self, *args): """ provision(ark) - ensures that the site, zone, and asset folders exist.