Create manifest and implement sqlite driver

Use class inheritance for driver in case we need something other than
sqlite as time goes on.

SQLite is only 10% larger than the custom format used with osdk. When
compressed, the SQLite manifest is only fractionally larger than osdk. The
time it takes to generate and process is about 2x longer. However, the
benefit we get from using common constructs (sqlite) and tracking and
changing the manifest schema is enough for me to think this is the right
way to proceed.

To generate a manifest with 2^24 objects (64TB disk) run:
tools/generate_manifest.py --backupsize 64000 --manifest /pathtosave/manifest

This will generate an 809MB file

Co-Authored-By: Sam Yaple <sam@yaple.net>
Change-Id: Ic431bfa52b6fcaeb1c6a64cf270cbb36c496335e
This commit is contained in:
Michal Jastrzebski 2016-01-08 11:09:14 -06:00 committed by SamYaple
parent 7b8e49f3d8
commit a9d13fc32f
9 changed files with 189 additions and 342 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
.testrepository/
ekko.egg-info/
*.pyc
*.swp

View File

@ -1,190 +0,0 @@
#!/usr/bin/python
# Copyright 2016 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copied and licensed from https://github.com/SamYaple/osdk
from binascii import crc32
from collections import namedtuple
from datetime import datetime
from struct import pack
from struct import unpack
from uuid import UUID
import six
# Hex-encoded file signature that Manifest.read_signature compares against the
# start of a manifest file.
# NOTE(review): the literal is 64 hex digits (the same 32-digit value written
# twice), while read_signature passes it to UUID(), which presumably expects
# exactly 32 hex digits -- confirm the intended signature length.
SIGNATURE = 'd326503ab5ca49adac56c89eb0b8ef08d326503ab5ca49adac56c89eb0b8ef08'
class EkkoShortReadError(Exception):
    """Signal that a read returned a different number of bytes than requested.

    Both byte counts are kept on the instance so a handler can tell a clean
    end-of-file (nothing read) from a truncated record.
    """

    def __init__(self, size_read, size_requested):
        # Bytes requested by the caller, and bytes actually obtained.
        self.size_requested = size_requested
        self.size_read = size_read
class EkkoManifestTooNewError(Exception):
    """The manifest declares a version newer than this code can read."""
class EkkoChecksumError(Exception):
    """A checksum computed while reading does not match the stored one."""
class EkkoInvalidSignatureError(Exception):
    """The leading bytes of the file are not the expected signature."""
class Manifest(object):
    """Read and write the custom binary manifest format.

    Layout as consumed by ``read_header`` and ``read_body``:

    * 32-byte signature
    * ``<3I``   -- format version, header checksum, body checksum
    * ``<i2IQ`` -- timestamp, incremental, segment_size, sectors
    * ``<H14s`` -- number of bases, then 14 padding bytes
    * one 16-byte id per base
    * zero or more 32-byte segment records (``<IHI2B20s``)

    Checksums are CRC32 values handled as unsigned 32-bit integers so that
    Python 2 (signed ``crc32``) and Python 3 (unsigned ``crc32``) agree.

    NOTE(review): the write path is incomplete -- ``write_header`` builds the
    header but never writes it, so ``write_manifest`` does not yet produce a
    file that ``read_manifest`` accepts.
    """

    def __init__(self, manifest):
        """Remember the manifest path; version 0 is the newest readable."""
        self.manifest = manifest
        self.metadata = {'version': 0}

    def write_manifest(self):
        """Write the header followed by the body to ``self.manifest``."""
        with open(self.manifest, 'wb', 1) as f:
            self.write_header(f)
            self.write_body(f)

    def build_header(self):
        """Pack the info fields and base ids.

        The timestamp field is taken from ``utctimestamp()`` at call time,
        not from ``self.metadata['info'].timestamp``.

        :returns: ``(data, checksum)`` where ``checksum`` is the CRC32 of
                  ``data``
        """
        info = self.metadata['info']
        bases = self.metadata['bases']
        data = pack(
            '<i2IQH14s',
            utctimestamp(),
            info.incremental,
            info.segment_size,
            info.sectors,
            len(bases),
            b'\0' * 14  # padding; struct only keeps 14 bytes for '14s'
        )
        checksum = crc32(data)
        for base_id in bases:
            data += base_id
            checksum = crc32(base_id, checksum)
        return data, checksum

    def write_body(self, f):
        """Write one record per segment, then backfill the body checksum."""
        checksum = 0
        for number, seg in six.iteritems(self.segments):
            data = pack(
                '<IHI2B20s',
                number,
                seg.base,
                seg.incremental,
                seg.compression,
                seg.encryption,
                self.hashes[number]
            )
            f.write(data)
            checksum = crc32(data, checksum)

        # Backfill the body_checksum
        # NOTE(review): offset 24 matches a 16-byte signature, but read_header
        # consumes a 32-byte signature, which would put this field at offset
        # 40 -- confirm the intended layout before relying on this.
        f.seek(24, 0)
        # Masking yields the same four bytes as the signed form on Python 2
        # and avoids struct.error for large unsigned values on Python 3.
        f.write(pack('<I', checksum & 0xffffffff))

    def write_header(self, f):
        """Build the header.

        NOTE(review): nothing is written to ``f`` here; the signature,
        version, checksums and header data still need to be emitted.
        """
        data, checksum = self.build_header()

    def read_data(self, f, size_requested):
        """Read exactly ``size_requested`` bytes and fold them into the CRC.

        :raises EkkoShortReadError: when fewer bytes are available
        """
        data = f.read(size_requested)
        size_read = len(data)
        if size_read != size_requested:
            # The exception takes exactly (size_read, size_requested).
            raise EkkoShortReadError(size_read, size_requested)
        # Running checksum over everything read since the last reset.
        self.checksum = crc32(data, self.checksum)
        return data

    def read_signature(self, f):
        """Check that the next 32 bytes are the file signature.

        :raises EkkoInvalidSignatureError: on mismatch
        """
        from binascii import unhexlify

        # SIGNATURE is 64 hex digits, i.e. 32 bytes; UUID() cannot parse it.
        if unhexlify(SIGNATURE) != self.read_data(f, 32):
            raise EkkoInvalidSignatureError('File signiture is not valid')

    def read_header(self, f):
        """Parse the header into ``self.metadata``.

        :returns: the stored body checksum (unsigned)
        :raises EkkoManifestTooNewError: stored version is not supported
        :raises EkkoChecksumError: header checksum mismatch
        """
        self.checksum = 0
        Info = namedtuple(
            'Info',
            'timestamp incremental segment_size sectors'
        )

        self.read_signature(f)
        version, header_checksum, body_checksum = unpack(
            '<3I', self.read_data(f, 12)
        )
        if self.metadata['version'] < version:
            raise EkkoManifestTooNewError(
                'The manifest version is newer than I know how to read'
            )

        # The header checksum covers only what build_header() packs, so the
        # signature and the stored checksum fields must not be counted.
        self.checksum = 0
        self.metadata['info'] = Info._make(
            unpack('<i2IQ', self.read_data(f, 20))
        )
        num_of_bases, _ = unpack('<H14s', self.read_data(f, 16))
        self.metadata['bases'] = [
            self.read_data(f, 16) for _base in six.moves.range(num_of_bases)
        ]

        if self.checksum & 0xffffffff != header_checksum:
            raise EkkoChecksumError('Header checksum does not match')
        return body_checksum

    def read_body(self, f, body_checksum):
        """Read segment records until end of file and verify their checksum.

        Populates ``self.segments`` and ``self.hashes``, keyed by segment
        number.

        NOTE(review): ``base`` is stored here as the base id looked up from
        ``self.metadata['bases']``, whereas ``write_body`` packs ``base`` as
        an integer index -- confirm which representation is intended.

        :raises EkkoShortReadError: the file ends in the middle of a record
        :raises EkkoChecksumError: body checksum mismatch
        """
        self.checksum = 0
        self.segments = dict()
        self.hashes = dict()
        Segment = namedtuple(
            'Segment',
            'base incremental compression encryption'
        )

        while True:
            try:
                record_head = self.read_data(f, 6)
            except EkkoShortReadError as e:
                # Zero bytes at a record boundary is a clean end of file;
                # anything else is a truncated record.
                if e.size_read != 0:
                    raise
                break
            segment, base = unpack('<IH', record_head)
            self.segments[segment] = Segment(
                self.metadata['bases'][base],
                *unpack('<I2B', self.read_data(f, 6))
            )
            # Keep the raw 20 bytes so write_body can pack them back as '20s'.
            self.hashes[segment] = self.read_data(f, 20)

        if self.checksum & 0xffffffff != body_checksum & 0xffffffff:
            raise EkkoChecksumError('Body checksum does not match')

    def read_manifest(self):
        """Read the header and then the body from ``self.manifest``."""
        with open(self.manifest, 'rb', 1) as f:
            self.read_body(f, self.read_header(f))
def utctimestamp():
    """Return the current UTC time as whole seconds since 1970-01-01."""
    epoch = datetime(1970, 1, 1)
    elapsed = datetime.utcnow() - epoch
    # 86400 seconds per day; microseconds are deliberately dropped.
    return elapsed.days * 24 * 3600 + elapsed.seconds

View File

41
ekko/manifest/driver.py Normal file
View File

@ -0,0 +1,41 @@
# Copyright 2016 Intel corporation
# Copyright 2016 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_utils import importutils
def load_manifest_driver(manifest_location, manifest_driver=None):
    """Instantiate a manifest driver for ``manifest_location``.

    :param manifest_location: passed to the driver as its only argument
    :param manifest_driver: driver name handed to
        ``importutils.import_object_ns`` together with the ``ekko.manifest``
        namespace; a falsy value selects ``sqlite.SQLiteDriver``
    :returns: whatever ``importutils.import_object_ns`` returns
    """
    driver_name = manifest_driver or 'sqlite.SQLiteDriver'
    return importutils.import_object_ns(
        'ekko.manifest', driver_name, manifest_location
    )
class ManifestDriver(object):
    """Base class for manifest drivers.

    Subclasses provide the storage backend by overriding ``initialize``,
    ``put_metadata`` and ``put_segments``.
    """

    def __init__(self, manifest_file):
        """Store the manifest location; no connection is opened yet."""
        self.conn = None
        self.manifest_file = manifest_file

    def initialize(self):
        """Prepare the backing store before anything is written to it."""
        raise NotImplementedError()

    def put_metadata(self, metadata):
        """Persist the backup metadata."""
        raise NotImplementedError()

    def put_segments(self, segments):
        """Persist an iterable of segments."""
        raise NotImplementedError()

86
ekko/manifest/sqlite.py Normal file
View File

@ -0,0 +1,86 @@
# Copyright 2016 Intel corporation
# Copyright 2016 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from contextlib import closing
from contextlib import contextmanager
import sqlite3
from ekko.manifest import driver
class SQLiteDriver(driver.ManifestDriver):
    """Manifest driver that stores metadata and segments in an SQLite file."""

    def initialize(self):
        """Create the ``metadata`` and ``segments`` tables."""
        with self.get_conn() as conn:
            with closing(conn.cursor()) as cur:
                cur.executescript("""
                    CREATE TABLE metadata (
                        key TEXT PRIMARY KEY,
                        value TEXT
                    );
                    CREATE TABLE segments (
                        backupset_id BLOB,
                        incremental INTEGER,
                        segment INTEGER PRIMARY KEY,
                        compression TINYINT,
                        encryption TINYINT,
                        segment_hash BLOB
                    );
                """)
                conn.commit()

    @contextmanager
    def get_conn(self):
        """Yield the connection, opening it on first use.

        ``self.conn`` is cleared while the connection is handed out. On exit
        any uncommitted work is rolled back and the connection is stored on
        ``self.conn`` again -- also when the body raises, so a failure
        neither loses the connection nor leaves a transaction open.
        """
        if not self.conn:
            self.conn = sqlite3.connect(self.manifest_file)

        conn = self.conn
        self.conn = None

        try:
            yield conn
        finally:
            conn.rollback()
            self.conn = conn

    def put_segments(self, segments):
        """Insert every segment from the iterable in a single transaction."""
        # sqlite3.Binary is the portable BLOB wrapper (``buffer`` exists only
        # on Python 2). The generator lets executemany stream the rows.
        rows = (
            (
                sqlite3.Binary(segment.backupset_id),
                segment.incremental,
                segment.segment,
                segment.compression,
                segment.encryption,
                sqlite3.Binary(segment.segment_hash)
            )
            for segment in segments
        )
        with self.get_conn() as conn:
            with closing(conn.cursor()) as cur:
                cur.executemany(
                    "INSERT INTO segments VALUES (?, ?, ?, ?, ?, ?)",
                    rows
                )
                conn.commit()

    def put_metadata(self, metadata):
        """Insert or replace the metadata key/value rows."""
        with self.get_conn() as conn:
            with closing(conn.cursor()) as cur:
                cur.executemany(
                    "INSERT OR REPLACE INTO metadata VALUES (?, ?)",
                    [
                        ('incremental', metadata.incremental),
                        ('segment_size', metadata.segment_size),
                        ('sectors', metadata.sectors),
                        ('timestamp', metadata.timestamp)
                    ]
                )
                conn.commit()

View File

@ -0,0 +1,41 @@
# Copyright 2016 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
from uuid import uuid4 as uuid
class Metadata(object):
    """Describe one backup: its size, position in the chain and identity."""

    def __init__(self, incremental, sectors, segment_size=None,
                 timestamp=None, backupset_id=None):
        """Build the metadata record.

        :param incremental: incremental number of this backup
        :param sectors: number of sectors on the backed-up device
        :param segment_size: segment size in bytes; defaults to 4MiB
        :param timestamp: creation time; defaults to ``time.time()``
        :param backupset_id: id of the backup set; a falsy value generates
            the 16 raw bytes of a new random UUID
        """
        # Compare against None so an explicit 0 timestamp is kept.
        self.timestamp = time.time() if timestamp is None else timestamp
        self.sectors = sectors
        self.incremental = incremental
        # Honour the caller's value; it used to be silently ignored.
        if segment_size is None:
            segment_size = 4 * 1024 ** 2  # 4MiB
        self.segment_size = segment_size
        self.backupset_id = backupset_id if backupset_id else uuid().bytes
class Segment(object):
    """One manifest entry; uses ``__slots__`` so instances carry no dict."""

    __slots__ = ['backupset_id', 'incremental', 'segment',
                 'compression', 'encryption', 'segment_hash']

    def __init__(self, backupset_id, incremental, segment,
                 compression, encryption, segment_hash):
        """Store each argument on the slot of the same name."""
        values = (backupset_id, incremental, segment,
                  compression, encryption, segment_hash)
        # The slot names are listed in the same order as the arguments.
        for slot, value in zip(self.__slots__, values):
            setattr(self, slot, value)

View File

@ -4,3 +4,4 @@
pbr>=1.6
six>=1.9.0
oslo.utils>=3.2.0 # Apache-2.0

View File

@ -1,112 +0,0 @@
#!/usr/bin/python
# Copyright 2016 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copied and licensed from https://github.com/SamYaple/osdk
import argparse
from collections import namedtuple
# from hashlib import sha1
import os
import sys
from uuid import uuid4 as uuid
from ekko import manifest
from six.moves import range
def parse_args():
    """Parse the command line and return the resulting namespace.

    ``--backupsize`` (int) and ``--manifest`` are required; ``--cbt`` is
    optional.
    """
    cli = argparse.ArgumentParser(description='Backup Block Device')
    options = (
        ('--backupsize', dict(
            required=True, type=int,
            help='Size of backup for manifest gen (size in GB)')),
        ('--manifest', dict(required=True, help='manifest file')),
        ('--cbt', dict(required=False, help='change block tracking info')),
    )
    for flag, settings in options:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
def read_segments(segments, size, backup):
    """Fill ``backup.segments`` and ``backup.hashes`` with generated entries.

    Each segment number maps to a record pointing at the last base with
    compression and encryption set to 0, plus 20 random bytes standing in
    for a hash. ``size`` is accepted but not used.
    """
    Segment = namedtuple(
        'Segment',
        'base incremental compression encryption'
    )
    backup.segments, backup.hashes = {}, {}

    for number in segments:
        # Generate manifest info for each object in backup
        backup.segments[number] = Segment(
            base=len(backup.metadata['bases']) - 1,
            incremental=backup.metadata['info'].incremental,
            compression=0,
            encryption=0
        )
        # Random string simulating hash sha
        backup.hashes[number] = os.urandom(20)
def generate_mem_struct(segments, size, backup):
    """Return a dict mapping one fixed id to a list of one-entry dicts.

    Every entry maps a segment number to the backup's incremental value.
    ``size`` is accepted but not used.
    """
    backupset = '96153320-980b-4b5e-958f-ea57812b280d'
    entries = [
        {seg: backup.metadata['info'].incremental} for seg in segments
    ]
    return {backupset: entries}
def check_manifest(manifest_file):
    """Return True when ``manifest_file`` is an existing regular file."""
    already_there = os.path.isfile(manifest_file)
    return already_there
def main():
    """Build in-memory manifest data for a disk of the requested size.

    Exits early, after printing a message, when the manifest file already
    exists.

    NOTE(review): the structure returned by ``generate_mem_struct`` is
    discarded and nothing is written to ``args.manifest``.
    """
    args = parse_args()
    segment_size = 4 * 1024 ** 2  # 4MiB
    size_of_disk = args.backupsize * 1024 ** 3  # Convert GB to B
    # Floor division keeps these exact integers on Python 2 and 3 alike.
    num_of_sectors = size_of_disk // 512
    num_of_segments = size_of_disk // segment_size
    incremental = 0

    Info = namedtuple(
        'Info',
        'timestamp incremental segment_size sectors'
    )

    if check_manifest(args.manifest):
        print('manifest exists; exiting')
        return

    backup = manifest.Manifest(args.manifest)
    backup.metadata['info'] = Info(
        manifest.utctimestamp(),
        incremental,
        segment_size,
        num_of_sectors,
    )
    backup.metadata['bases'] = [uuid().bytes]

    # Segments are numbered 0 .. num_of_segments - 1, so the range must stop
    # at num_of_segments; stopping one earlier dropped the last segment.
    # read_segments(range(num_of_segments), segment_size, backup)
    generate_mem_struct(range(num_of_segments), segment_size, backup)
if __name__ == '__main__':
sys.exit(main())

View File

@ -18,14 +18,12 @@
import argparse
from collections import namedtuple
# from hashlib import sha1
import os
import sys
from uuid import uuid4 as uuid
sys.path.insert(0, '/root/ekko/')
from ekko import manifest
from ekko.manifest import driver as manifest_driver
from ekko.manifest import structure as manifest_structure
from six.moves import range
@ -40,24 +38,16 @@ def parse_args():
return parser.parse_args()
def read_segments(segments, size, backup):
backup.segments = dict()
backup.hashes = dict()
Segment = namedtuple(
'Segment',
'base incremental compression encryption'
)
def read_segments(segments, metadata):
for segment in segments:
# Generate manifest info for each object in backup
backup.segments[segment] = Segment(
len(backup.metadata['bases']) - 1,
backup.metadata['info'].incremental,
yield manifest_structure.Segment(
metadata.backupset_id,
metadata.incremental,
segment,
0,
0
0,
os.urandom(20)
)
# Random string simulating hash sha
backup.hashes[segment] = os.urandom(20)
def check_manifest(manifest_file):
@ -66,35 +56,24 @@ def check_manifest(manifest_file):
def main():
args = parse_args()
segment_size = 4 * 1024**2 # 4MiB
size_of_disk = args.backupsize * 1024**3 # Convert GB to B
num_of_sectors = int(size_of_disk / 512)
num_of_segments = int(size_of_disk / segment_size)
incremental = 0
Info = namedtuple(
'Info',
'timestamp incremental segment_size sectors'
)
if check_manifest(args.manifest):
print('manifest exists; exiting')
return
backup = manifest.Manifest(args.manifest)
manifest = manifest_driver.load_manifest_driver(args.manifest)
backup.metadata['info'] = Info(
manifest.utctimestamp(),
incremental,
segment_size,
num_of_sectors,
)
size_of_disk = args.backupsize * 1024**3 # Convert GB to B
num_of_sectors = int(size_of_disk / 512)
incremental = 0
metadata = manifest_structure.Metadata(incremental, sectors=num_of_sectors)
backup.metadata['bases'] = [uuid().bytes]
manifest.initialize()
manifest.put_metadata(metadata)
read_segments(range(0, num_of_segments - 1), segment_size, backup)
num_of_segments = int(size_of_disk / metadata.segment_size)
segments = read_segments(range(0, num_of_segments - 1), metadata)
backup.write_manifest()
manifest.put_segments(segments)
if __name__ == '__main__':
sys.exit(main())