There is no way to migrate inspection data automatically. This script documents one way to copy data between two Swift buckets.

Change-Id: I4a86faab5e7abef17064e3c716dc17b6a2f21f39
Signed-off-by: Jakub Jelinek <jakub.jelinek@cern.ch>
Assisted-by: Claude 4.5 Sonnet (Anthropic AI)
Copy inspection data between Swift buckets
This script assumes that you have S3 credentials for both buckets; the only setup required is filling in the five configuration parameters below. For each JSON object in the source bucket (keys ending in '-UNPROCESSED' are skipped), it pops the 'inventory' section and writes two objects to the destination bucket: '<key>-inventory' and '<key>-plugin'.
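To run it, install boto3 and execute the script with Python 3 (assuming it is saved under a name of your choice, e.g. copy_inspection_data.py):

pip install boto3
python copy_inspection_data.py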
import boto3
import json
from botocore.exceptions import ClientError
# Configure your S3 buckets and Ceph endpoint
SOURCE_BUCKET = ''
DEST_BUCKET = ''
# Ceph S3 configuration
CEPH_ENDPOINT = ''
CEPH_ACCESS_KEY = ''
CEPH_SECRET_KEY = ''
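
# Example values (hypothetical, for illustration only; use the endpoint and
# credentials of your own Ceph deployment):
# SOURCE_BUCKET = 'inspection-source'
# DEST_BUCKET = 'inspection-dest'
# CEPH_ENDPOINT = 'https://ceph.example.com:7480'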
def get_s3_client():
    """Initialize and return an S3 client for Ceph."""
    session = boto3.Session(
        aws_access_key_id=CEPH_ACCESS_KEY,
        aws_secret_access_key=CEPH_SECRET_KEY)
    return session.client(
        's3',
        endpoint_url=CEPH_ENDPOINT)
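
# Note: depending on the Ceph deployment, session.client() above may need
# extra arguments such as region_name or verify (for self-signed TLS
# certificates); these are assumptions, adjust to your setup.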
def list_files_to_process(s3_client, bucket):
    """List all keys in the bucket that don't end with '-UNPROCESSED'."""
    files = []
    try:
        paginator = s3_client.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=bucket):
            for obj in page.get('Contents', []):
                key = obj['Key']
                if not key.endswith('-UNPROCESSED'):
                    files.append(key)
    except ClientError as e:
        print(f"Error listing files: {e}")
        raise
    return files
def load_json_from_s3(s3_client, bucket, key):
    """Load and parse a JSON file from S3."""
    try:
        response = s3_client.get_object(Bucket=bucket, Key=key)
        content = response['Body'].read().decode('utf-8')
        return json.loads(content)
    except ClientError as e:
        print(f"Error reading {key}: {e}")
        raise
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON from {key}: {e}")
        raise
def save_json_to_s3(s3_client, bucket, key, data):
    """Save JSON data to S3."""
    try:
        s3_client.put_object(
            Bucket=bucket,
            Key=key,
            Body=json.dumps(data, indent=2),
            ContentType='application/json'
        )
        print(f"Saved: {key}")
    except ClientError as e:
        print(f"Error saving {key}: {e}")
        raise
def process_files():
    """Main processing function."""
    s3_client = get_s3_client()

    print(f"Fetching files from {SOURCE_BUCKET}...")
    files = list_files_to_process(s3_client, SOURCE_BUCKET)
    print(f"Found {len(files)} files to process")

    # Process each file
    for file_key in files:
        print(f"\nProcessing: {file_key}")
        try:
            # Load JSON data
            data = load_json_from_s3(s3_client, SOURCE_BUCKET, file_key)

            # Split the data: pop the 'inventory' section; whatever remains
            # is the plugin data.
            inventory = data.pop('inventory', None)
            plugin = data

            if inventory is None:
                print(f"Warning: 'inventory' key not found in {file_key}")

            # Generate output object names
            inventory_key = f"{file_key}-inventory"
            plugin_key = f"{file_key}-plugin"

            # Save the split files; the plugin part always exists, the
            # inventory part only if the source object contained one.
            if inventory is not None:
                save_json_to_s3(s3_client, DEST_BUCKET, inventory_key, inventory)
            save_json_to_s3(s3_client, DEST_BUCKET, plugin_key, plugin)
        except Exception as e:
            print(f"Failed to process {file_key}: {e}")
            continue

    print("\nProcessing complete!")
if __name__ == "__main__":
    process_files()
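
As a quick sanity check after a run, you can list a few of the newly written objects with the same client, e.g. (a minimal sketch reusing get_s3_client() and DEST_BUCKET from the script above):

# Spot-check the destination bucket: print up to 10 keys and their sizes
client = get_s3_client()
response = client.list_objects_v2(Bucket=DEST_BUCKET, MaxKeys=10)
for obj in response.get('Contents', []):
    print(obj['Key'], obj['Size'])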