Protection service basic design doc
design document for protection service which includes the high level architecture of protection service, the class-diagram and some sequence diagrams for several typical scenarios Change-Id: I4c7c37a71a2beb1e63807d3c15e3a5931febde0d Closes-bug: #1529199
This commit is contained in:
parent
676b8205bf
commit
e0c8ee3d75
BIN
doc/images/protection-service/class-diagram.png
Normal file
BIN
doc/images/protection-service/class-diagram.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 162 KiB |
BIN
doc/images/protection-service/protect-rpc-call-seq-diagram.png
Normal file
BIN
doc/images/protection-service/protect-rpc-call-seq-diagram.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 47 KiB |
BIN
doc/images/protection-service/protection-architecture.png
Normal file
BIN
doc/images/protection-service/protection-architecture.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 41 KiB |
173
doc/source/specs/protection-service/class-diagram.pu
Normal file
173
doc/source/specs/protection-service/class-diagram.pu
Normal file
@ -0,0 +1,173 @@
|
||||
@startuml
|
||||
|
||||
title ProtectionService Class Diagram
|
||||
|
||||
class RpcServer {
|
||||
-endpoints: []Manager_Class
|
||||
-target: messaging.Target
|
||||
}
|
||||
|
||||
class ProtectionManager {
|
||||
+<<rpc call>>execute_operation(backup_plan:BackupPlan, action:Action)
|
||||
+<<rpc call>>list_providers(list_options:{}): []Providers
|
||||
+<<rpc call>>show_provider(provider_id:String):Provider
|
||||
+<<rpc call>>list_checkpoints(list_options:{}): []Checkpoint
|
||||
+<<rpc call>>show_checkpoint(provider_id:String, checkpoint_id:String): Checkpoint
|
||||
+<<rpc call>>delete_checkpoint(provider_id:String, checkpoint_id:String):void
|
||||
-protect_operation(backup_plan:BackupPlan):void
|
||||
-restore_operation(backup_plan:BackupPlan):void
|
||||
-workFlowEngine:WorkFlowEngine
|
||||
}
|
||||
|
||||
RpcServer*-right->ProtectionManager:has many as endpoints
|
||||
|
||||
class WorkFlowEngine {
|
||||
+build_task_flow(backup_plan:BackupPlan, provider:ProtectionProvider):flow:taskflow.flow.Flow
|
||||
+execute(executor_type:String, flow:taskflow.flow.Flow):void
|
||||
}
|
||||
|
||||
ProtectionManager*->WorkFlowEngine:has one
|
||||
|
||||
class taskflow.engines.action_engine.engine.ParallelActionEngine {
|
||||
...
|
||||
}
|
||||
|
||||
WorkFlowEngine*-up->taskflow.engines.action_engine.engine.ParallelActionEngine:load one
|
||||
|
||||
class taskflow.patterns.graph_flow.Flow {
|
||||
...
|
||||
}
|
||||
|
||||
WorkFlowEngine -right-> taskflow.patterns.graph_flow.Flow:generate one per operation execution
|
||||
|
||||
class taskflow.task.Task {
|
||||
...
|
||||
}
|
||||
|
||||
taskflow.patterns.graph_flow.Flow->taskflow.task.Task:composed by many
|
||||
|
||||
class CreateCheckpointTask extends taskflow.task.Task {
|
||||
-backup_plan:BackupPlan
|
||||
-checkpoint_collection:CheckpointCollection
|
||||
+execute(): void
|
||||
+revert(): void
|
||||
}
|
||||
|
||||
class SyncCheckpointStatusTask extends taskflow.task.Task{
|
||||
-checkpoint:Checkpoint
|
||||
-checkpoint_collection:CheckpointCollection
|
||||
+execute(): void
|
||||
+revert():void
|
||||
}
|
||||
|
||||
interface CheckpointCollectionInterface {
|
||||
+list(list_options:dict): []Checkpoints
|
||||
+show(checkpoint_id:String): Checkpoint
|
||||
+delete(checkpoint_id:String):void
|
||||
+create(plan:ProtectionPlan): Checkpoint
|
||||
+update(checkpoint:Checkpoint, kwargs:{}): void
|
||||
}
|
||||
|
||||
class CheckpointCollection implements CheckpointCollectionInterface{
|
||||
-bank_plugin:BankPluginInterface
|
||||
+init(bank_plugin:BankPlugin):void
|
||||
..checkpoint functions..
|
||||
...
|
||||
}
|
||||
|
||||
CheckpointCollection*-down->BankPlugin:has one
|
||||
|
||||
class ProviderRegistry{
|
||||
+list_providers(list_options:{}):[]ProtectionProvider
|
||||
+show_provider(provider_id:String):ProtectionProvider
|
||||
-load_providers(cfg_file:String):void
|
||||
}
|
||||
|
||||
ProtectionManager*-down->ProviderRegistry:has a
|
||||
|
||||
interface ProtectionProvider {
|
||||
+build_task_flow(backup_plan:BackupPlan, action:Action):taskflow.patterns.graph_flow.Flow
|
||||
}
|
||||
|
||||
ProviderRegistry "1"*-down->"many" ProtectionProvider:manage
|
||||
|
||||
ProtectionProvider-right->taskflow.patterns.graph_flow.Flow:generates
|
||||
|
||||
taskflow.patterns.graph_flow.Flow->taskflow.patterns.graph_flow.Flow:composed by
|
||||
|
||||
class PluggableProtectionProvider implements ProtectionProvider{
|
||||
+get_protection_plugin(protectable_type:ProtectableType):ProtectionPluginInterface
|
||||
+get_bank_plugin():BankPlugin
|
||||
+get_checkpoint_collection():CheckpointCollection
|
||||
+build_task_flow(backup_plan:BackupPlan): taskflow.patterns.graph_flow.Flow
|
||||
-load_plugin(cfg_file:String):void
|
||||
-bank_plugin:BankPluginInterface
|
||||
-plugins:{ProtectableType:ProtectionPluginInterface}
|
||||
-checkpoint_collection:CheckpointCollection
|
||||
}
|
||||
|
||||
interface ProtectionPluginInterface {
|
||||
..getter functions..
|
||||
+get_supported_resources_types(): []ResourceType
|
||||
..protect action functions..
|
||||
+get_protection_status(protection_id:String):Enum
|
||||
..graph walk functions..
|
||||
+on_resource_start(context: Context)
|
||||
+on_resource_end(context: Context)
|
||||
..schema functions..
|
||||
+get_options_schema(resource_type: ResourceType)
|
||||
+get_saved_info_schema(resource_type: ResourceType)
|
||||
+get_restore_schema(resource_type: ResourceType)
|
||||
+get_saved_info(resource: Resource)
|
||||
}
|
||||
|
||||
class ProtectionPlugin implements ProtectionPluginInterface {
|
||||
-protectable_type:String
|
||||
-schema:[]String
|
||||
..getter functions..
|
||||
...
|
||||
..protect action functions..
|
||||
...
|
||||
..graph walk functions..
|
||||
...
|
||||
..schema functions..
|
||||
...
|
||||
}
|
||||
|
||||
PluggableProtectionProvider "1" *-left->"many" ProtectionPlugin:aggregates
|
||||
|
||||
Interface BankPluginInterface {
|
||||
+chroot(context:dict):void
|
||||
+create_object(key:String, value:Object):void
|
||||
+update_object(key:String, options:dict, value:Object):void
|
||||
+show_object(key:String):dict
|
||||
+get_object(key:String):dict
|
||||
+delete_object(key:String):void
|
||||
+list_objects(options:dict):void
|
||||
+acquire_lease(): void
|
||||
+renew_lease(): void
|
||||
+check_lease_validity():bool
|
||||
}
|
||||
|
||||
class BankPlugin implements BankPluginInterface {
|
||||
-storage_url:URL
|
||||
-context:dict
|
||||
-owner_id:String
|
||||
-expired_time:Long
|
||||
-renew_time:Long
|
||||
..object functions..
|
||||
...
|
||||
..lease functions..
|
||||
...
|
||||
}
|
||||
|
||||
PluggableProtectionProvider "1" *-down->"1" CheckpointCollection:has a
|
||||
|
||||
class ProtectionData {
|
||||
+protection_id:String
|
||||
+protection_target:RestoreTarget
|
||||
+status:Enum
|
||||
}
|
||||
|
||||
ProtectionPlugin -up->ProtectionData: create one
|
||||
@enduml
|
@ -0,0 +1,24 @@
|
||||
@startuml
|
||||
|
||||
title create_checkpoint - API RPC call Sequence Diagram
|
||||
|
||||
Smaug_API_Service ->> ProtectionManager :create_checkpoint(backup_plan, protect)
|
||||
ProtectionManager -> WorkflowEngine: build task flow
|
||||
ProtectionManager -> WorkflowEngine: execute task flow
|
||||
WorkflowEngine -> CreateCheckpointTask:execute()
|
||||
CreateCheckpointTask -> Checkpoints : create_checkpoint()
|
||||
Checkpoints -\ BankPlugin : check_lease_validity(owner_id)
|
||||
Checkpoints -\ BankPlugin : put(checkpoint_key, value)
|
||||
Checkpoints -\ BankPlugin : build indexes, put(index_key, value)
|
||||
WorkflowEngine -> ResourceProtectTask:execute()
|
||||
ResourceProtectTask -\ ProtectionPlugin : protect(protectable)
|
||||
ResourceProtectTask -> Checkpoints : create_protection_definition (checkpoint, protectable)
|
||||
Checkpoints -\ BankPlugin : put(protection_definition_key, value)
|
||||
WorkflowEngine -> SyncCheckpointStatusTask :execute()
|
||||
SyncCheckpointStatusTask -\ProtectionPlugin : get_protection_status(protectable)
|
||||
ProtectionPlugin --\ SyncCheckpointStatusTask : Return protection status
|
||||
SyncCheckpointStatusTask -> Checkpoints : update_protection_definition (checkpoint, protectable, {'status':finished})
|
||||
Checkpoints -\ BankPlugin : put(protection_definition_key, value)
|
||||
SyncCheckpointStatusTask ->Checkpoints : update_checkpoint(checkpoint, {'status':finished})
|
||||
Checkpoints -\ BankPlugin : put(checkpoint_key, updated_value)
|
||||
@enduml
|
115
doc/source/specs/protection-service/protection-service.rst
Normal file
115
doc/source/specs/protection-service/protection-service.rst
Normal file
@ -0,0 +1,115 @@
|
||||
..
|
||||
This work is licensed under a Creative Commons Attribution 3.0 Unported
|
||||
License.
|
||||
|
||||
http://creativecommons.org/licenses/by/3.0/legalcode
|
||||
|
||||
====================================
|
||||
Protection Service Basics
|
||||
====================================
|
||||
|
||||
https://bugs.launchpad.net/smaug/+bug/1529199
|
||||
|
||||
Protection Service is a component of smaug (an OpenStack project working as a service for data protection), which is responsible for executing protect/restore/other actions on operations (triggered plans).
|
||||
|
||||
Architecturally, it acts as an RPC server for the smaug API service to actually execute the actions on triggered operations.
|
||||
|
||||
It is also the component that actually cooperates with the protection plugins provided by providers. It will load providers (each composed of a series of plugins) and manage them.
|
||||
|
||||
Internally, protection service will construct a work flow for each operation action execution, where the tasks in the work flow are linked into a graph by resource dependency and are thus executed in parallel or linearly according to the graph task flow.
|
||||
|
||||
RPC interfaces
|
||||
================================================
|
||||
|
||||
.. image:: https://raw.githubusercontent.com/openstack/smaug/master/doc/images/protection-service/protection-architecture.png
|
||||
|
||||
From the module graph, protection service basically provides the following RPC calls:
|
||||
|
||||
Operation RPC:
|
||||
--------------------
|
||||
**execute_operation(backup_plan:BackupPlan, action:Action):** where action could be protect or restore
|
||||
|
||||
Provider RPC:
|
||||
-------------
|
||||
**list_providers(list_options:dict): []Providers**
|
||||
|
||||
**show_provider(provider_id:String):Provider**
|
||||
|
||||
Checkpoint RPC:
|
||||
---------------
|
||||
|
||||
**list_checkpoints(list_options:{}): []Checkpoints**
|
||||
|
||||
**show_checkpoint(provider_id:String, checkpoint_id:String): Checkpoint**
|
||||
|
||||
**delete_checkpoint(provider_id:String, checkpoint_id:String):void**
|
||||
|
||||
Main Concept
|
||||
============
|
||||
.. image:: https://raw.githubusercontent.com/openstack/smaug/master/doc/images/protection-service/class-diagram.png
|
||||
|
||||
|
||||
Protection Manager
|
||||
------------------
|
||||
Endpoint of the RPC server, which will handle Operation RPC calls and dispatch other RPC calls to corresponding components.
|
||||
|
||||
It will produce a graph work flow for each operation execution, and have the work flow to be executed through its work flow engine.
|
||||
|
||||
ProviderRegistry
|
||||
----------------
|
||||
|
||||
Entity to manage multiple providers, which will load provider definitions on init from config files and maintain them in an in-memory map.
|
||||
|
||||
It will actually handle RPC related to provider management, like list_providers() or show_provider().
|
||||
|
||||
CheckpointCollection
|
||||
--------------------
|
||||
|
||||
Entity to manage checkpoints, which provides CRUD interfaces to handle checkpoints. As a checkpoint is a smaug internal entity, one checkpoint operation is actually composed of a combination of several BankPlugin atomic operations.
|
||||
|
||||
Take create_checkpoint as an example: it will first acquire a write lease (there will be a detailed **lease** design doc) to avoid conflict with GC deletion, then it needs to create the key/value for the checkpoint itself. After that, it will build multiple indexes to make listing checkpoints easier.
|
||||
|
||||
Typical scenario
|
||||
======================================
|
||||
A typical scenario will start from a triggered operation being sent through RPC call to Protection Service.
|
||||
|
||||
Let's take action protect as the example and analyze the sequence together with the class graph:
|
||||
|
||||
.. image:: https://raw.githubusercontent.com/openstack/smaug/master/doc/images/protection-service/protect-rpc-call-seq-diagram.png
|
||||
|
||||
1. Smaug **Operation Engine**
|
||||
------------------------------
|
||||
who is responsible for triggering operation according to time schedule or events, will call RPC call of Protection Service: execute_operation(backup_plan:BackupPlan, action:Action);
|
||||
|
||||
2. ProtectionManager
|
||||
------------------------
|
||||
who plays as one of the RPC server endpoints, and will handle this RPC call by following sequence:
|
||||
|
||||
2.1 CreateCheckpointTask:
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
This task will be the start point task of the graph flow. This task will call the unique instance of class **Checkpoints**:create_checkpoint(plan:ProtectionPlan), to create one checkpoint to persist the status of the action execution.
|
||||
|
||||
The instance of **Checkpoints** will retrieve the **Provider** from input parameter **BackupPlan**, and get the unique instance of **BankPlugin**.
|
||||
|
||||
While **BankPlugin** provides interfaces for CRUD key/values in **Bank** and lease interfaces to avoid write/delete conflict, **Checkpoints** is responsible for the whole procedure of creating a checkpoint, including granting the lease, creating the key/value of the checkpoint, building indexes etc. through composing calls to **BankPlugin**.
|
||||
|
||||
2.2 call ProtectionProvider to build sub task flow:
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
This task is built by walking through **resource tree** (see **Pluggable protection provider** doc), which will return a graph flow. The result graph flow could be composed by single task or multiple tasks built with dependencies.
|
||||
|
||||
The graph flow returned by ProtectionProvider would be added to the top layer task flow, right behind the start point task **CreateCheckpointTask**, and will be executed with parallel engine.
|
||||
|
||||
When it comes to each resource task returned from ProtectionProvider task flow building, each task will call protect() interface of related ProtectionPlugin. There, we will get ProtectionData as the return result, which describes the restore target (where the resource is protected to) and the id of the protection data (backup id, snapshot id, image id etc., anything). This ProtectionData will be persisted into Bank under the corresponding **ProtectionDefinition**.
|
||||
|
||||
2.3 SyncCheckpointStatusTask:
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
This task is added into the top layer task flow right after the task flow built from ProtectionProvider, which will be executed only when all tasks/flows ahead of it have been executed successfully.
|
||||
|
||||
This task will list all **ProtectionDefinition** under one checkpoint, for each ProtectionDefinition: if its ProtectionData status hasn't turned to be available, this task will check its protection_id status (backup, snapshot, replication status) by calling ProtectionPlugin.get_protection_status(). If any ProtectionData turns to be available, its status will be updated to the corresponding ProtectionDefinition and won't be checked next time.
|
||||
|
||||
Since each protect action will take some time to achieve finished status (ProtectionData turns to be available), this task could be executed periodically or only executed once before timeout.
|
||||
|
||||
When the operation times out, this task will get the final status of this checkpoint: if all protect actions have achieved finished status, then the checkpoint is finished; otherwise, the checkpoint is broken and will be abandoned.
|
Loading…
Reference in New Issue
Block a user