-- (c) 2015 Hewlett-Packard Development Company, L.P. -- -- Licensed under the Apache License, Version 2.0 (the "License"); -- you may not use this file except in compliance with the License. -- You may obtain a copy of the License at -- -- http://www.apache.org/licenses/LICENSE-2.0 -- -- Unless required by applicable law or agreed to in writing, software -- distributed under the License is distributed on an "AS IS" BASIS, -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -- implied. -- See the License for the specific language governing permissions and -- limitations under the License. -- Migrate data from a storyboard schema to a phabricator schema -- Assumes standard phabricator schema and that storyboard schema is loaded -- in storyboard adjacent to the phabricator schema use storyboard; delimiter // -- phabricator uses an artificial id for everything to facilitate vertical -- sharding without needing cross-repo joins. You can tell they started life -- at Facebook drop function if exists make_phid // create function make_phid(slug varbinary(9)) returns varbinary(30) BEGIN return concat( 'PHID-', concat(slug, concat('-', left( concat( lower(conv(floor(rand() * 99999999999999), 20, 36)), lower(conv(floor(rand() * 99999999999999), 20, 36)) ), 24-length(slug) )))); END // -- There are several places where columns need a random string of a length -- This is kinda lame way to make it (make a 256 character random string, then -- return the first len characters, but it gets the job done drop function if exists make_cert // create function make_cert(len integer) returns varbinary(255) BEGIN return left( concat( md5(rand()), md5(rand()), md5(rand()), md5(rand()), md5(rand()), md5(rand()), md5(rand()), md5(rand()) ), len); END // -- We probably won't eventually use this, but this creates an apache style -- 1000 md5s in a row password hash that we can use to seed the database -- with a bunch of passwords drop function if exists make_hash // create function make_hash(input varbinary(255)) returns varbinary(255) BEGIN DECLARE x int; DECLARE str VARBINARY(255); SET x = 0; SET str = input; while x < 1000 DO SET str = md5(str); SET x = x + 1; END WHILE; return str; END // delimiter ; -- We're going to generate PHID values in each of the original tables -- so that we can inject via joins alter table users add column phid varbinary(64); alter table stories add column phid varbinary(64); alter table tasks add column phid varbinary(64); alter table tasks add column storyPHID varbinary(64); alter table projects add column phid varbinary(64); alter table project_groups add column phid varbinary(64); alter table comments add column phid varbinary(64); alter table comments add column transPHID varbinary(64); -- Add PHIDs to everything update users set phid = make_phid('USER'); update stories set phid = make_phid('TASK'); update tasks set phid = make_phid('TASK'); update projects set phid = make_phid('PROJ'); update project_groups set phid = make_phid('PROJ'); update comments set phid = make_phid('XCMT'); update comments set transPHID = make_phid('XACT-TASK'); -- We want to track what story the task was related to without needing a -- backreference join update stories, tasks set tasks.storyPHID=stories.phid where stories.id=tasks.story_id; -- There are a bunch of duplicate users in the storyboard db that are listed -- with @example.com email addresses. username is unique in phabricator update users set id=concat(id, '_') where email like '%example.com' and id not like '%_'; -- Create temporary table that helps us sort stories with a single task -- from stories with multiple tasks drop table if exists task_count; create table task_count select story_id, storyPHID, count(storyPHID) as count from tasks group by storyPHID; -- Scrub the data into something a bit more usable before we import alter table tasks modify column `priority` enum('low', 'medium', 'high', 'wishlist'); update tasks set priority='wishlist' where priority is null; update tasks set status='todo' where status is NULL; -- We're straight re-using the ids, so we need to make sure story and task ids -- don't conflict. -- Also, id's start with a T now, so we don't need to do as much to avoid -- overlap with launchpad ids alter table tasks drop foreign key tasks_ibfk_4; update stories set id = id+3000 where id < 3000; update tasks set story_id = story_id + 3000 where story_id < 3000; update events set story_id = story_id + 3000 where story_id < 3000; update stories set id = id - 2000000 + 4000 where id >= 2000000; update tasks set story_id = story_id - 2000000 + 4000 where story_id >= 2000000; update events set story_id = story_id - 2000000 + 4000 where story_id < 2000000; use phabricator_user delete from user; delete from user_email; insert into user select id as id, phid as phid, email as userName, if(full_name is NULL, email, full_name) as realName, NULL as sex, NULL as translation, storyboard.make_cert(32) as passwordSalt, '' as passwordHash, unix_timestamp(created_at) as dateCreated, if(updated_at is NULL, unix_timestamp(now()), unix_timestamp(updated_at)) as dateModified, NULL as profileImagePHID, 0 as consoleEnabled, 0 as consoleVisible, '' as consoleTab, storyboard.make_cert(255) as conduitCertificate, 0 as isSystemAgent, 0 as isDisabled, is_superuser as isAdmin, 'UTC' as timezoneIdentifier, 0 as isEmailVerified, 1 as isApproved, 1 as accountSecret, 1 as isEnrolledInMultiFactor, NULL as profileImageCache, NULL as availabilityCache, NULL as availabilityCacheTTL, 0 as isMailingList from storyboard.users; update user set passwordHash = concat( 'md5:', storyboard.make_hash( concat(username, 'password', phid, passwordSalt))); insert into user_email select id, phid, email, 1, 1, storyboard.make_cert(24), unix_timestamp(created_at), if(updated_at is NULL, unix_timestamp(now()), unix_timestamp(updated_at)) from storyboard.users; use phabricator_maniphest -- priorities -- 100 = Unbreak Now! -- 90 = Needs Triage -- 80 = High -- 50 = Normal -- 25 = Low -- 0 = Wishlist delete from maniphest_task; delete from edge; delete from maniphest_transaction; delete from maniphest_transaction_comment; -- stories with one task get collapsed in a single new task insert into maniphest_task select s.id, -- id s.phid, -- phid if(s.creator_id is NULL, '', s.creator_id), -- second pass authorPHID if(t.assignee_id is NULL, NULL, t.assignee_id), -- second pass ownerPHID '', -- attached case t.status -- status when 'todo' then 'open' when 'inprogress' then 'inprogress' when 'invalid' then 'invalid' when 'review' then 'review' when 'merged' then 'merged' when 'invalid' then 'invalid' end, case t.priority -- priority when 'high' then 80 when 'medium' then 50 when 'low' then 25 when 'wishlist' then 0 end, s.title, -- title s.title, -- originalTitle s.description, -- description unix_timestamp(s.created_at), -- dateCreated if(t.updated_at is NULL, unix_timestamp(t.created_at), unix_timestamp(t.updated_at)), -- dateUpdated '[]', -- update in second pass - projectPHIDs storyboard.make_cert(20), -- mailKey NULL, -- ownerOrdering NULL, -- originalEmailSource 0, -- subpriority 'users', -- viewPolicy 'users', -- editPolicy NULL -- spacePHID from storyboard.stories s, storyboard.tasks t, storyboard.task_count c where s.id = t.story_id and c.story_id=s.id and c.count = 1; -- For stories with more than one task, each task becomes a new task insert into maniphest_task select t.id, t.phid, if(t.creator_id is NULL, '', t.creator_id), -- u.phid, if(t.assignee_id is NULL, NULL, t.assignee_id), -- second pass '', case t.status when 'todo' then 'open' when 'inprogress' then 'inprogress' when 'invalid' then 'invalid' when 'review' then 'review' when 'merged' then 'merged' when 'invalid' then 'invalid' end, case t.priority when 'high' then 80 when 'medium' then 50 when 'low' then 25 when 'wishlist' then 0 end, t.title, t.title, '', unix_timestamp(t.created_at), if(t.updated_at is NULL, unix_timestamp(t.created_at), unix_timestamp(t.updated_at)), '[]', -- update in second pass storyboard.make_cert(20), NULL, NULL, 0, 'users', 'users', NULL -- spacePHID from storyboard.stories s, storyboard.tasks t, storyboard.task_count c where s.id = t.story_id and c.story_id=s.id and c.count > 1; -- For stories with more than one task, each story also becomes a task, but -- it doesn't have a project associated with it insert into maniphest_task select s.id, s.phid, if(s.creator_id is NULL, '', s.creator_id), -- u.phid, NULL, '', 'open', 50, s.title, s.title, s.description, unix_timestamp(s.created_at), if(s.updated_at is NULL, unix_timestamp(s.created_at), unix_timestamp(s.updated_at)), '[]', storyboard.make_cert(20), NULL, NULL, 0, 'users', 'users', NULL -- spacePHID from storyboard.stories s, storyboard.task_count c, storyboard.users u where c.story_id=s.id and c.count > 1 and u.id = s.creator_id; -- Set the author and owner PHIDs as a second pass to avoid really crazy -- join semantics above. It could be done ... but why? update maniphest_task, storyboard.users set maniphest_task.authorPHID=storyboard.users.phid where maniphest_task.authorPHID=storyboard.users.id; update maniphest_task, storyboard.users set maniphest_task.ownerPHID=storyboard.users.phid where maniphest_task.ownerPHID=storyboard.users.id; -- Releationships are edges in a DAG, so set up relationships between -- tasks with their owners and authors in both directions insert into edge select authorPHID, 22, phid, 0, 0, NULL from maniphest_task; insert into edge select phid, 21, authorPHID, 0, 0, NULL from maniphest_task; replace into edge select ownerPHID, 22, phid, 0, 0, NULL from maniphest_task where ownerPHID is not null; replace into edge select phid, 21, ownerPHID, 0, 0, NULL from maniphest_task where ownerPHID is not null; -- Comments have two parts - the first is an entry in the transaction table -- indicating that a comment happened and associating the comment with the task insert into maniphest_transaction select c.id, -- id c.transPHID, -- phid u.phid, -- authorPHID s.phid, -- objectPHID 'public', -- viewPolicy u.phid, -- editPolicy c.phid, -- commentPHID 1, -- commentVersion 'core:comment', -- transactionType 'null', -- oldValue 'null', -- newValue '{"source":"web"}', -- contentSource '[]', -- metadata unix_timestamp(c.created_at), -- dateCreated if(c.updated_at is null, unix_timestamp(c.created_at), unix_timestamp(c.updated_at)) -- dateUpdated from storyboard.comments c, storyboard.events e, storyboard.stories s, storyboard.users u where c.id = e.comment_id and s.id = e.story_id and e.event_type='user_comment' and s.creator_id = u.id; -- The second part is the comment payload itself insert into maniphest_transaction_comment select c.id, -- id c.phid, -- phid c.transPHID, -- transactionPHID u.phid, -- author 'public', -- viewPolicy u.phid, -- editPolicy 1, -- if(c.content is NULL, '', c.content), '{"source":"web"}', if(c.is_active, false, true), unix_timestamp(c.created_at), if(c.updated_at is null, unix_timestamp(c.created_at), unix_timestamp(c.updated_at)) from storyboard.comments c, storyboard.events e, storyboard.stories s, storyboard.users u where c.id = e.comment_id and s.id = e.story_id and e.event_type='user_comment' and s.creator_id = u.id; -- We go back over to storyboard repo to create some calculated tables to -- help us do project mapping. We needed to run in the tasks first so that -- we can easily tell which tasks we need projects for. use storyboard drop table if exists task_project; drop table if exists task_project_list; drop table if exists task_project_grouping; -- This is a table mapping tasks to every project it's associated with in a -- clean and easy fashion for later create table task_project select t.phid as task_phid, p.phid as project_phid, t.project_id as project_id from tasks t, phabricator_maniphest.maniphest_task m, projects p where m.phid=t.phid and t.project_id is not null and t.project_id = p.id; -- We also add project groups to this table insert into task_project select t.task_phid, g.phid, t.project_id from task_project t, project_groups g, project_group_mapping m where t.project_id = m.project_id and g.id = m.project_group_id; -- based on that table, we make a new table so that we can modify and create -- the comma-separated json list create table task_project_list select task_phid, project_phid from task_project; update task_project_list set project_phid = concat('"', project_phid, '"'); -- use group_concat to get a row for each task and then a comma-sep list of -- projects. Since we wrapped them all in " above, this will be comma-sep and -- quoted create table task_project_grouping select task_phid, group_concat(project_phid) as phids from task_project_list group by task_phid; -- Finally, wrap the results in [ and ] update task_project_grouping set phids = concat('[', phids, ']'); -- We need to map tasks to dependent tasks. Lucky for us, Storyboard only -- groks one level of this. Make a table for easy of importing later drop table if exists task_subtask; create table task_subtask select tasks.phid, tasks.storyPHID from tasks, phabricator_maniphest.maniphest_task where tasks.phid = phabricator_maniphest.maniphest_task.phid; -- Grab a PHID to use as an author for the projects. -- TODO: Make a system/bot account that we can use as the "owner" of these -- projects. But I'll do for now. select phid into @author_phid from users where email='craige@mcwhirter.com.au'; use phabricator_project delete from project; insert into project select id, -- id name, -- name phid, -- phid @author_phid, unix_timestamp(created_at), if(updated_at is NULL, unix_timestamp(created_at), unix_timestamp(updated_at)), 0, -- status '[]', -- subprojectPHIDs concat(replace(lower(name), '/', '_'), '/'), 'users', 'users', 'users', 0, NULL, 'fa-briefcase', 'blue', '12345678901234567890' -- mailKey from storyboard.projects; insert into project select id + 1000, name, phid, @author_phid, unix_timestamp(created_at), if(updated_at is NULL, unix_timestamp(created_at), unix_timestamp(updated_at)), 0, -- status '[]', -- subprojectPHIDs concat(replace(lower(name), '/', '_'), '/'), 'users', 'users', 'users', 0, NULL, 'fa-briefcase', 'blue', '12345678901234567890' -- mailKey from storyboard.projects; from storyboard.project_groups; delete from project_slug; insert into project_slug select id, phid, replace(lower(name), '/', '_'), dateCreated, dateModified from project; -- insert into project_datasourcetoken (need to split name on - and do a row for each value) for typeahead search in boxes insert into project_datasourcetoken (projectID, token) SELECT p.id, SUBSTRING_INDEX(SUBSTRING_INDEX( p.name, '-', n.n), '-', -1) value FROM project p CROSS JOIN ( SELECT a.N + b.N * 10 + 1 n FROM (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) a, (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) b ORDER BY n ) n WHERE n.n <= 1 + (LENGTH(p.name) - LENGTH(REPLACE(p.name, '-', ''))) ORDER BY value; -- More DAG magic - this will map a project to a task in the projects DB insert into edge select project_phid, 42, task_phid, 0, 0, NULL from storyboard.task_project; use phabricator_maniphest -- We have projects now, so inject them into the name mapping table in the -- bug system insert into maniphest_nameindex select id, phid, name from phabricator_project.project; update maniphest_task t, storyboard.task_project_grouping g set t.projectPHIDs = g.phids where t.phid = g.task_phid; -- Associate tasks with projects from the task side insert into edge select task_phid, 41, project_phid, 0, 0, NULL from storyboard.task_project; -- Relationship -- If the task phid matches a storyboard task.phid, then it's a -- subtask, and we should take the storyPHID from that storyboard.task -- and use it to create the parent/child edges -- Create Parent Edge: -- src = story.phid, type = 3, dst = task.phid -- Create Child Backref: -- src = task.phid, type = 4, dst = story.phid insert into edge select storyPHID, 3, phid, 0, 0, NULL from storyboard.task_subtask; insert into edge select phid, 4, storyPHID, 0, 0, NULL from storyboard.task_subtask;