Fuel tests

test_ceph.py

# Copyright 2014 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from __future__ import unicode_literals

import time

import paramiko
from pkg_resources import parse_version
from proboscis.asserts import assert_true, assert_false, assert_equal
from proboscis import SkipTest
from proboscis import test
from devops.helpers.helpers import tcp_ping
from devops.helpers.helpers import wait
from devops.helpers.ssh_client import SSHAuth
from six import BytesIO
# pylint: disable=import-error
# noinspection PyUnresolvedReferences
from six.moves import configparser
# noinspection PyUnresolvedReferences
from six.moves import cStringIO
# pylint: enable=import-error

from fuelweb_test.helpers import os_actions
from fuelweb_test.helpers import ceph
from fuelweb_test.helpers import checkers
from fuelweb_test.helpers import utils
from fuelweb_test.helpers.decorators import log_snapshot_after_test
from fuelweb_test.helpers.ovs import ovs_get_tag_by_port
from fuelweb_test import ostf_test_mapping
from fuelweb_test import settings
from fuelweb_test import logger
from fuelweb_test.tests.base_test_case import SetupEnvironment
from fuelweb_test.tests.base_test_case import TestBasic


@test(groups=["ceph_ha_one_controller", "ceph"])
class CephCompact(TestBasic):
    """CephCompact."""  # TODO documentation

    @test(depends_on=[SetupEnvironment.prepare_slaves_3],
          groups=["ceph_ha_one_controller_compact",
                  "ha_one_controller_nova_ceph",
                  "ceph_ha_one_controller_compact_neutron", "ceph",
                  "nova", "deployment"])
    @log_snapshot_after_test
    def ceph_ha_one_controller_compact(self):
        """Deploy ceph in HA mode with 1 controller

        Scenario:
            1. Create cluster
            2. Add 1 node with controller and ceph OSD roles
            3. Add 2 nodes with compute and ceph OSD roles
            4. Deploy the cluster
            5. Check ceph status

        Duration 35m
        Snapshot ceph_ha_one_controller_compact
        """
        self.check_run('ceph_ha_one_controller_compact')
        self.env.revert_snapshot("ready_with_3_slaves")

        data = {
            'volumes_ceph': True,
            'images_ceph': True,
            'volumes_lvm': False,
            'tenant': 'ceph1',
            'user': 'ceph1',
            'password': 'ceph1',
            'net_provider': 'neutron',
            'net_segment_type': settings.NEUTRON_SEGMENT['vlan']
        }
        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=settings.DEPLOYMENT_MODE,
            settings=data)
        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller', 'ceph-osd'],
                'slave-02': ['compute', 'ceph-osd'],
                'slave-03': ['compute', 'ceph-osd']
            }
        )
        # Cluster deploy
        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.fuel_web.check_ceph_status(cluster_id)

        # Run ostf
        self.fuel_web.run_ostf(cluster_id=cluster_id)

        self.env.make_snapshot("ceph_ha_one_controller_compact", is_make=True)

    @test(depends_on=[ceph_ha_one_controller_compact],
          groups=["check_ceph_cinder_cow"])
    @log_snapshot_after_test
    def check_ceph_cinder_cow(self):
        """Check copy-on-write when Cinder creates a volume from Glance image

        Scenario:
            1. Revert a snapshot where ceph enabled for volumes and images:
               "ceph_ha_one_controller_compact"
            2. Create a Glance image in RAW disk format
            3. Create a Cinder volume using Glance image in RAW disk format
            4. Check on a ceph-osd node if the volume has a parent image.

        Duration 5m
        """
        self.env.revert_snapshot("ceph_ha_one_controller_compact")

        cluster_id = self.fuel_web.get_last_created_cluster()
        os_conn = os_actions.OpenStackActions(
            self.fuel_web.get_public_vip(cluster_id), 'ceph1', 'ceph1',
            'ceph1')

        image_data = BytesIO(
            self.__class__.__name__.encode(encoding='ascii', errors='ignore'))
        image = os_conn.create_image(disk_format='raw',
                                     container_format='bare',
                                     name='test_ceph_cinder_cow',
                                     is_public=True,
                                     data=image_data)
        wait(lambda: os_conn.get_image(image.name).status == 'active',
             timeout=60 * 2, timeout_msg='Image is not active')
        volume = os_conn.create_volume(size=1, image_id=image.id)
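
        # With Ceph backing both Glance and Cinder, a volume created from a
        # RAW image is expected to be an RBD copy-on-write clone, so its rbd
        # entry should list the Glance image as 'parent' (checked below).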
        with self.fuel_web.get_ssh_for_node('slave-01') as remote:
            rbd_list = ceph.get_rbd_images_list(remote, 'volumes')

        for item in rbd_list:
            if volume.id in item['image']:
                assert_true('parent' in item,
                            "Volume {0} created from image {1} doesn't have"
                            " a parent. The copy-on-write feature doesn't"
                            " work.".format(volume.id, image.id))
                assert_true(image.id in item['parent']['image'],
                            "Volume {0} was created from image {1}, but has"
                            " a different parent image: {2}"
                            .format(volume.id, image.id,
                                    item['parent']['image']))
                break
        else:
            raise Exception("Volume {0} not found!".format(volume.id))


@test(groups=["thread_3", "ceph"])
class CephCompactWithCinder(TestBasic):
    """CephCompactWithCinder."""  # TODO documentation

    @test(depends_on=[SetupEnvironment.prepare_release],
          groups=["ceph_ha_one_controller_with_cinder"])
    @log_snapshot_after_test
    def ceph_ha_one_controller_with_cinder(self):
        """Deploy ceph with cinder in ha mode with 1 controller

        Scenario:
            1. Create cluster
            2. Add 1 node with controller role
            3. Add 1 node with compute role
            4. Add 2 nodes with cinder and ceph OSD roles
            5. Deploy the cluster
            6. Check ceph status
            7. Check partitions on controller node

        Duration 40m
        Snapshot ceph_ha_one_controller_with_cinder
        """
        try:
            self.check_run('ceph_ha_one_controller_with_cinder')
        except SkipTest:
            return

        self.env.revert_snapshot("ready")
        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[:4])

        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=settings.DEPLOYMENT_MODE,
            settings={
                'volumes_ceph': False,
                'images_ceph': True,
                'osd_pool_size': '2',
                'volumes_lvm': True,
                'tenant': 'ceph2',
                'user': 'ceph2',
                'password': 'ceph2'
            }
        )
        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller'],
                'slave-02': ['compute'],
                'slave-03': ['cinder', 'ceph-osd'],
                'slave-04': ['cinder', 'ceph-osd']
            }
        )
        # Cluster deploy
        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.fuel_web.check_ceph_status(cluster_id)

        disks = self.fuel_web.client.get_node_disks(
            self.fuel_web.get_nailgun_node_by_name('slave-01')['id'])
        logger.info("Current disk partitions are: \n{d}".format(d=disks))

        logger.info("Check unallocated space")
        # We expect failure here only for release 5.0 due to bug
        # https://bugs.launchpad.net/fuel/+bug/1306625, so it is
        # necessary to assert_true in the next release.
        assert_false(
            checkers.check_unallocated_space(disks, contr_img_ceph=True),
            "Check unallocated space on controller")

        # Run ostf
        self.fuel_web.run_ostf(cluster_id=cluster_id)

        self.env.make_snapshot("ceph_ha_one_controller_with_cinder",
                               is_make=True)


@test(groups=["thread_3", "ceph"])
class CephHA(TestBasic):
    """CephHA."""  # TODO documentation

    @test(depends_on=[SetupEnvironment.prepare_release],
          groups=["ceph_ha", "classic_provisioning"])
    @log_snapshot_after_test
    def ceph_ha(self):
        """Deploy ceph with cinder in HA mode

        Scenario:
            1. Create cluster
            2. Add 3 nodes with controller and ceph OSD roles
            3. Add 1 node with ceph OSD roles
            4. Add 2 nodes with compute and ceph OSD roles
            5. Deploy the cluster

        Duration 90m
        Snapshot ceph_ha
        """
        try:
            self.check_run('ceph_ha')
        except SkipTest:
            return

        self.env.revert_snapshot("ready")
        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[:6])

        data = {
            'volumes_ceph': True,
            'images_ceph': True,
            'volumes_lvm': False,
            'tenant': 'cephHA',
            'user': 'cephHA',
            'password': 'cephHA',
            'osd_pool_size': "3",
            'net_provider': 'neutron',
            'net_segment_type': settings.NEUTRON_SEGMENT['vlan']
        }
        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=settings.DEPLOYMENT_MODE,
            settings=data
        )
        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller', 'ceph-osd'],
                'slave-02': ['controller', 'ceph-osd'],
                'slave-03': ['controller', 'ceph-osd'],
                'slave-04': ['compute', 'ceph-osd'],
                'slave-05': ['compute', 'ceph-osd'],
                'slave-06': ['ceph-osd']
            }
        )
        # Deploy cluster
        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.env.make_snapshot("ceph_ha", is_make=True)

    @test(depends_on=[ceph_ha],
          groups=["ha_nova_ceph", "ha_neutron_ceph", "check_ceph_ha"])
    @log_snapshot_after_test
    def check_ceph_ha(self):
        """Check ceph with cinder in HA mode

        Scenario:
            1. Revert snapshot with ceph cluster in HA mode
            2. Check ceph status
            3. Check ceph version, should be consistent across nodes

        Duration 10m
        Snapshot check_ceph_ha
        """
        self.env.revert_snapshot("ceph_ha")
        cluster_id = self.fuel_web.get_last_created_cluster()
        self.fuel_web.check_ceph_status(cluster_id)

        versions = []
        for node in self.fuel_web.client.list_cluster_nodes(cluster_id):
            role = '_'.join(node['roles'])
            logger.debug('{} has role {}'.format(node['fqdn'], role))
            with self.fuel_web.get_ssh_for_nailgun_node(node) as remote:
                version = ceph.get_version(remote)
            logger.info('On {} ceph version is {}'.format(node['fqdn'],
                                                          version))
            versions.append({'name': node['fqdn'], 'ceph_version': version})
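
        # Every node in the cluster is expected to run the same Ceph release;
        # compare the parsed version of each node against the first one.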
        ceph_version = versions[0]['ceph_version']
        bad_nodes = [
            ver for ver in versions
            if parse_version(ver['ceph_version']) != parse_version(
                ceph_version)]
        assert_true(len(bad_nodes) == 0,
                    message="All nodes should have the same Ceph version. "
                            "Expected version {0}, but the following nodes "
                            "run a different one: {1}".format(
                                ceph_version, bad_nodes))

        # Run ostf
        self.fuel_web.run_ostf(
            cluster_id=cluster_id,
            test_sets=['ha', 'smoke', 'sanity'])

    @test(depends_on=[ceph_ha],
          groups=["openstack_stat"])
    @log_snapshot_after_test
    def check_openstack_stat(self):
        """Check openstack statistic on fuel and collector side

        Scenario:
            1. Revert ceph_ha env
            2. Create all openstack resources that are collected
            3. Check that all info was collected on fuel side
            4. Check that info was sent to collector
            5. Check that info is properly saved on collector side

        Duration 20m
        Snapshot check_openstack_stat
        """
        self.env.revert_snapshot("ceph_ha")
        cluster_id = self.fuel_web.get_last_created_cluster()
        os_conn = os_actions.OpenStackActions(
            self.fuel_web.get_public_vip(cluster_id), 'cephHA', 'cephHA',
            'cephHA')

        # Check resources addition
        # create instance
        net_name = self.fuel_web.get_cluster_predefined_networks_name(
            cluster_id)['private_net']
        server = os_conn.create_instance(
            neutron_network=True, label=net_name)

        # create flavor
        flavor = os_conn.create_flavor('openstackstat', 1024, 1, 1)

        # create volume
        volume = os_conn.create_volume()

        # create image
        devops_node = self.fuel_web.get_nailgun_primary_node(
            self.env.d_env.nodes().slaves[0])
        with self.fuel_web.get_ssh_for_node(devops_node.name) as slave:
            if settings.OPENSTACK_RELEASE_CENTOS in settings.OPENSTACK_RELEASE:
                slave.execute(". openrc; glance image-create --name"
                              " 'custom-image' --disk-format qcow2"
                              " --protected False --visibility public"
                              " --container-format bare"
                              " --file /opt/vm/cirros-x86_64-disk.img")
            else:
                slave.execute(
                    ". openrc; glance image-create --name"
                    " 'custom-image' --disk-format qcow2"
                    " --protected False --visibility public"
                    " --container-format bare --file"
                    " /usr/share/cirros-testvm/cirros-x86_64-disk.img")

        image = os_conn.get_image_by_name('custom-image')
        logger.debug("image is {}".format(image))

        # create tenant and user
        tenant = os_conn.create_tenant("openstack_tenant")
        user = os_conn.create_user('openstack_user', 'qwerty', tenant)

        self.env.nailgun_actions.force_oswl_collect()
        self.env.nailgun_actions.force_fuel_stats_sending()
        master_uid = self.env.get_masternode_uuid()
        checkers.check_oswl_stat(self.env.postgres_actions,
                                 self.env.nailgun_actions, self.env.collector,
                                 master_uid, operation='current',
                                 resources=['vm', 'flavor', 'volume', 'image',
                                            'tenant', 'keystone_user'])

        # Check resources modification
        # suspend instance
        server.suspend()
        # edit volume
        os_conn.extend_volume(volume, 2)
        # edit image
        os_conn.update_image(image, min_ram=333)
        # edit user
        os_conn.update_user_enabled(user, enabled=False)
        # edit tenant
        os_conn.update_tenant(tenant.id, enabled=False)

        self.env.nailgun_actions.force_oswl_collect()
        self.env.nailgun_actions.force_fuel_stats_sending()
        checkers.check_oswl_stat(self.env.postgres_actions,
                                 self.env.nailgun_actions, self.env.collector,
                                 master_uid, operation='modified',
                                 resources=['vm', 'volume', 'image',
                                            'tenant', 'keystone_user'])

        # Check resources deletion
        # delete instance
        server.delete()
        # delete flavor
        os_conn.delete_flavor(flavor)
        # delete volume
        os_conn.delete_volume_and_wait(volume, timeout=300)
        # delete image
        os_conn.delete_image(image.id)
        # delete tenant
        os_conn.delete_tenant(tenant)
        # delete user
        os_conn.delete_user(user)

        self.env.nailgun_actions.force_oswl_collect()
        self.env.nailgun_actions.force_fuel_stats_sending()
        checkers.check_oswl_stat(self.env.postgres_actions,
                                 self.env.nailgun_actions, self.env.collector,
                                 master_uid, operation='removed',
                                 resources=['vm', 'flavor', 'volume', 'image',
                                            'tenant', 'keystone_user'])


@test(groups=["ha_neutron_tun", "ceph"])
class CephRadosGW(TestBasic):
    """CephRadosGW."""  # TODO documentation

    @test(depends_on=[SetupEnvironment.prepare_release],
          groups=["ceph_rados_gw", "bvt_2", "ceph", "neutron", "deployment"])
    @log_snapshot_after_test
    def ceph_rados_gw(self):
        """Deploy ceph HA with RadosGW for objects

        Scenario:
            1. Create cluster with Neutron
            2. Add 3 nodes with controller role
            3. Add 3 nodes with compute and ceph-osd role
            4. Deploy the cluster
            5. Check ceph status
            6. Run OSTF tests
            7. Check the radosgw daemon is started

        Duration 90m
        Snapshot ceph_rados_gw
        """
        def radosgw_started(remote):
            return remote.check_call('pkill -0 radosgw')['exit_code'] == 0
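
        # 'pkill -0 radosgw' sends signal 0, which does not kill anything; it
        # only checks whether a matching process exists, so a zero exit code
        # means a radosgw daemon is running on the node.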

        self.env.revert_snapshot("ready")
        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[:6])

        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=settings.DEPLOYMENT_MODE,
            settings={
                'volumes_lvm': False,
                'volumes_ceph': True,
                'images_ceph': True,
                'objects_ceph': True,
                'tenant': 'rados',
                'user': 'rados',
                'password': 'rados'
            }
        )
        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller'],
                'slave-02': ['controller'],
                'slave-03': ['controller'],
                'slave-04': ['compute', 'ceph-osd'],
                'slave-05': ['compute', 'ceph-osd'],
                'slave-06': ['compute', 'ceph-osd']
            }
        )
        self.fuel_web.verify_network(cluster_id)
        # Deploy cluster
        self.fuel_web.deploy_cluster_wait(cluster_id)
        # Network verification
        self.fuel_web.verify_network(cluster_id)

        # HAProxy backend checking
        controller_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, ['controller'])
        for node in controller_nodes:
            logger.info("Check all HAProxy backends on {}".format(
                node['meta']['system']['fqdn']))
            haproxy_status = checkers.check_haproxy_backend(node['ip'])
            assert_equal(haproxy_status['exit_code'], 1,
                         "HAProxy backends are DOWN. {0}".format(
                             haproxy_status))
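
        # Note: check_haproxy_backend appears to grep the HAProxy status for
        # backends that are not UP, so an exit code of 1 (no matches) is the
        # healthy case asserted above.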
        self.fuel_web.check_ceph_status(cluster_id)

        # Run ostf
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        # Check the radosgw daemon is started
        with self.fuel_web.get_ssh_for_node('slave-01') as remote:
            assert_true(radosgw_started(remote),
                        'radosgw daemon is not started')

        self.env.make_snapshot("ceph_rados_gw")


@test(groups=["ceph_ha_one_controller", "ceph_migration"])
class VmBackedWithCephMigrationBasic(TestBasic):
    """VmBackedWithCephMigrationBasic."""  # TODO documentation

    @test(depends_on=[SetupEnvironment.prepare_slaves_3],
          groups=["ceph_migration"])
    @log_snapshot_after_test
    def migrate_vm_backed_with_ceph(self):
        """Check VM backed with ceph migration in ha mode with 1 controller

        Scenario:
            1. Create cluster
            2. Add 1 node with controller and ceph OSD roles
            3. Add 2 nodes with compute and ceph OSD roles
            4. Deploy the cluster
            5. Check ceph status
            6. Run OSTF
            7. Create a new VM, assign floating ip
            8. Migrate VM
            9. Check cluster and server state after migration
            10. Terminate VM
            11. Check that DHCP lease is not offered for MAC of deleted VM
            12. Create a new VM for migration, assign floating ip
            13. Create a volume and attach it to the VM
            14. Create filesystem on the new volume and mount it to the VM
            15. Migrate VM
            16. Check that volume was mounted
            17. Check cluster and server state after migration
            18. Terminate VM

        Duration 35m
        Snapshot vm_backed_with_ceph_live_migration
        """
        self.env.revert_snapshot("ready_with_3_slaves")

        self.show_step(1, initialize=True)
        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=settings.DEPLOYMENT_MODE,
            settings={
                'volumes_ceph': True,
                'images_ceph': True,
                'ephemeral_ceph': True,
                'volumes_lvm': False,
            }
        )
        self.show_step(2)
        self.show_step(3)
        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller', 'ceph-osd'],
                'slave-02': ['compute', 'ceph-osd'],
                'slave-03': ['compute', 'ceph-osd']
            }
        )
        creds = SSHAuth(username="cirros", password="test")

        self.show_step(4)
        # Cluster deploy
        self.fuel_web.deploy_cluster_wait(cluster_id)

        def _check():
            # Run the volume test, retrying in the hope that it passes
            test_path = ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'Create volume and attach it to instance')
            logger.debug('Start to run test {0}'.format(test_path))
            self.fuel_web.run_single_ostf_test(
                cluster_id, test_sets=['smoke'],
                test_name=test_path)

        self.show_step(5)
        try:
            _check()
        except AssertionError:
            logger.debug(AssertionError)
            logger.debug("Test failed on the first probe; sleeping 60 seconds"
                         " and trying one more time. If it fails again, the"
                         " test fails.")
            time.sleep(60)
            _check()

        self.show_step(6)
        # Run ostf
        self.fuel_web.run_ostf(cluster_id)

        self.show_step(7)
        # Create new server
        os = os_actions.OpenStackActions(
            self.fuel_web.get_public_vip(cluster_id))
        net_name = self.fuel_web.get_cluster_predefined_networks_name(
            cluster_id)['private_net']

        logger.info("Create new server")
        srv = os.create_server_for_migration(
            neutron=True,
            scenario='./fuelweb_test/helpers/instance_initial_scenario',
            label=net_name)
        logger.info("Srv is currently in status: {:s}".format(srv.status))

        # Prepare to DHCP leases checks
        srv_instance_ip = os.get_nova_instance_ip(srv, net_name=net_name)
        srv_host_name = self.fuel_web.find_devops_node_by_nailgun_fqdn(
            os.get_srv_hypervisor_name(srv),
            self.env.d_env.nodes().slaves[:3]).name
        net_id = os.get_network(net_name)['id']
        ports = os.get_neutron_dhcp_ports(net_id)
        dhcp_server_ip = ports[0]['fixed_ips'][0]['ip_address']
        with self.fuel_web.get_ssh_for_node(srv_host_name) as srv_remote_node:
            srv_instance_mac = os.get_instance_mac(srv_remote_node, srv)
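
        # The instance's fixed IP and MAC, plus the DHCP server address, are
        # recorded now so the Neutron DHCP lease can be checked after the VM
        # is deleted (see step 11 below).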

        logger.info("Assigning floating ip to server")
        floating_ip = os.assign_floating_ip(srv)
        srv_host = os.get_srv_host_name(srv)
        logger.info("Server is on host {:s}".format(srv_host))

        wait(lambda: tcp_ping(floating_ip.ip, 22), timeout=120,
             timeout_msg='new VM ssh port ping timeout')

        def ssh_ready(remote, ip, creds):
            """SSH Ready status

            :type ip: str
            :type creds: SSHAuth
            """
            try:
                remote.execute_through_host(ip, '/bin/true', creds)
                return True
            except paramiko.AuthenticationException:
                logger.info("Authentication failed. Trying again in a minute.")
                time.sleep(60)
                return False
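
        # ssh_ready reaches the VM's floating IP through the controller used
        # as a jump host; an AuthenticationException most likely means the
        # guest has not finished booting yet, so wait() retries it.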

        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            wait(lambda: ssh_ready(remote, floating_ip.ip, creds), timeout=300)
            md5before = remote.execute_through_host(
                floating_ip.ip,
                "md5sum {:s}".format("/home/test_file"),
                auth=creds).stdout_str

        self.show_step(8)
        logger.info("Get available computes")
        avail_hosts = os.get_hosts_for_migr(srv_host)

        logger.info("Migrating server")
        new_srv = os.migrate_server(srv, avail_hosts[0], timeout=200)
        logger.info("Check cluster and server state after migration")

        wait(lambda: tcp_ping(floating_ip.ip, 22), timeout=120,
             timeout_msg='VM ssh port ping timeout after migration')

        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            md5after = remote.execute_through_host(
                floating_ip.ip,
                "md5sum {:s}".format("/home/test_file"),
                auth=creds).stdout_str
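
        # Matching md5 sums before and after the migration confirm that the
        # Ceph-backed disk contents survived the live migration intact.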
        checkers.diff_md5(md5before, md5after)

        self.show_step(9)
        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            res = remote.execute_through_host(
                floating_ip.ip,
                "ping -q -c3 -w10 {0} | grep 'received' |"
                " grep -v '0 packets received'"
                .format(settings.PUBLIC_TEST_IP),
                auth=creds)
        logger.info("Ping {0} result on vm is: {1}"
                    .format(settings.PUBLIC_TEST_IP, res['stdout']))

        logger.info("Check Ceph health is ok after migration")
        self.fuel_web.check_ceph_status(cluster_id)
        logger.info(
            "Server is now on host {:s}".format(os.get_srv_host_name(new_srv)))

        self.show_step(10)
        logger.info("Terminate migrated server")
        os.delete_instance(new_srv)
        os.verify_srv_deleted(new_srv)

        self.show_step(11)
        # Check if the dhcp lease for instance still remains
        # on the previous compute node. Related Bug: #1391010
        _ip = self.fuel_web.get_nailgun_node_by_name('slave-01')['ip']
        with self.fuel_web.get_ssh_for_node('slave-01') as remote:
            dhcp_port_tag = ovs_get_tag_by_port(remote, ports[0]['id'])
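
        # The DHCP agent must not keep offering a lease for the deleted VM's
        # MAC address; this guards against a regression of bug #1391010.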
        assert_false(checkers.check_neutron_dhcp_lease(_ip,
                                                       srv_instance_ip,
                                                       srv_instance_mac,
                                                       dhcp_server_ip,
                                                       dhcp_port_tag),
                     "Instance has been deleted, but its DHCP lease for "
                     "IP:{0} with MAC:{1} is still offered by the Neutron "
                     "DHCP agent.".format(srv_instance_ip,
                                          srv_instance_mac))

        self.show_step(12)
        # Create a new server
        logger.info("Create a new server for migration with volume")
        srv = os.create_server_for_migration(
            neutron=True,
            scenario='./fuelweb_test/helpers/instance_initial_scenario',
            label=net_name)
        logger.info("Srv is currently in status: {:s}".format(srv.status))

        logger.info("Assigning floating ip to server")
        floating_ip = os.assign_floating_ip(srv)
        srv_host = os.get_srv_host_name(srv)
        logger.info("Server is on host {:s}".format(srv_host))

        self.show_step(13)
        logger.info("Create volume")
        vol = os.create_volume()
        logger.info("Attach volume to server")
        os.attach_volume(vol, srv)

        self.show_step(14)
        wait(lambda: tcp_ping(floating_ip.ip, 22), timeout=120,
             timeout_msg='new VM ssh port ping timeout')
        logger.info("Create filesystem and mount volume")

        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            wait(lambda: ssh_ready(remote, floating_ip.ip, creds), timeout=300)
            remote.execute_through_host(
                floating_ip.ip,
                'sudo sh /home/mount_volume.sh',
                auth=creds)
            remote.execute_through_host(
                floating_ip.ip,
                'sudo touch /mnt/file-on-volume',
                auth=creds)

        self.show_step(15)
        logger.info("Get available computes")
        avail_hosts = os.get_hosts_for_migr(srv_host)

        logger.info("Migrating server")
        new_srv = os.migrate_server(srv, avail_hosts[0], timeout=120)

        logger.info("Check cluster and server state after migration")
        wait(lambda: tcp_ping(floating_ip.ip, 22), timeout=120,
             timeout_msg='VM ssh port ping timeout after migration')

        self.show_step(16)
        logger.info("Check that volume was mounted")
        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            out = remote.execute_through_host(
                floating_ip.ip,
                'mount | grep "/dev/vdb on /mnt"',
                auth=creds)
            assert_true(out['stdout'] and out['exit_code'] == 0,
                        "Volume was not mounted")

        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            out = remote.execute_through_host(
                floating_ip.ip,
                "sudo ls /mnt",
                auth=creds)
            assert_true("file-on-volume\n" in out['stdout'],
                        "File is absent in /mnt")

        self.show_step(17)
        logger.info("Check Ceph health is ok after migration")
        self.fuel_web.check_ceph_status(cluster_id)
        logger.info(
            "Server is now on host {:s}".format(os.get_srv_host_name(new_srv)))

        self.show_step(18)
        logger.info("Terminate migrated server")
        os.delete_instance(new_srv)
        os.verify_srv_deleted(new_srv)

        self.env.make_snapshot(
            "vm_backed_with_ceph_live_migration")


@test(groups=["ceph_ha_one_controller", "ceph_partitions"])
class CheckCephPartitionsAfterReboot(TestBasic):
    """CheckCephPartitionsAfterReboot."""  # TODO documentation

    @test(depends_on=[SetupEnvironment.prepare_slaves_3],
          groups=["ceph_partitions"])
    @log_snapshot_after_test
    def check_ceph_partitions_after_reboot(self):
        """Check that Ceph OSD partitions are remounted after reboot

        Scenario:
            1. Create cluster in Ha mode with 1 controller
            2. Add 1 node with controller role
            3. Add 1 node with compute and Ceph OSD roles
            4. Add 1 node with Ceph OSD role
            5. Deploy the cluster
            6. Check Ceph status
            7. Read current partitions
            8. Warm-reboot Ceph nodes
            9. Read partitions again
            10. Check Ceph health
            11. Cold-reboot Ceph nodes
            12. Read partitions again
            13. Check Ceph health

        Duration 40m
        Snapshot check_ceph_partitions_after_reboot
        """
        self.env.revert_snapshot("ready_with_3_slaves")

        self.show_step(1, initialize=True)
        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=settings.DEPLOYMENT_MODE,
            settings={
                'volumes_ceph': True,
                'images_ceph': True,
                'osd_pool_size': '2',
                'ephemeral_ceph': True,
                'volumes_lvm': False,
            }
        )
        self.show_step(2)
        self.show_step(3)
        self.show_step(4)
        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller'],
                'slave-02': ['compute', 'ceph-osd'],
                'slave-03': ['ceph-osd']
            }
        )
        self.show_step(5)
        # Deploy cluster
        self.fuel_web.deploy_cluster_wait(cluster_id)

        self.show_step(6)
        for node in ["slave-02", "slave-03"]:
            self.show_step(7, node, True)
            logger.info("Get partitions for {node}".format(node=node))
            _ip = self.fuel_web.get_nailgun_node_by_name(node)['ip']
            before_reboot_partitions = [utils.get_ceph_partitions(
                _ip,
                "/dev/vd{p}".format(p=part)) for part in ["b", "c"]]
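
            # /dev/vdb and /dev/vdc are the virtual disks expected to carry
            # the Ceph OSD partitions on these nodes; their partition layout
            # must be identical before and after each reboot.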

            self.show_step(8, node)
            logger.info("Warm-restart nodes")
            self.fuel_web.warm_restart_nodes(
                [self.fuel_web.environment.d_env.get_node(name=node)])

            self.show_step(9, node)
            logger.info("Get partitions for {node} once again".format(
                node=node
            ))
            _ip = self.fuel_web.get_nailgun_node_by_name(node)['ip']
            after_reboot_partitions = [utils.get_ceph_partitions(
                _ip,
                "/dev/vd{p}".format(p=part)) for part in ["b", "c"]]
            if before_reboot_partitions != after_reboot_partitions:
                logger.info("Partitions don't match")
                logger.info("Before reboot: "
                            "{:s}".format(before_reboot_partitions))
                logger.info("After reboot: "
                            "{:s}".format(after_reboot_partitions))
                raise Exception("Ceph partitions changed after warm reboot "
                                "of {0}".format(node))

            self.show_step(10, node)
            logger.info("Check Ceph health is ok after reboot")
            self.fuel_web.check_ceph_status(cluster_id)

            self.show_step(11, node)
            logger.info("Cold-restart nodes")
            self.fuel_web.cold_restart_nodes(
                [self.fuel_web.environment.d_env.get_node(name=node)])

            self.show_step(12, node)
            _ip = self.fuel_web.get_nailgun_node_by_name(node)['ip']
            after_reboot_partitions = [utils.get_ceph_partitions(
                _ip,
                "/dev/vd{p}".format(p=part)) for part in ["b", "c"]]
            if before_reboot_partitions != after_reboot_partitions:
                logger.info("Partitions don't match")
                logger.info("Before reboot: "
                            "{:s}".format(before_reboot_partitions))
                logger.info("After reboot: "
                            "{:s}".format(after_reboot_partitions))
                raise Exception("Ceph partitions changed after cold reboot "
                                "of {0}".format(node))

            self.show_step(13, node)
            logger.info("Check Ceph health is ok after reboot")
            self.fuel_web.check_ceph_status(cluster_id)


@test(groups=["default_storage_rados_gw", "ceph"])
class RadosGW(TestBasic):
    """RadosGW."""  # TODO documentation

    @test(depends_on=[SetupEnvironment.prepare_release],
          groups=["radosgw_without_os_services_usage"])
    @log_snapshot_after_test
    def radosgw_without_os_services_usage(self):
        """Deploy ceph HA with RadosGW for objects

        Scenario:
            1. Create cluster with RadosGW enabled
            2. Add 3 nodes with controller role
            3. Add 2 nodes with compute and ceph-osd role
            4. Add 2 nodes with ceph-osd role
            5. Verify Network
            6. Deploy the cluster
            7. Verify Network
            8. Run OSTF tests
            9. Check ceph status
            10. Check the radosgw daemon is started
            11. Create custom image via glance
            12. Compare custom image IDs via glance and swift client
            13. Check S3 API

        Duration 90m
        """
        self.show_step(1)
        self.env.revert_snapshot("ready")
        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[:7])
        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=settings.DEPLOYMENT_MODE,
            settings={
                'volumes_lvm': True,
                'volumes_ceph': False,
                'images_ceph': False,
                'objects_ceph': True
            }
        )
        self.show_step(2)
        self.show_step(3)
        self.show_step(4)
        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller'],
                'slave-02': ['controller'],
                'slave-03': ['controller'],
                'slave-04': ['compute'],
                'slave-05': ['compute', 'ceph-osd'],
                'slave-06': ['ceph-osd'],
                'slave-07': ['ceph-osd']
            }
        )
        self.show_step(5)
        self.fuel_web.verify_network(cluster_id)
        self.show_step(6)
        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.show_step(7)
        self.fuel_web.verify_network(cluster_id)
        self.show_step(8)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])
        self.show_step(9)
        self.fuel_web.check_ceph_status(cluster_id)

        self.show_step(10)
        devops_node = self.fuel_web.get_nailgun_primary_node(
            self.env.d_env.nodes().slaves[0])
        node = self.fuel_web.get_nailgun_node_by_devops_node(devops_node)
        self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd="pkill -0 radosgw")

        self.show_step(11)
        self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd=". openrc; glance image-create --name"
                " 'custom-image' --disk-format qcow2"
                " --protected False --visibility public"
                " --container-format bare --file"
                " /usr/share/cirros-testvm/cirros-x86_64-disk.img")

        settings_source = '/etc/glance/glance-api.conf'
        openrc = '~/openrc'
        settings_list = (
            "admin_tenant_name", "admin_user", "admin_password")
        openrc_settings = (
            "OS_TENANT_NAME", "OS_PROJECT_NAME", "OS_USERNAME", "OS_PASSWORD")
        glance_config = self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd="cat {0} | egrep -v '^#'".format(
                settings_source))['stdout_str']
        glance_config_file = cStringIO(glance_config)
        parser = configparser.ConfigParser()
        parser.readfp(glance_config_file)
        settings_value = [
            parser.get('keystone_authtoken', value) for value in settings_list]
        settings_value.insert(0, settings_value[0])
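
        # settings_list yields three values (tenant, user, password) while
        # openrc needs four variables; the tenant name is duplicated so that
        # both OS_TENANT_NAME and OS_PROJECT_NAME get the same value.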
        for val in zip(openrc_settings, settings_value):
            self.ssh_manager.execute_on_remote(
                ip=node['ip'],
                cmd="sed -ie '/{0}=/ s/admin/{1}/g' {2}".format(
                    val[0], val[1], openrc))
        self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd="sed -i 's/5000/5000\/v2.0/g' {0}".format(openrc))

        glance_image_id = self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd=". openrc; glance image-list | "
                "grep custom-image")['stdout'][0].split("|")[1].strip()
        swift_image_ids = self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd=". openrc; swift list glance")['stdout']

        self.show_step(12)
        if glance_image_id in [image_id.rstrip()
                               for image_id in swift_image_ids]:
            logger.debug(
                "Glance image {0} was found "
                "in the swift_image_ids {1}".format(
                    glance_image_id, swift_image_ids))
        else:
            raise Exception(
                "The glance_image_id {0} was not found "
                "in the list swift_image_ids {1}".format(
                    glance_image_id, swift_image_ids))

        self.show_step(13)
        keys = self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd='radosgw-admin user create '
                '--uid="s3_main" --display-name="s3_main"',
            jsonify=True)['stdout_json']['keys'][0]
        access_key = keys['access_key']
        secret_key = keys['secret_key']
        self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd="apt-get install -y python-pip")
        self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd="pip install {0}".format(settings.S3_API_CLIENT))
        pub_contr_ip = self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd="ip -o -4 addr "
                "show br-ex")['stdout'][0].split()[3].split('/')[0]
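
        # The controller's public address is taken from br-ex; radosgw is
        # reached on port 6780 there, as used by the S3 client calls below.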
        self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd="s3cmd --access_key={0} --secret_key={1} "
                "--no-ssl --host={2}:6780 mb s3://test_bucket".format(
                    access_key, secret_key, pub_contr_ip))
        result = self.ssh_manager.execute_on_remote(
            ip=node['ip'],
            cmd="{0} --access_key={1} --secret_key={2} "
                "--no-ssl --host={3}:6780 ls".format(
                    settings.S3_API_CLIENT, access_key, secret_key,
                    pub_contr_ip))
        if 'test_bucket' not in result['stdout_str']:
            raise Exception(
                "The S3 API call failed: {0}".format(result['stderr']))