首页 > 代码库 > OpenStack之虚机冷迁移代码简析
OpenStack之虚机冷迁移代码简析
OpenStack之虚机冷迁移代码简析
前不久我们看了openstack的热迁移代码,并进行了简单的分析。真的,很简单的分析。现在天气凉了,为了应时令,再简析下虚机冷迁移的代码。
还是老样子,前端的Horizon代码就省去了,直接看后端的代码实现,前端通过请求的action进入到nova/api/openstack/compute/contrib/admin_actions.py文件。代码如下:
@wsgi.action('migrate')
def _migrate(self, req, id, body):
    """Permit admins to migrate a server to a new host."""
    context = req.environ['nova.context']
    authorize(context, 'migrate')
    try:
        instance = self.compute_api.get(context, id, want_objects=True)
        self.compute_api.resize(req.environ['nova.context'], instance)
    except exception.QuotaError as error:
        raise exc.HTTPRequestEntityTooLarge(
            explanation=error.format_message(),
            headers={'Retry-After': 0})
    except exception.InstanceIsLocked as e:
        raise exc.HTTPConflict(explanation=e.format_message())
    except exception.InstanceInvalidState as state_error:
        common.raise_http_conflict_for_instance_invalid_state(state_error,
                'migrate')
    except exception.InstanceNotFound as e:
        raise exc.HTTPNotFound(explanation=e.format_message())
    except Exception as e:  # anything else is reported as a bad request
        LOG.exception(_("Error in migrate %s"), e)
        raise exc.HTTPBadRequest()
    return webob.Response(status_int=202)  # 202 Accepted
首先,第7行可以看到,获得虚机,然后,第8行是主要的实现方法,那么问题来了:冷迁移竟然调用和resize一样的方法!!!赶紧跟进,进入到nova/compute/api.py文件中:
def resize(self, context, instance, flavor_id=None,
           **extra_instance_updates):
    """Resize an instance to a new flavor, or cold-migrate it.

    With no flavor_id the request is treated as a migration and the
    current flavor is kept; with a flavor_id the instance is moved to
    a new host and uses the new flavor.
    """
    self._check_auto_disk_config(instance, **extra_instance_updates)

    current_instance_type = flavors.extract_flavor(instance)

    # Without a flavor_id only the migration itself is performed.
    if not flavor_id:
        LOG.debug(_("flavor_id is None. Assuming migration."),
                  instance=instance)
        new_instance_type = current_instance_type
    else:
        new_instance_type = flavors.get_flavor_by_flavor_id(
                flavor_id, read_deleted="no")

    current_instance_type_name = current_instance_type['name']
    new_instance_type_name = new_instance_type['name']
    LOG.debug(_("Old instance type %(current_instance_type_name)s, "
                " new instance type %(new_instance_type_name)s"),
              {'current_instance_type_name': current_instance_type_name,
               'new_instance_type_name': new_instance_type_name},
              instance=instance)

    if not new_instance_type:
        raise exception.FlavorNotFound(flavor_id=flavor_id)

    same_instance_type = (current_instance_type['id'] ==
                          new_instance_type['id'])

    # Check the flavor without forcing the user to change it: a disabled
    # flavor is rejected only when the flavor actually changes.
    if not same_instance_type and new_instance_type.get('disabled'):
        raise exception.FlavorNotFound(flavor_id=flavor_id)

    if same_instance_type and flavor_id and self.cell_type != 'compute':
        raise exception.CannotResizeToSameFlavor()

    # Reserve quota for the size increase; OverQuota means no headroom.
    deltas = self._upsize_quota_delta(context, new_instance_type,
                                      current_instance_type)
    try:
        project_id, user_id = quotas_obj.ids_from_instance(context,
                                                           instance)
        quotas = self._reserve_quota_delta(context, deltas,
                                           project_id=project_id)
    except exception.OverQuota as exc:
        quotas = exc.kwargs['quotas']
        overs = exc.kwargs['overs']
        headroom = exc.kwargs['headroom']

        resource = overs[0]
        used = quotas[resource] - headroom[resource]
        total_allowed = used + headroom[resource]
        overs = ','.join(overs)
        LOG.warn(_("%(overs)s quota exceeded for %(pid)s,"
                   " tried to resize instance."),
                 {'overs': overs, 'pid': context.project_id})
        raise exception.TooManyInstances(overs=overs,
                                         req=deltas[resource],
                                         used=used, allowed=total_allowed,
                                         resource=resource)

    instance.task_state = task_states.RESIZE_PREP
    instance.progress = 0
    instance.update(extra_instance_updates)
    instance.save(expected_task_state=[None])

    filter_properties = {'ignore_hosts': []}

    if not CONF.allow_resize_to_same_host:
        filter_properties['ignore_hosts'].append(instance['host'])

    # No flavor_id means a pure migration: exclude the current host too.
    if (not flavor_id and not CONF.allow_migrate_to_same_host):
        filter_properties['ignore_hosts'].append(instance['host'])

    if self.cell_type == 'api':
        # Create the migration record.
        self._resize_cells_support(context, quotas, instance,
                                   current_instance_type,
                                   new_instance_type)

    self._record_action_start(context, instance, instance_actions.RESIZE)

    scheduler_hint = {'filter_properties': filter_properties}
    self.compute_task_api.resize_instance(context, instance,
            extra_instance_updates, scheduler_hint=scheduler_hint,
            flavor=new_instance_type,
            reservations=quotas.reservations or [])
# Listing ends with the decorators of the following definition (not shown):
# @wrap_check_policy
# @check_instance_lock
# @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
#                                 vm_states.PAUSED, vm_states.SUSPENDED],
#                       task_state=[None])
一些代码的分析直接写在代码上面了,97-101行是一些装饰器,用来进行一些状态之类的检查,92-95行调用conductor中的api,执行resize_instance函数,进入到nova/conductor/api.py中,找到resize_instance的代码:
def resize_instance(self, context, instance, extra_instance_updates,
                    scheduler_hint, flavor, reservations):
    # 'extra_instance_updates' is not used here, but it is kept so
    # that this method stays compatible with the cells_rpcapi
    # version of the same call.
    self._manager.migrate_server(
        context, instance, scheduler_hint, False, False, flavor,
        None, None, reservations)
这个方法没什么说的,直接调用nova/conductor/manager.py中的migrate_server方法:
def migrate_server(self, context, instance, scheduler_hint, live, rebuild,
        flavor, block_migration, disk_over_commit, reservations=None):
    if instance and not isinstance(instance, instance_obj.Instance):
        # Before V2 of the RPC API, old-style (non-object)
        # instances still have to be supported.
        attrs = ['metadata', 'system_metadata', 'info_cache',
                 'security_groups']
        instance = instance_obj.Instance._from_db_object(
            context, instance_obj.Instance(), instance,
            expected_attrs=attrs)
    if live and not rebuild and not flavor:  # live migration
        self._live_migrate(context, instance, scheduler_hint,
                           block_migration, disk_over_commit)
    elif not live and not rebuild and flavor:  # cold migration / resize
        instance_uuid = instance['uuid']
        with compute_utils.EventReporter(context, self.db,
                                     'cold_migrate', instance_uuid):
            self._cold_migrate(context, instance, flavor,
                               scheduler_hint['filter_properties'],
                               reservations)
    else:  # any other flag combination is unsupported
        raise NotImplementedError()
这里第11行到13行大家都比较熟悉了,在OpenStack之虚机热迁移代码解析 中我们已经见过,没错,那个执行的就是虚机热迁移,当然14-20行就是我们本次要讲的虚机冷迁移咯,18-20行就是调用此冷迁移的函数,代码如下:
def _cold_migrate(self, context, instance, flavor, filter_properties,
                  reservations):
    image_ref = instance.image_ref
    image = compute_utils.get_image_metadata(
        context, self.image_service, image_ref, instance)
    # Build the scheduler request from the image metadata and the flavor.
    request_spec = scheduler_utils.build_request_spec(
        context, image, [instance], instance_type=flavor)
    # Wrap the reservations so they can be rolled back on failure.
    quotas = quotas_obj.Quotas.from_reservations(context,
                                                 reservations,
                                                 instance=instance)
    try:
        hosts = self.scheduler_rpcapi.select_destinations(
                context, request_spec, filter_properties)
        host_state = hosts[0]
    except exception.NoValidHost as ex:
        vm_state = instance['vm_state']
        if not vm_state:
            vm_state = vm_states.ACTIVE
        updates = {'vm_state': vm_state, 'task_state': None}
        self._set_vm_state_and_notify(context, 'migrate_server',
                                      updates, ex, request_spec)
        quotas.rollback()

        LOG.warning(_("No valid host found for cold migrate"),
                    instance=instance)
        return
    # A destination was selected: hand the resize over to that host.
    try:
        scheduler_utils.populate_filter_properties(filter_properties,
                                                   host_state)

        filter_properties.pop('context', None)

        # The request_spec built by compute.api.resize originally had no
        # 'extra_specs' in instance_type, so it is removed here for
        # backward compatibility.
        request_spec['instance_type'].pop('extra_specs')

        (host, node) = (host_state['host'], host_state['nodename'])
        self.compute_rpcapi.prep_resize(
            context, image, instance,
            flavor, host,
            reservations, request_spec=request_spec,
            filter_properties=filter_properties, node=node)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            updates = {'vm_state': instance['vm_state'],
                       'task_state': None}
            self._set_vm_state_and_notify(context, 'migrate_server',
                                          updates, ex, request_spec)
            quotas.rollback()
前面几行首先得到一些参数,然后使用调度算法自动获取一个最适合迁移的host,得到host以及前面的参数之后,就是第42行的执行冷迁移,也就是nova/compute/rpcapi.py和nova/compute/manager.py中的prep_resize函数:
def prep_resize(self, ctxt, image, instance, instance_type, host,
                reservations=None, request_spec=None,
                filter_properties=None, node=None):
    """Cast 'prep_resize' to the compute server named by ``host``."""
    version = self._get_compat_version('3.0', '2.43')
    instance_type_p = jsonutils.to_primitive(instance_type)
    image_p = jsonutils.to_primitive(image)
    cctxt = self.client.prepare(server=host, version=version)
    cctxt.cast(ctxt, 'prep_resize',
               instance=instance,
               instance_type=instance_type_p,
               image=image_p, reservations=reservations,
               request_spec=request_spec,
               filter_properties=filter_properties,
               node=node)
def prep_resize(self, context, image, instance, instance_type,
                reservations, request_spec, filter_properties, node):
    """Begin preparing to migrate the instance to another host.

    When ``node`` is None the driver's first available node is used.
    Emits "resize.prep.start" / "resize.prep.end" usage notifications.
    """
    if node is None:
        node = self.driver.get_available_nodes(refresh=True)[0]
        LOG.debug(_("No node specified, defaulting to %s"), node,
                  instance=instance)

    with self._error_out_instance_on_exception(context, instance['uuid'],
                                               reservations):
        self.conductor_api.notify_usage_exists(
                context, instance, current_period=True)
        self._notify_about_instance_usage(
                context, instance, "resize.prep.start")
        try:
            self._prep_resize(context, image, instance,
                              instance_type, reservations,
                              request_spec, filter_properties,
                              node)
        except Exception:
            # Hand the failure to the reschedule-or-reraise helper.
            exc_info = sys.exc_info()
            self._reschedule_resize_or_reraise(context, image, instance,
                    exc_info, instance_type, reservations, request_spec,
                    filter_properties)
        finally:
            extra_usage_info = dict(
                    new_instance_type=instance_type['name'],
                    new_instance_type_id=instance_type['id'])

            self._notify_about_instance_usage(
                context, instance, "resize.prep.end",
                extra_usage_info=extra_usage_info)
获得各种参数,并进行可用空间以及存在性检测,然后调用_prep_resize方法:
def _prep_resize(self, context, image, instance, instance_type,
        reservations, request_spec, filter_properties, node):
    """Check hosts, stash the new flavor and vm_state, then claim and resize."""
    if not filter_properties:
        filter_properties = {}

    if not instance['host']:
        self._set_instance_error_state(context, instance['uuid'])
        msg = _('Instance has no source host')
        raise exception.MigrationError(msg)

    same_host = instance['host'] == self.host
    if same_host and not CONF.allow_resize_to_same_host:
        self._set_instance_error_state(context, instance['uuid'])
        msg = _('destination same as source!')
        raise exception.MigrationError(msg)

    # Save the new instance_type.

    sys_meta = instance.system_metadata
    flavors.save_flavor_info(sys_meta, instance_type, prefix='new_')

    # Save the vm state so the instance can be restored to it later.
    vm_state = instance['vm_state']
    LOG.debug(_('Stashing vm_state: %s'), vm_state, instance=instance)
    sys_meta['old_vm_state'] = vm_state
    instance.save()

    limits = filter_properties.get('limits', {})
    rt = self._get_resource_tracker(node)
    with rt.resize_claim(context, instance, instance_type,
                         limits=limits) as claim:
        LOG.audit(_('Migrating'), context=context, instance=instance)
        self.compute_rpcapi.resize_instance(context, instance,
                claim.migration, image, instance_type, reservations)
# Listing ends with the decorators of the following definition (not shown):
# @wrap_exception()
# @reverts_task_state
# @wrap_instance_event
# @wrap_instance_fault
最后在34行调用nova/compute/rpcapi.py中和nova/compute/manager.py中的resize_instance方法:
def resize_instance(self, ctxt, instance, migration, image, instance_type,
                    reservations=None):
    """Cast 'resize_instance' to the server from _compute_host(None, instance)."""
    version = self._get_compat_version('3.0', '2.45')
    instance_type_p = jsonutils.to_primitive(instance_type)
    cctxt = self.client.prepare(server=_compute_host(None, instance),
            version=version)
    cctxt.cast(ctxt, 'resize_instance',
               instance=instance, migration=migration,
               image=image, reservations=reservations,
               instance_type=instance_type_p)
def resize_instance(self, context, instance, image,
                    reservations, migration, instance_type):
    """Start migrating the instance."""
    with self._error_out_instance_on_exception(context, instance.uuid,
                                               reservations):
        if not instance_type:
            instance_type = flavor_obj.Flavor.get_by_id(
                context, migration['new_instance_type_id'])

        network_info = self._get_instance_nw_info(context, instance)

        migration.status = 'migrating'
        migration.save(context.elevated())

        instance.task_state = task_states.RESIZE_MIGRATING
        instance.save(expected_task_state=task_states.RESIZE_PREP)

        self._notify_about_instance_usage(
            context, instance, "resize.start", network_info=network_info)

        bdms = (block_device_obj.BlockDeviceMappingList.
                get_by_instance_uuid(context, instance.uuid))
        block_device_info = self._get_instance_volume_block_device_info(
                            context, instance, bdms=bdms)
        # The driver call below is the disk copy the article refers to.
        disk_info = self.driver.migrate_disk_and_power_off(
                context, instance, migration.dest_host,
                instance_type, network_info,
                block_device_info)

        self._terminate_volume_connections(context, instance, bdms)

        migration_p = obj_base.obj_to_primitive(migration)
        instance_p = obj_base.obj_to_primitive(instance)
        self.conductor_api.network_migrate_instance_start(context,
                                                          instance_p,
                                                          migration_p)

        migration.status = 'post-migrating'
        migration.save(context.elevated())
        # Re-home the instance record onto the destination host and node.
        instance.host = migration.dest_compute
        instance.node = migration.dest_node
        instance.task_state = task_states.RESIZE_MIGRATED
        instance.save(expected_task_state=task_states.RESIZE_MIGRATING)

        self.compute_rpcapi.finish_resize(context, instance,
                migration, image, disk_info,
                migration.dest_compute, reservations=reservations)

        self._notify_about_instance_usage(context, instance, "resize.end",
                                          network_info=network_info)
        self.instance_events.clear_events_for_instance(instance)
各种参数的获取,以及磁盘信息的拷贝(26-29),最后调用finish_resize方法(47-49):
def finish_resize(self, context, disk_info, image, instance,
                  reservations, migration):
    """Complete the migration.

    Brings the instance up on the new host and commits the quota
    reservations. On failure the quota is rolled back, the instance is
    put into the error state, and the original exception is re-raised.
    """
    try:
        self._finish_resize(context, instance, migration,
                            disk_info, image)
        self._quota_commit(context, reservations)
    except Exception as error:
        LOG.exception(_('Setting instance vm_state to ERROR'),
                      instance=instance)
        with excutils.save_and_reraise_exception():
            try:
                self._quota_rollback(context, reservations)
            except Exception as qr_error:
                LOG.exception(_("Failed to rollback quota for failed "
                                "finish_resize: %s"),
                              qr_error, instance=instance)
            self._set_instance_error_state(context, instance['uuid'])
# Listing ends with the decorators of the following definition (not shown):
# @object_compat
# @wrap_exception()
# @reverts_task_state
# @wrap_instance_fault
调用_finish_resize函数完成迁移:
def _finish_resize(self, context, instance, migration, disk_info,
                   image):
    resize_instance = False
    old_instance_type_id = migration['old_instance_type_id']
    new_instance_type_id = migration['new_instance_type_id']
    old_instance_type = flavors.extract_flavor(instance)
    sys_meta = instance.system_metadata
    # Fetch the instance's previous vm state; if it was never stashed,
    # default to ACTIVE for backward compatibility.

    old_vm_state = sys_meta.get('old_vm_state', vm_states.ACTIVE)
    flavors.save_flavor_info(sys_meta,
                             old_instance_type,
                             prefix='old_')
    # Flavor changed: switch the instance over to the stashed 'new_' flavor.
    if old_instance_type_id != new_instance_type_id:
        instance_type = flavors.extract_flavor(instance, prefix='new_')
        flavors.save_flavor_info(sys_meta, instance_type)
        instance.instance_type_id = instance_type['id']
        instance.memory_mb = instance_type['memory_mb']
        instance.vcpus = instance_type['vcpus']
        instance.root_gb = instance_type['root_gb']
        instance.ephemeral_gb = instance_type['ephemeral_gb']
        instance.system_metadata = sys_meta
        instance.save()
        resize_instance = True

    # Set up networking on the destination host.
    self.network_api.setup_networks_on_host(context, instance,
                                            migration['dest_compute'])

    instance_p = obj_base.obj_to_primitive(instance)
    migration_p = obj_base.obj_to_primitive(migration)
    self.conductor_api.network_migrate_instance_finish(context,
                                                       instance_p,
                                                       migration_p)

    network_info = self._get_instance_nw_info(context, instance)

    instance.task_state = task_states.RESIZE_FINISH
    instance.system_metadata = sys_meta
    instance.save(expected_task_state=task_states.RESIZE_MIGRATED)

    self._notify_about_instance_usage(
        context, instance, "finish_resize.start",
        network_info=network_info)

    block_device_info = self._get_instance_volume_block_device_info(
                        context, instance, refresh_conn_info=True)

    # If the instance was originally STOPPED,
    # it must not be powered on after the migration.
    power_on = old_vm_state != vm_states.STOPPED
    self.driver.finish_migration(context, migration, instance,
                                 disk_info,
                                 network_info,
                                 image, resize_instance,
                                 block_device_info, power_on)

    migration.status = 'finished'
    migration.save(context.elevated())

    instance.vm_state = vm_states.RESIZED
    instance.task_state = None
    instance.launched_at = timeutils.utcnow()
    instance.save(expected_task_state=task_states.RESIZE_FINISH)

    self._notify_about_instance_usage(
        context, instance, "finish_resize.end",
        network_info=network_info)
# Listing ends with the decorators of the following definition (not shown):
# @wrap_exception()
# @reverts_task_state
# @wrap_instance_event
# @errors_out_migration
# @wrap_instance_fault
虚机迁移完成后回到虚机迁移的resize_instance函数:
self._notify_about_instance_usage(context, instance, "resize.end",
                                  network_info=network_info)
self.instance_events.clear_events_for_instance(instance)  # last statement of resize_instance
进行扫尾工作。然后虚机冷迁移就正式结束了,至于虚机冷迁移的算法调度策略以及磁盘信息的获得在此不过多讨论,毕竟是简析么~请允许我偷下懒
PS:本博客欢迎转发,但请注明博客地址及作者~
博客地址:http://www.cnblogs.com/voidy/
<。)#)))≦
OpenStack之虚机冷迁移代码简析