话说虚机迁移分为冷迁移以及热迁移,所谓热迁移用度娘的话说即是:热迁移(Live Migration,又叫动态迁移、实时迁移),即虚机保存/恢复(Save/Restore):将整个虚拟机的运行状态完整保存下来,同时可以快速地恢复到原有硬件平台甚至是不同硬件平台上。恢复以后,虚机仍旧平滑运行,用户不会察觉到任何差异。OpenStack的虚机迁移是基于Libvirt实现的,下面来看看OpenStack虚机热迁移的具体代码实现。
首先,由API入口进入到nova/api/openstack/compute/contrib/admin_actions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
@wsgi.action('os-migrateLive')  # action name; the same key is read from `body` below
def _migrate_live(self, req, id, body):
    """Permit admins to (live) migrate a server to a new host."""
    context = req.environ["nova.context"]
    authorize(context, 'migrateLive')

    try:  # all three keys are required in the request body
        block_migration = body["os-migrateLive"]["block_migration"]
        disk_over_commit = body["os-migrateLive"]["disk_over_commit"]
        host = body["os-migrateLive"]["host"]
    except (TypeError, KeyError):  # body is not subscriptable or a key is missing
        msg = _("host, block_migration and disk_over_commit must "
                "be specified for live migration.")
        raise exc.HTTPBadRequest(explanation=msg)

    try:  # normalize both flags to booleans
        block_migration = strutils.bool_from_string(block_migration,
                                                    strict=True)
        disk_over_commit = strutils.bool_from_string(disk_over_commit,
                                                     strict=True)
    except ValueError as err:  # strict conversion failed -> 400
        raise exc.HTTPBadRequest(explanation=str(err))

    try:
        instance = self.compute_api.get(context, id, want_objects=True)
        self.compute_api.live_migrate(context, instance, block_migration,
                                      disk_over_commit, host)
    except (exception.ComputeServiceUnavailable,
            exception.InvalidHypervisorType,
            exception.UnableToMigrateToSelf,
            exception.DestinationHypervisorTooOld,
            exception.NoValidHost,
            exception.InvalidLocalStorage,
            exception.InvalidSharedStorage,
            exception.MigrationPreCheckError) as ex:  # known failures -> 400
        raise exc.HTTPBadRequest(explanation=ex.format_message())
    except exception.InstanceNotFound as e:  # unknown instance -> 404
        raise exc.HTTPNotFound(explanation=e.format_message())
    except exception.InstanceInvalidState as state_error:
        common.raise_http_conflict_for_instance_invalid_state(state_error,
                'os-migrateLive')
    except Exception:  # anything else: log with traceback, then answer 400
        if host is None:
            msg = _("Live migration of instance %s to another host "
                    "failed") % id
        else:
            msg = _("Live migration of instance %(id)s to host %(host)s "
                    "failed") % {'id': id, 'host': host}
        LOG.exception(msg)
        # Return messages from scheduler
        raise exc.HTTPBadRequest(explanation=msg)

    return webob.Response(status_int=202)  # reached only when live_migrate raised nothing
|
这里第一行可以看到是与API文档的第二行照应的:
1
2
3
4
5
6
7
|
{
"os-migrateLive" : {
"host" : "0443e9a1254044d8b99f35eace132080" ,
"block_migration" : false,
"disk_over_commit" : false
}
}
|
好了,源码中其实执行迁移工作的就是第26、27行的一条语句:
1
2
|
self.compute_api.live_migrate(context, instance, block_migration,
                              disk_over_commit, host)
|
由这句进入到nova/compute/api.py中,源码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
@check_instance_cell
@check_instance_state(vm_state=[vm_states.ACTIVE])  # presumably rejects instances whose vm_state is not ACTIVE
def live_migrate(self, context, instance, block_migration,
                 disk_over_commit, host_name):
    """Migrate a server lively to a new host."""
    LOG.debug(_("Going to try to live migrate instance to %s"),
              host_name or "another host", instance=instance)

    instance.task_state = task_states.MIGRATING  # task_state is changed here, not vm_state
    instance.save(expected_task_state=[None])  # NOTE(review): looks like a guard against a concurrent task - confirm

    self.compute_task_api.live_migrate_instance(context, instance,
            host_name, block_migration=block_migration,
            disk_over_commit=disk_over_commit)
|
第2行是一个装饰器,用于在进入API方法之前,检测虚拟机和/或任务的状态,如果实例处于错误的状态,将会引发异常;接下来先将虚机的任务状态(task_state)置为“migrating”并保存,准备把虚机实时迁移到新的主机,然后由12行进入nova/conductor/api.py
1
2
3
4
5
6
|
def live_migrate_instance(self, context, instance, host_name,
                          block_migration, disk_over_commit):
    scheduler_hint = {'host': host_name}  # requested destination is passed on as a hint
    self._manager.migrate_server(
        context, instance, scheduler_hint, True, False, None,  # presumably live=True, rebuild=False, flavor=None
        block_migration, disk_over_commit, None)
|
将主机名存入字典scheduler_hint中,然后调用nova/conductor/manager.py方法migrate_server,
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
def migrate_server(self, context, instance, scheduler_hint, live, rebuild,
        flavor, block_migration, disk_over_commit, reservations=None):
    if instance and not isinstance(instance, instance_obj.Instance):
        # NOTE(danms): Until v2 of the RPC API, we need to tolerate
        # old-world instance objects here
        attrs = ['metadata', 'system_metadata', 'info_cache',
                 'security_groups']
        instance = instance_obj.Instance._from_db_object(
            context, instance_obj.Instance(), instance,
            expected_attrs=attrs)
    if live and not rebuild and not flavor:  # live migration request
        self._live_migrate(context, instance, scheduler_hint,
                           block_migration, disk_over_commit)
    elif not live and not rebuild and flavor:  # cold migration request (a flavor is given)
        instance_uuid = instance['uuid']
        with compute_utils.EventReporter(context, self.db,
                                     'cold_migrate', instance_uuid):
            self._cold_migrate(context, instance, flavor,
                               scheduler_hint['filter_properties'],
                               reservations)
    else:  # every other flag combination is rejected
        raise NotImplementedError()
|
由于在nova/conductor/api.py中传过来的参数是
1
2
3
|
self._manager.migrate_server(
    context, instance, scheduler_hint, True, False, None,
    block_migration, disk_over_commit, None)
|
因此live是True,rebuild是False,flavor是None,执行第12、13行代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
if live and not rebuild and not flavor:
    self._live_migrate(context, instance, scheduler_hint,
                       block_migration, disk_over_commit)
_live_migrate代码如下:
def _live_migrate(self, context, instance, scheduler_hint,
                  block_migration, disk_over_commit):
    destination = scheduler_hint.get("host")  # None when the hint carries no "host" key
    try:
        live_migrate.execute(context, instance, destination,
                             block_migration, disk_over_commit)
    except (exception.NoValidHost,
            exception.ComputeServiceUnavailable,
            exception.InvalidHypervisorType,
            exception.InvalidCPUInfo,
            exception.UnableToMigrateToSelf,
            exception.DestinationHypervisorTooOld,
            exception.InvalidLocalStorage,
            exception.InvalidSharedStorage,
            exception.HypervisorUnavailable,
            exception.MigrationPreCheckError) as ex:
        with excutils.save_and_reraise_exception():  # presumably re-raises ex after this block - confirm
            #TODO(johngarbutt) - eventually need instance actions here
            request_spec = {'instance_properties': {
                'uuid': instance['uuid'], },
            }
            scheduler_utils.set_vm_state_and_notify(context,
                    'compute_task', 'migrate_server',
                    dict(vm_state=instance['vm_state'],  # vm_state is passed through unchanged
                         task_state=None,  # clears the task state (set to MIGRATING before this call)
                         expected_task_state=task_states.MIGRATING,),
                    ex, request_spec, self.db)
    except Exception as ex:  # unexpected failure: log it and wrap it in MigrationError
        LOG.error(_('Migration of instance %(instance_id)s to host'
                    ' %(dest)s unexpectedly failed.'),
                  {'instance_id': instance['uuid'], 'dest': destination},
                  exc_info=True)
        raise exception.MigrationError(reason=ex)
|
首先,第三行中将主机名赋给destination,然后执行迁移,后面的都是异常的捕捉,执行迁移的代码分为两部分,先看第一部分,在nova/conductor/tasks/live_migrate.py的184行左右:
1
2
3
4
5
6
7
8
|
def execute(context, instance, destination,
            block_migration, disk_over_commit):
    task = LiveMigrationTask(context, instance,
                             destination,
                             block_migration,
                             disk_over_commit)
    #TODO(johngarbutt) create a superclass that contains a safe_execute call
    return task.execute()  # the task object does the work; its result is returned as-is
|
这里先用传入的参数创建一个LiveMigrationTask对象(代码中的TODO注释只是说明今后打算抽象出一个包含safe_execute调用的超类,目前并未实现),然后调用并返回task.execute(),也即执行迁移的第二部分代码,在54行左右:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
def execute(self):
    self._check_instance_is_running()
    self._check_host_is_up(self.source)  # the source host is checked before any destination handling

    if not self.destination:  # no destination requested: pick one
        self.destination = self._find_destination()
    else:  # destination requested: validate it
        self._check_requested_destination()

    #TODO(johngarbutt) need to move complexity out of compute manager
    return self.compute_rpcapi.live_migration(self.context,
            host=self.source,  # the call is addressed to the source host
            instance=self.instance,
            dest=self.destination,
            block_migration=self.block_migration,
            migrate_data=self.migrate_data)
            #TODO(johngarbutt) disk_over_commit?
|
这里有三部分内容:
如果目标主机不存在,则由调度算法选取一个目标主机,并且进行相关的检测,确保能够进行实时迁移操作;
如果目标主机存在,则直接进行相关的检测操作,确保能够进行实时迁移操作;
执行迁移操作。
前两部分不再赘述,直接看第三部分代码,在nova/compute/rpcapi.py中:
1
2
3
4
5
6
7
8
9
|
def live_migration(self, ctxt, instance, dest, block_migration, host,
                   migrate_data=None):
    # NOTE(russellb) Havana compat
    version = self._get_compat_version('3.0', '2.0')
    instance_p = jsonutils.to_primitive(instance)  # the instance is sent in primitive form
    cctxt = self.client.prepare(server=host, version=version)  # target the given host
    cctxt.cast(ctxt, 'live_migration', instance=instance_p,  # result of cast is not used here
               dest=dest, block_migration=block_migration,
               migrate_data=migrate_data)
|
热迁移开始执行:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
def live_migration(self, context, instance, dest,
                   post_method, recover_method, block_migration=False,
                   migrate_data=None):
    """Spawning live_migration operation for distributing high-load.

    :param context: security context
    :param instance:
        nova.db.sqlalchemy.models.Instance object
        instance object that is migrated.
    :param dest: destination host
    :param post_method:
        post operation method.
        expected nova.compute.manager.post_live_migration.
    :param recover_method:
        recovery method when any exception occurs.
        expected nova.compute.manager.recover_live_migration.
    :param block_migration: if true, do block migration.
    :param migrate_data: implementation specific params
    """

    greenthread.spawn(self._live_migration, context, instance, dest,  # spawn's return value is discarded
                      post_method, recover_method, block_migration,
                      migrate_data)
|
这个方法中建立一个绿色线程来运行方法_live_migration,来执行实时迁移; 主要是调用libvirt python接口方法virDomainMigrateToURI,来实现从当前主机迁移domain对象到给定的目标主机;
spawn:建立一个绿色线程来运行方法“func(*args, **kwargs)”,这里就是来运行方法_live_migration;
_live_migration:执行实时迁移; 主要是调用libvirt python接口方法virDomainMigrateToURI,来实现从当前主机迁移domain对象到给定的目标主机;
接着在绿色线程中调用_live_migration方法:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
def _live_migration(self, context, instance, dest, post_method,
                    recover_method, block_migration=False,
                    migrate_data=None):
    """Do live migration.

    :param context: security context
    :param instance:
        nova.db.sqlalchemy.models.Instance object
        instance object that is migrated.
    :param dest: destination host
    :param post_method:
        post operation method.
        expected nova.compute.manager.post_live_migration.
    :param recover_method:
        recovery method when any exception occurs.
        expected nova.compute.manager.recover_live_migration.
    :param block_migration: if true, do block migration.
    :param migrate_data: implementation specific params
    """

    # Do live migration.
    try:
        if block_migration:
            flaglist = CONF.libvirt.block_migration_flag.split(',')
        else:
            flaglist = CONF.libvirt.live_migration_flag.split(',')
        flagvals = [getattr(libvirt, x.strip()) for x in flaglist]  # flag names -> libvirt module attributes
        logical_sum = reduce(lambda x, y: x | y, flagvals)  # bitwise OR of all flag values

        dom = self._lookup_by_name(instance["name"])
        dom.migrateToURI(CONF.libvirt.live_migration_uri % dest,  # URI template filled with the destination
                         logical_sum,
                         None,
                         CONF.libvirt.live_migration_bandwidth)

    except Exception as e:
        with excutils.save_and_reraise_exception():  # presumably re-raises e after this block - confirm
            LOG.error(_("Live Migration failure: %s"), e,
                      instance=instance)
            recover_method(context, instance, dest, block_migration)

    # Waiting for completion of live_migration.
    timer = loopingcall.FixedIntervalLoopingCall(f=None)  # excerpt ends here; no callback is attached in the shown code
|
1
2
|
if block_migration:
    flaglist = CONF.libvirt.block_migration_flag.split(',')  # comma-separated flag names
|
这里获取块迁移的标志列表;block_migration_flag这个配置项定义了块迁移时使用的迁移标志(以逗号分隔)。
1
2
3
4
|
else:
    flaglist = CONF.libvirt.live_migration_flag.split(',')
flagvals = [getattr(libvirt, x.strip()) for x in flaglist]  # flag names -> libvirt module attributes
logical_sum = reduce(lambda x, y: x | y, flagvals)  # bitwise OR of all flag values
|
这部分获取实时迁移标志列表,live_migration_flag这个参数定义了实时迁移的迁移标志;随后(无论走哪个分支)通过getattr把每个标志名转换为libvirt模块中对应的常量,再用按位或(|)把它们合并成一个标志值logical_sum。
1
|
dom = self._lookup_by_name(instance["name"])  # looked up by the instance's name
|
根据给定的实例名称检索libvirt域对象。
1
|
timer = loopingcall.FixedIntervalLoopingCall(f=None)  # created without a callback (f=None)
|
创建一个固定时间间隔的循环调用对象(定时器,此处回调f暂为None),用于等待实时迁移完成。
热迁移代码部分至此结束。
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。