KVM虚拟机IO处理过程(二) ----QEMU/KVM I/O 处理过程

时间:2021-01-02 06:40:53

接着KVM虚拟机IO处理过程中Guest Vm IO处理过程(http://blog.csdn.net/dashulu/article/details/16820281),本篇文章主要描述IO从guest vm跳转到kvm和qemu后的处理过程.

首先回顾一下kvm的启动过程(http://blog.csdn.net/dashulu/article/details/17074675). qemu通过调用kvm提供的一系列接口来启动kvm. qemu的入口为vl.c中的main函数, main函数通过调用kvm_init 和 machine->init来初始化kvm. 其中, machine->init会创建vcpu, 并用一个线程去模拟vcpu, 该线程执行的函数为qemu_kvm_cpu_thread_fn, 并且该线程最终会调用kvm_cpu_exec, 该函数调用kvm_vcpu_ioctl切换到kvm中. 下次从kvm中返回时, 会接着执行kvm_vcpu_ioctl之后的代码, 判断exit_reason, 然后进行相应处理.

  1. int kvm_cpu_exec(CPUState *cpu)
  2. {
  3. struct kvm_run *run = cpu->kvm_run;
  4. int ret, run_ret;
  5. DPRINTF("kvm_cpu_exec()\n");
  6. if (kvm_arch_process_async_events(cpu)) {
  7. cpu->exit_request = 0;
  8. return EXCP_HLT;
  9. }
  10. do {
  11. if (cpu->kvm_vcpu_dirty) {
  12. kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
  13. cpu->kvm_vcpu_dirty = false;
  14. }
  15. kvm_arch_pre_run(cpu, run);
  16. if (cpu->exit_request) {
  17. DPRINTF("interrupt exit requested\n");
  18. /*
  19. * KVM requires us to reenter the kernel after IO exits to complete
  20. * instruction emulation. This self-signal will ensure that we
  21. * leave ASAP again.
  22. */
  23. qemu_cpu_kick_self();
  24. }
  25. qemu_mutex_unlock_iothread();
  26. run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
  27. qemu_mutex_lock_iothread();
  28. kvm_arch_post_run(cpu, run);
  29. if (run_ret < 0) {
  30. if (run_ret == -EINTR || run_ret == -EAGAIN) {
  31. DPRINTF("io window exit\n");
  32. ret = EXCP_INTERRUPT;
  33. break;
  34. }
  35. fprintf(stderr, "error: kvm run failed %s\n",
  36. strerror(-run_ret));
  37. abort();
  38. }
  39. trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
  40. switch (run->exit_reason) {
  41. case KVM_EXIT_IO:
  42. DPRINTF("handle_io\n");
  43. kvm_handle_io(run->io.port,
  44. (uint8_t *)run + run->io.data_offset,
  45. run->io.direction,
  46. run->io.size,
  47. run->io.count);
  48. ret = 0;
  49. break;
  50. case KVM_EXIT_MMIO:
  51. DPRINTF("handle_mmio\n");
  52. cpu_physical_memory_rw(run->mmio.phys_addr,
  53. run->mmio.data,
  54. run->mmio.len,
  55. run->mmio.is_write);
  56. ret = 0;
  57. break;
  58. case KVM_EXIT_IRQ_WINDOW_OPEN:
  59. DPRINTF("irq_window_open\n");
  60. ret = EXCP_INTERRUPT;
  61. break;
  62. case KVM_EXIT_SHUTDOWN:
  63. DPRINTF("shutdown\n");
  64. qemu_system_reset_request();
  65. ret = EXCP_INTERRUPT;
  66. break;
  67. case KVM_EXIT_UNKNOWN:
  68. fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
  69. (uint64_t)run->hw.hardware_exit_reason);
  70. ret = -1;
  71. break;
  72. case KVM_EXIT_INTERNAL_ERROR:
  73. ret = kvm_handle_internal_error(cpu, run);
  74. break;
  75. default:
  76. DPRINTF("kvm_arch_handle_exit\n");
  77. ret = kvm_arch_handle_exit(cpu, run);
  78. break;
  79. }
  80. } while (ret == 0);
  81. if (ret < 0) {
  82. cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_CODE);
  83. vm_stop(RUN_STATE_INTERNAL_ERROR);
  84. }
  85. cpu->exit_request = 0;
  86. return ret;
  87. }

kvm_vcpu_ioctl执行时, 调用的kvm函数是virt/kvm/kvm_main.c中的kvm_vcpu_ioctl函数. 当传入参数为KVM_RUN时, 最终会调用到vcpu_enter_guest函数, vcpu_enter_guest函数中调用了kvm_x86_ops->run(vcpu), 在intel处理器架构中该函数对应的实现为vmx_vcpu_run. vmx_vcpu_run设置好寄存器状态之后, 执行VMLAUNCH或者VMRESUME指令进入guest vm, 一旦发生vm exit则从此处继续执行下去.

  1. static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
  2. {
  3. struct vcpu_vmx *vmx = to_vmx(vcpu);
  4. unsigned long debugctlmsr;
  5. /*...此处省略n行代码...*/
  6. vmx->__launched = vmx->loaded_vmcs->launched;
  7. asm(
  8. /* Store host registers */
  9. "push %%" _ASM_DX "; push %%" _ASM_BP ";"
  10. "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
  11. "push %%" _ASM_CX " \n\t"
  12. "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
  13. "je 1f \n\t"
  14. "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
  15. __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
  16. "1: \n\t"
  17. /* Reload cr2 if changed */
  18. "mov %c[cr2](%0), %%" _ASM_AX " \n\t"
  19. "mov %%cr2, %%" _ASM_DX " \n\t"
  20. "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
  21. "je 2f \n\t"
  22. "mov %%" _ASM_AX", %%cr2 \n\t"
  23. "2: \n\t"
  24. /* Check if vmlaunch of vmresume is needed */
  25. "cmpl $0, %c[launched](%0) \n\t"
  26. /* Load guest registers.  Don't clobber flags. */
  27. "mov %c[rax](%0), %%" _ASM_AX " \n\t"
  28. "mov %c[rbx](%0), %%" _ASM_BX " \n\t"
  29. "mov %c[rdx](%0), %%" _ASM_DX " \n\t"
  30. "mov %c[rsi](%0), %%" _ASM_SI " \n\t"
  31. "mov %c[rdi](%0), %%" _ASM_DI " \n\t"
  32. "mov %c[rbp](%0), %%" _ASM_BP " \n\t"
  33. #ifdef CONFIG_X86_64
  34. "mov %c[r8](%0),  %%r8  \n\t"
  35. "mov %c[r9](%0),  %%r9  \n\t"
  36. "mov %c[r10](%0), %%r10 \n\t"
  37. "mov %c[r11](%0), %%r11 \n\t"
  38. "mov %c[r12](%0), %%r12 \n\t"
  39. "mov %c[r13](%0), %%r13 \n\t"
  40. "mov %c[r14](%0), %%r14 \n\t"
  41. "mov %c[r15](%0), %%r15 \n\t"
  42. #endif
  43. "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
  44. /* Enter guest mode */
  45. "jne 1f \n\t"
  46. __ex(ASM_VMX_VMLAUNCH) "\n\t"
  47. "jmp 2f \n\t"
  48. "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
  49. "2: "
  50. /* Save guest registers, load host registers, keep flags */
  51. "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
  52. "pop %0 \n\t"
  53. "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
  54. "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
  55. __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
  56. "mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
  57. "mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
  58. "mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
  59. "mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
  60. #ifdef CONFIG_X86_64
  61. "mov %%r8,  %c[r8](%0) \n\t"
  62. "mov %%r9,  %c[r9](%0) \n\t"
  63. "mov %%r10, %c[r10](%0) \n\t"
  64. "mov %%r11, %c[r11](%0) \n\t"
  65. "mov %%r12, %c[r12](%0) \n\t"
  66. "mov %%r13, %c[r13](%0) \n\t"
  67. "mov %%r14, %c[r14](%0) \n\t"
  68. "mov %%r15, %c[r15](%0) \n\t"
  69. #endif
  70. "mov %%cr2, %%" _ASM_AX "   \n\t"
  71. "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
  72. "pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
  73. "setbe %c[fail](%0) \n\t"
  74. ".pushsection .rodata \n\t"
  75. ".global vmx_return \n\t"
  76. "vmx_return: " _ASM_PTR " 2b \n\t"
  77. ".popsection"
  78. : : "c"(vmx), "d"((unsigned long)HOST_RSP),
  79. [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
  80. [fail]"i"(offsetof(struct vcpu_vmx, fail)),
  81. [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
  82. [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
  83. [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
  84. [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
  85. [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
  86. [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
  87. [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
  88. [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
  89. #ifdef CONFIG_X86_64
  90. [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
  91. [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
  92. [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
  93. [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
  94. [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
  95. [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
  96. [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
  97. [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
  98. #endif
  99. [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
  100. [wordsize]"i"(sizeof(ulong))
  101. : "cc", "memory"
  102. #ifdef CONFIG_X86_64
  103. , "rax", "rbx", "rdi", "rsi"
  104. , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
  105. #else
  106. , "eax", "ebx", "edi", "esi"
  107. #endif
  108. );
  109. /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
  110. if (debugctlmsr)
  111. update_debugctlmsr(debugctlmsr);
  112. #ifndef CONFIG_X86_64
  113. /*
  114. * The sysexit path does not restore ds/es, so we must set them to
  115. * a reasonable value ourselves.
  116. *
  117. * We can't defer this to vmx_load_host_state() since that function
  118. * may be executed in interrupt context, which saves and restore segments
  119. * around it, nullifying its effect.
  120. */
  121. loadsegment(ds, __USER_DS);
  122. loadsegment(es, __USER_DS);
  123. #endif
  124. vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
  125. | (1 << VCPU_EXREG_RFLAGS)
  126. | (1 << VCPU_EXREG_CPL)
  127. | (1 << VCPU_EXREG_PDPTR)
  128. | (1 << VCPU_EXREG_SEGMENTS)
  129. | (1 << VCPU_EXREG_CR3));
  130. vcpu->arch.regs_dirty = 0;
  131. vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
  132. if (is_guest_mode(vcpu)) {
  133. struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
  134. vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;
  135. if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
  136. vmcs12->idt_vectoring_error_code =
  137. vmcs_read32(IDT_VECTORING_ERROR_CODE);
  138. vmcs12->vm_exit_instruction_len =
  139. vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
  140. }
  141. }
  142. vmx->loaded_vmcs->launched = 1;
  143. vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
  144. trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
  145. vmx_complete_atomic_exit(vmx);
  146. vmx_recover_nmi_blocking(vmx);
  147. vmx_complete_interrupts(vmx);
  148. }

介绍完初始化的流程, 可以介绍IO在kvm和qemu中的处理流程了. 当Guest Vm进行IO操作需要访问设备时, 就会触发vm exit 返回到vmx_vcpu_run, vmx保存好vmcs并且记录下VM_EXIT_REASON后返回到调用该函数的vcpu_enter_guest. 在vcpu_enter_guest函数末尾调用了r = kvm_x86_ops->handle_exit(vcpu), 该函数对应于vmx_handle_exit函数(intel cpu架构对应关系可以查看vmx.c文件中的static struct kvm_x86_ops vmx_x86_ops). vmx_handle_exit 调用kvm_vmx_exit_handlers[exit_reason](vcpu), 该语句根据exit_reason调用不同的函数, 该数据结构定义如下:

  1. static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
  2. [EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
  3. [EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
  4. [EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
  5. [EXIT_REASON_NMI_WINDOW]          = handle_nmi_window,
  6. [EXIT_REASON_IO_INSTRUCTION]          = handle_io,
  7. [EXIT_REASON_CR_ACCESS]               = handle_cr,
  8. [EXIT_REASON_DR_ACCESS]               = handle_dr,
  9. [EXIT_REASON_CPUID]                   = handle_cpuid,
  10. [EXIT_REASON_MSR_READ]                = handle_rdmsr,
  11. [EXIT_REASON_MSR_WRITE]               = handle_wrmsr,
  12. [EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
  13. [EXIT_REASON_HLT]                     = handle_halt,
  14. [EXIT_REASON_INVD]            = handle_invd,
  15. [EXIT_REASON_INVLPG]              = handle_invlpg,
  16. [EXIT_REASON_RDPMC]                   = handle_rdpmc,
  17. [EXIT_REASON_VMCALL]                  = handle_vmcall,
  18. [EXIT_REASON_VMCLEAR]                 = handle_vmclear,
  19. [EXIT_REASON_VMLAUNCH]                = handle_vmlaunch,
  20. [EXIT_REASON_VMPTRLD]                 = handle_vmptrld,
  21. [EXIT_REASON_VMPTRST]                 = handle_vmptrst,
  22. [EXIT_REASON_VMREAD]                  = handle_vmread,
  23. [EXIT_REASON_VMRESUME]                = handle_vmresume,
  24. [EXIT_REASON_VMWRITE]                 = handle_vmwrite,
  25. [EXIT_REASON_VMOFF]                   = handle_vmoff,
  26. [EXIT_REASON_VMON]                    = handle_vmon,
  27. [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
  28. [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
  29. [EXIT_REASON_WBINVD]                  = handle_wbinvd,
  30. [EXIT_REASON_XSETBV]                  = handle_xsetbv,
  31. [EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
  32. [EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
  33. [EXIT_REASON_EPT_VIOLATION]       = handle_ept_violation,
  34. [EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
  35. [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
  36. [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_invalid_op,
  37. [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
  38. };

如果是因为IO原因导致的vm exit,则调用的处理函数为handle_io,handle_io的处理可以查看(http://blog.csdn.net/fanwenyi/article/details/12748613), 该过程结束之后需要qemu去处理IO,这时候会返回到qemu, 在kvm_cpu_exec中继续执行下去,看上面kvm_cpu_exec的代码,如果是因为IO原因返回到qemu,会调用kvm_handle_io函数.

  1. switch (run->exit_reason) {
  2. case KVM_EXIT_IO:
  3. DPRINTF("handle_io\n");
  4. kvm_handle_io(run->io.port,
  5. (uint8_t *)run + run->io.data_offset,
  6. run->io.direction,
  7. run->io.size,
  8. run->io.count);
  9. ret = 0;
  10. break;

kvm_handle_io调用cpu_outb, cpu_outw等函数处理IO操作.

假设虚拟机是用raw格式的磁盘,则IO在qemu中处理时经过的函数栈如下所示:

  1. #0 bdrv_aio_writev (bs=0x55555629e9b0, sector_num=870456,
  2. qiov=0x555556715ab0, nb_sectors=1,
  3. cb=0x55555570161b <ide_sector_write_cb>, opaque=0x5555567157b8)
  4. at block.c:3408
  5. #1 0x0000555555701960 in ide_sector_write (s=0x5555567157b8)
  6. at hw/ide/core.c:798
  7. #2 0x00005555557047ae in ide_data_writew (opaque=0x555556715740, addr=496,
  8. val=8995) at hw/ide/core.c:1907
  9. #3 0x00005555558d9e4c in portio_write (opaque=0x5555565c0670, addr=0,
  10. data=8995, size=2) at /home/dashu/kvm/qemu/qemu-dev-zwu/ioport.c:174
  11. #4 0x00005555558e13d5 in memory_region_write_accessor (mr=0x5555565c0670,
  12. addr=0, value=0x7fffb4dbd528, size=2, shift=0, mask=65535)
  13. at /home/dashu/kvm/qemu/qemu-dev-zwu/memory.c:440
  14. #5 0x00005555558e151d in access_with_adjusted_size (addr=0,
  15. value=0x7fffb4dbd528, size=2, access_size_min=1, access_size_max=4,
  16. access=0x5555558e1341 <memory_region_write_accessor>, mr=0x5555565c0670)
  17. at /home/dashu/kvm/qemu/qemu-dev-zwu/memory.c:477
  18. #6 0x00005555558e3dfb in memory_region_dispatch_write (mr=0x5555565c0670,
  19. addr=0, data=8995, size=2)
  20. at /home/dashu/kvm/qemu/qemu-dev-zwu/memory.c:984
  21. #7 0x00005555558e7384 in io_mem_write (mr=0x5555565c0670, addr=0, val=8995,
  22. size=2) at /home/dashu/kvm/qemu/qemu-dev-zwu/memory.c:1748
  23. #8 0x000055555586a18e in address_space_rw (as=0x555556216d80, addr=496,
  24. buf=0x7fffb4dbd670 "##", len=2, is_write=true)
  25. at /home/dashu/kvm/qemu/qemu-dev-zwu/exec.c:1968
  26. #9 0x000055555586a474 in address_space_write (as=0x555556216d80, addr=496,
  27. buf=0x7fffb4dbd670 "##", len=2)
  28. at /home/dashu/kvm/qemu/qemu-dev-zwu/exec.c:2030
  29. #10 0x00005555558d98c9 in cpu_outw (addr=496, val=8995)
  30. at /home/dashu/kvm/qemu/qemu-dev-zwu/ioport.c:61

bdrv_aio_writev最终调用bdrv_co_aio_rw_vector函数, 该函数调用co = qemu_coroutine_create(bdrv_co_do_rw) 创建一个协程去执行bdrv_co_do_rw函数, bdrv_co_do_rw函数的函数栈如下:

  1. #1 0x000055555563653c in paio_submit (bs=0x5555562a13d0, fd=10, sector_num=2,
  2. qiov=0x555556715ab0, nb_sectors=1,
  3. cb=0x5555556028b1 <bdrv_co_io_em_complete>, opaque=0x555556964e30, type=1)
  4. at block/raw-posix.c:825
  5. #2 0x0000555555636659 in raw_aio_submit (bs=0x5555562a13d0, sector_num=2,
  6. qiov=0x555556715ab0, nb_sectors=1,
  7. cb=0x5555556028b1 <bdrv_co_io_em_complete>, opaque=0x555556964e30, type=1)
  8. at block/raw-posix.c:853
  9. #3 0x00005555556366c9 in raw_aio_readv (bs=0x5555562a13d0, sector_num=2,
  10. qiov=0x555556715ab0, nb_sectors=1,
  11. cb=0x5555556028b1 <bdrv_co_io_em_complete>, opaque=0x555556964e30)
  12. at block/raw-posix.c:861
  13. #4 0x00005555556029b8 in bdrv_co_io_em (bs=0x5555562a13d0, sector_num=2,
  14. nb_sectors=1, iov=0x555556715ab0, is_write=false) at block.c:4038
  15. #5 0x0000555555602a49 in bdrv_co_readv_em (bs=0x5555562a13d0, sector_num=2,
  16. nb_sectors=1, iov=0x555556715ab0) at block.c:4055
  17. #6 0x00005555555fed61 in bdrv_co_do_readv (bs=0x5555562a13d0, sector_num=2,
  18. nb_sectors=1, qiov=0x555556715ab0, flags=0) at block.c:2547
  19. #7 0x00005555555fee03 in bdrv_co_readv (bs=0x5555562a13d0, sector_num=2,
  20. nb_sectors=1, qiov=0x555556715ab0) at block.c:2573
  21. #8 0x0000555555637d8c in raw_co_readv (bs=0x55555629e9b0, sector_num=2,
  22. nb_sectors=1, qiov=0x555556715ab0) at block/raw.c:47
  23. #9 0x00005555555fed61 in bdrv_co_do_readv (bs=0x55555629e9b0, sector_num=2,
  24. nb_sectors=1, qiov=0x555556715ab0, flags=0) at block.c:2547
  25. #10 0x00005555556023af in bdrv_co_do_rw

最终在paio_submit中会往线程池中提交一个请求thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque), 由调度器去执行aio_worker函数. aio_worker是真正做IO操作的函数, 它通过pwrite和pread去读写磁盘.

当qemu完成IO操作后,会在kvm_cpu_exec函数的循环中,调用kvm_vcpu_ioctl重新进入kvm.

以上阐述了IO操作在kvm和qemu中处理的整个过程.

参考资料:

1. kvm代码解析连载(二):io的虚拟化:http://blog.csdn.net/fanwenyi/article/details/12748613