在内核中针对cpu的操作,比如arm_cpuidle_init、arm_cpuidle_suspend、boot_secondary、secondary_start_kernel、op_cpu_disable、op_cpu_kill、cpu_die、smp_cpu_setup、smp_prepare_cpus等,都会回落到对cpu_ops的调用。
cpu_ops将针对底层cpu的操作抽象为一系列回调函数,以统一的形式向上层提供API。
cpu_psci_ops作为cpu_ops的一个特殊实现,将cpu_ops关联到PSCI的psci_ops。
psci_ops的函数在PSCI Firmware中实现,提供一系列基于Function ID的调用。
这种分层思想将内核通用cpu_operations和硬件相关部分分隔开。
cpu_operations及其应用
首先分析一下cpu_operations这个结构体:
struct cpu_operations { const char *name; int (*cpu_init)(unsigned int); 读取必要的数据准备初始化。 int (*cpu_prepare)(unsigned int); 启动前准备工作 int (*cpu_boot)(unsigned int); 启动一个CPU void (*cpu_postboot)(void); 执行boot后的清理工作 #ifdef CONFIG_HOTPLUG_CPU int (*cpu_disable)(unsigned int cpu); 关闭CPU之前的准备工作 void (*cpu_die)(unsigned int cpu); 关闭CPU int (*cpu_kill)(unsigned int cpu); 确认是否关闭 #endif #ifdef CONFIG_CPU_IDLE int (*cpu_init_idle)(unsigned int); 读取CPU idle状态的参数 int (*cpu_suspend)(unsigned long); suspend一个CPU,并且保存上下文 #endif }; |
cpu_init
static int __init smp_cpu_setup(int cpu) { if (cpu_read_ops(cpu)) return -ENODEV; if (cpu_ops[cpu]->cpu_init(cpu)) return -ENODEV; set_cpu_possible(cpu, true); return 0; } |
获取指定cpu的cpu_ops,执行cpu_init回调函数进行初始化。并将此cpu设置为possible。
cpu_prepare
void __init smp_prepare_cpus(unsigned int max_cpus) { int err; unsigned int cpu, ncores = num_possible_cpus(); init_cpu_topology(); 填充cpu_topology结构体数组 smp_store_cpu_info(smp_processor_id()); /* * are we trying to boot more cores than exist? */ if (max_cpus > ncores) 不能超过possible cpu数目 max_cpus = ncores; /* Don‘t bother if we‘re effectively UP */ if (max_cpus <= 1) return; /* * Initialise the present map (which describes the set of CPUs * actually populated at the present time) and release the * secondaries from the bootloader. * * Make sure we online at most (max_cpus - 1) additional CPUs. */ max_cpus--; for_each_possible_cpu(cpu) { if (max_cpus == 0) break; if (cpu == smp_processor_id()) continue; if (!cpu_ops[cpu]) continue; err = cpu_ops[cpu]->cpu_prepare(cpu); 执行.cpu_prepare回调函数,将指定cpu设置为present。 if (err) continue; set_cpu_present(cpu, true); max_cpus--; } } |
cpu_boot
static int boot_secondary(unsigned int cpu, struct task_struct *idle) { if (cpu_ops[cpu]->cpu_boot) return cpu_ops[cpu]->cpu_boot(cpu); return -EOPNOTSUPP; } |
cpu_postboot
asmlinkage void secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id(); /* * All kernel threads share the same mm context; grab a * reference and switch to it. */ atomic_inc(&mm->mm_count); current->active_mm = mm; set_my_cpu_offset(per_cpu_offset(smp_processor_id())); /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ cpu_set_reserved_ttbr0(); local_flush_tlb_all(); cpu_set_default_tcr_t0sz(); preempt_disable(); trace_hardirqs_off(); /* * If the system has established the capabilities, make sure * this CPU ticks all of those. If it doesn‘t, the CPU will * fail to come online. */ verify_local_cpu_capabilities(); if (cpu_ops[cpu]->cpu_postboot) cpu_ops[cpu]->cpu_postboot(); /* * Log the CPU info before it is marked online and might get read. */ cpuinfo_store_cpu(); /* * Enable GIC and timers. */ notify_cpu_starting(cpu); smp_store_cpu_info(cpu); /* * OK, now it‘s safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online * before we continue. */ pr_info("CPU%u: Booted secondary processor [%08x]\n", cpu, read_cpuid_id()); set_cpu_online(cpu, true); complete(&cpu_running); local_dbg_enable(); local_irq_enable(); local_async_enable(); /* * OK, it‘s off to the idle thread for us */ cpu_startup_entry(CPUHP_ONLINE); } |
cpu_disable
static int op_cpu_disable(unsigned int cpu) { /* * If we don‘t have a cpu_die method, abort before we reach the point * of no return. CPU0 may not have an cpu_ops, so test for it. */ if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die) return -EOPNOTSUPP; /* * We may need to abort a hot unplug for some other mechanism-specific * reason. */ if (cpu_ops[cpu]->cpu_disable) return cpu_ops[cpu]->cpu_disable(cpu); return 0; } |
cpu_die
void cpu_die(void) { unsigned int cpu = smp_processor_id(); idle_task_exit(); local_irq_disable(); /* Tell __cpu_die() that this CPU is now safe to dispose of */ (void)cpu_report_death(); /* * Actually shutdown the CPU. This must never fail. The specific hotplug * mechanism must perform all required cache maintenance to ensure that * no dirty lines are lost in the process of shutting down the CPU. */ cpu_ops[cpu]->cpu_die(cpu); BUG(); } |
cpu_kill
static int op_cpu_kill(unsigned int cpu) { /* * If we have no means of synchronising with the dying CPU, then assume * that it is really dead. We can only wait for an arbitrary length of * time and hope that it‘s dead, so let‘s skip the wait and just hope. */ if (!cpu_ops[cpu]->cpu_kill) return 0; return cpu_ops[cpu]->cpu_kill(cpu); } |
cpu_init_idle
int __init arm_cpuidle_init(unsigned int cpu) { int ret = -EOPNOTSUPP; if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle) ret = cpu_ops[cpu]->cpu_init_idle(cpu); return ret; } |
cpu_suspend
int arm_cpuidle_suspend(int index) { int cpu = smp_processor_id(); /* * If cpu_ops have not been registered or suspend * has not been initialized, cpu_suspend call fails early. */ if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) return -EOPNOTSUPP; return cpu_ops[cpu]->cpu_suspend(index); } |
cpu_ops到arch-dependent的关联
以start_kernel为起点,查看从内核开始到获取cpu_ops的路径如下:
start_kernel -->setup_arch -->cpu_read_bootcpu_ops 只获取bootcpu的cpu_ops -->cpu_read_bootcpu_ops -->cpu_read_ops(0) -->smp_init_cpus 获取nonboot cpu的cpu_ops -->smp_cpu_setup -->cpu_read_ops |
cpu_read_ops是获取cpu_ops的关键,参数是cpu的序列号,输出是cpu_ops[cpu]。
int __init cpu_read_ops(int cpu) { const char *enable_method = cpu_read_enable_method(cpu); 从DeviceTree获取enable_method字符串 if (!enable_method) return -ENODEV; cpu_ops[cpu] = cpu_get_ops(enable_method); 根据enable_method字符串在supported_cpu_ops获取指针 if (!cpu_ops[cpu]) { pr_warn("Unsupported enable-method: %s\n", enable_method); return -EOPNOTSUPP; } return 0; } |
通过cpu0的DeviceTree可以看出enable-method为psci。
支持的cpu_operations有:
static const struct cpu_operations *supported_cpu_ops[] __initconst = { &smp_spin_table_ops, &cpu_psci_ops, NULL, }; |
所以cpu_ops=&cpu_psci_ops。
cpu_psci_ops分析
cpu_psci_ops结构体可以说是cpu_operations和psci_operations的桥梁,它将cpu_operations的一系列回调函数,映射到psci_operations。
const struct cpu_operations cpu_psci_ops = { .name = "psci", #ifdef CONFIG_CPU_IDLE .cpu_init_idle = cpu_psci_cpu_init_idle, 从DeviceTree获取CPU idle状态数据 .cpu_suspend = cpu_psci_cpu_suspend, 根据是否丢失上下文来选择是psci_ops.cpu_suspend还是cpu_suspend #endif .cpu_init = cpu_psci_cpu_init, 为空 .cpu_prepare = cpu_psci_cpu_prepare, 只是判断psci_ops.cpu_on是否存在,不存在则返回错误。 .cpu_boot = cpu_psci_cpu_boot, 调用psci_ops.cpu_on #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cpu_psci_cpu_disable, 检查是否支持psci_ops.cpu_off。 .cpu_die = cpu_psci_cpu_die, 调用psci_ops.cpu_off .cpu_kill = cpu_psci_cpu_kill, 检查指定cpu是否已经被kill #endif } |
cpu_psci_cpu_boot
static int cpu_psci_cpu_boot(unsigned int cpu) { int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); if (err) pr_err("failed to boot CPU%d (%d)\n", cpu, err); return err; } |
CPU_ON用于secondary boot、hotplug或者big.LITTLE迁移。如果需要从一个核启动另一个核,通过CPU_ON提供一个入口地址和上下文标识。
PSCI提供必要的操作启动一个核,并且在提供的入口地址开始执行,上下文标识必须存放在R0或者W0中。这里的入口地址就对应secondary_entry。
在arch/arm64/kernel/head.S中:
secondary_entry—>secondary_startup—>__secondary_switched—>secondary_start_kernel ENTRY(secondary_entry) bl el2_setup // Drop to EL1 bl set_cpu_boot_mode_flag b secondary_startup ENDPROC(secondary_entry) ENTRY(secondary_startup) /* * Common entry point for secondary CPUs. */ adrp x25, idmap_pg_dir adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor ldr x21, =secondary_data ldr x27, =__secondary_switched // address to jump to after enabling the MMU b __enable_mmu ENDPROC(secondary_startup) ENTRY(__secondary_switched) ldr x0, [x21] // get secondary_data.stack mov sp, x0 mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) |
在secondary_start_kernel将CPU设置为online,并调用.cpu_postboot回调函数,进行boot后处理。然后cpu_startup_entry启动idle线程。
cpu_psci_cpu_init_idle
static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu) { int i, ret, count = 0; u32 *psci_states; struct device_node *state_node, *cpu_node; cpu_node = of_get_cpu_node(cpu, NULL); if (!cpu_node) return -ENODEV; /* * If the PSCI cpu_suspend function hook has not been initialized * idle states must not be enabled, so bail out */ if (!psci_ops.cpu_suspend) return -EOPNOTSUPP; /* Count idle states */ while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", count))) { count++; of_node_put(state_node); } if (!count) return -ENODEV; psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); if (!psci_states) return -ENOMEM; for (i = 0; i < count; i++) { u32 state; state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); ret = of_property_read_u32(state_node, "arm,psci-suspend-param", &state); if (ret) { pr_warn(" * %s missing arm,psci-suspend-param property\n", state_node->full_name); of_node_put(state_node); goto free_mem; } of_node_put(state_node); pr_debug("psci-power-state %#x index %d\n", state, i); if (!psci_power_state_is_valid(state)) { pr_warn("Invalid PSCI power state %#x\n", state); ret = -EINVAL; goto free_mem; } psci_states[i] = state; } /* Idle states parsed correctly, initialize per-cpu pointer */ per_cpu(psci_power_state, cpu) = psci_states; return 0; free_mem: kfree(psci_states); return ret; } |
1.解析DeviceTree中cpu下的cpu-idle-states属性
2.从每个state中获取arm,psci-suspend-param的参数,并验证是否有效。
3.初始化per-CPU类型的指针psci_power_state。
cpu_psci_cpu_suspend
static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index) { int ret; u32 *state = __this_cpu_read(psci_power_state); 从psci_power_state中读取suspend的state参数。 /* * idle state index 0 corresponds to wfi, should never be called * from the cpu_suspend operations */ if (WARN_ON_ONCE(!index)) return -EINVAL; if (!psci_power_state_loses_context(state[index - 1])) ret = psci_ops.cpu_suspend(state[index - 1], 0); else ret = cpu_suspend(index, psci_suspend_finisher); return ret; } |
psci_ops
由于acpi_disabled,所以psci通过DeviceTree获取相关参数。
start_kernel -->setup_arch -->psci_dt_init 这个函数在cpu_ops之前,因为cpu_ops依赖psci_ops |
psci有不同版本,需要通过DeviceTree获取版本信息和使用的method(是smc还是hvc)。
通过查看DeviceTree可以看到对应的是psci_0_2_init。
static const struct of_device_id const psci_of_match[] __initconst = { { .compatible = "arm,psci", .data = psci_0_1_init}, { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, { .compatible = "arm,psci-1.0", .data = psci_0_2_init}, {}, }; |
psci_dt_init解析DeviceTree执行对应psci版本的初始化函数。
int __init psci_dt_init(void) { struct device_node *np; const struct of_device_id *matched_np; psci_initcall_t init_fn; np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); if (!np) return -ENODEV; init_fn = (psci_initcall_t)matched_np->data; return init_fn(np); } |
psci_0_2_init设置method,然后调用psci_probe:
static int __init psci_0_2_init(struct device_node *np) { int err; err = get_set_conduit_method(np); 从DeviceTree可知invoke_psci_fn = __invoke_psci_fn_smc if (err) goto out_put_node; /* * Starting with v0.2, the PSCI specification introduced a call * (PSCI_VERSION) that allows probing the firmware version, so * that PSCI function IDs and version specific initialization * can be carried out according to the specific version reported * by firmware */ err = psci_probe(); out_put_node: of_node_put(np); return err; } |
psci_probe设置PSCI版本高于0.2的回调函数,以及arm_pm_restart和pm_power_off。
static void __init psci_0_2_set_functions(void) { pr_info("Using standard PSCI v0.2 function IDs\n"); psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_FN_NATIVE(0_2, CPU_SUSPEND); psci_ops.cpu_suspend = psci_cpu_suspend; psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; psci_ops.cpu_off = psci_cpu_off; psci_function_id[PSCI_FN_CPU_ON] = PSCI_FN_NATIVE(0_2, CPU_ON); psci_ops.cpu_on = psci_cpu_on; psci_function_id[PSCI_FN_MIGRATE] = PSCI_FN_NATIVE(0_2, MIGRATE); psci_ops.migrate = psci_migrate; psci_ops.affinity_info = psci_affinity_info; psci_ops.migrate_info_type = psci_migrate_info_type; arm_pm_restart = psci_sys_reset; pm_power_off = psci_sys_poweroff; } |
这些函数都有一个共性:最终都通过invoke_psci_fn发起调用,例如invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0)。在这里invoke_psci_fn指向__invoke_psci_fn_smc。
__invoke_psci_fn_smc指向arch/arm64/kernel/psci-call.S定义的函数:
/* int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ ENTRY(__invoke_psci_fn_smc) smc #0 ret ENDPROC(__invoke_psci_fn_smc) |
http://infocenter.arm.com/help/topic/com.arm.doc.den0022c/DEN0022C_Power_State_Coordination_Interface.pdf Chapter5有PSCI函数原型和相关参数返回值的介绍。
第一个参数是Function ID,后面三个参数作为该Function的参数。如果使用32位调用约定(SMC32/HVC32),Function ID通过r0(W0)传递,三个参数通过r1-r3(W1-W3)传递,r0(W0)存放返回值;如果使用64位调用约定(SMC64/HVC64),Function ID通过W0传递,三个参数通过X1-X3传递,X0存放返回值。这些Function ID的实现,在对应的Firmware中,但是可以通过上述pdf查看输入输出细节。
PSCI除了提供psci_ops的回调函数之外,还提供用于restart和power off的arch-dependent函数arm_pm_restart和pm_power_off。
比如machine_power_off和machine_restart调用:
void machine_power_off(void) { local_irq_disable(); smp_send_stop(); if (pm_power_off) pm_power_off(); } void machine_restart(char *cmd) { /* Disable interrupts first */ local_irq_disable(); smp_send_stop(); /* * UpdateCapsule() depends on the system being reset via * ResetSystem(). */ if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_reboot(reboot_mode, NULL); /* Now call the architecture specific reboot code. */ if (arm_pm_restart) arm_pm_restart(reboot_mode, cmd); else do_kernel_restart(cmd); /* * Whoops - the architecture was unable to reboot. */ printk("Reboot failed -- System halted\n"); while (1); }
|
参考文档
Linux CPU core的电源管理(3)_cpu ops:http://www.wowotech.net/pm_subsystem/cpu_ops.html
cpu_ops、cpu_psci_ops、psci_ops、suspend_ops以及arm_idle_driver