欢迎访问 生活随笔!

生活随笔

当前位置: 首页 > 运维知识 > linux >内容正文

linux

Linux虚拟内存映射分析以及CMA测试 - 以SSD202为例

发布时间:2024/10/12 linux 74 豆豆
生活随笔 收集整理的这篇文章主要介绍了 Linux虚拟内存映射分析以及CMA测试 - 以SSD202为例 小编觉得挺不错的,现在分享给大家,帮大家做个参考.

在开始之前,先看一下SSD202的内存使用范围

硬件上SSD202内置128MB内存,其中有一部分预留给MMA,MMAP以及CMA

具体的大小设置在bootargs 中

bootargs = "wt_board=WT2022 console=ttyS0,115200 rootfstype=squashfs,ubifs rootwait=1

cma=8M

LX_MEM=0x7f00000 mma_heap=mma_heap_name0,miu=0,sz=0x1000000

mma_memblock_remove=1 highres=off mmap_reserved=fb,miu=0,sz=0x300000

max_start_off=0x7C00000,max_end_off=0x7F00000";

即最大可使用内存0x7f00000=127MB,CMA占用8M,MMA使用16M,MMAP使用3M,剩下就是kernel可使用范围

预留内存相关启动日志如下:

[ 0.000000] LXmem is 0x7f00000 PHYS_OFFSET is 0x20000000

[ 0.000000] Add mem start 0x20000000 size 0x7f00000!!!!

[ 0.000000]

[ 0.000000] LX_MEM = 0x20000000, 0x7f00000 (16*7=112+15=127MB)

[ 0.000000] LX_MEM2 = 0x0, 0x0

[ 0.000000] LX_MEM3 = 0x0, 0x0

[ 0.000000] EMAC_LEN= 0x0

[ 0.000000] DRAM_LEN= 0x0

----mmap_reserved=fb,miu=0,sz=0x300000=3M

[ 0.000000] deal_with_reserved_mmap memblock_reserve success mmap_reserved_config[0].reserved_start=

[ 0.000000] 0x27c00000 == 0x27f00000 - 3M(0x00300000)

[ 0.000000]

---mma_heap=mma_heap_name0,miu=0,sz=0x1000000=16M

[ 0.000000] deal_with_reserve_mma_heap memblock_reserve success mma_config[0].reserved_start=

[ 0.000000] 0x26c00000 == 0x27c00000 - 16M(0x1000000)

---cma size = 8M

[ 0.000000] cma: Reserved 8 MiB at 0x26400000 = 0x26c00000 - 8M

[ 0.000000] Memory policy: Data cache writealloc

[ 0.000000] percpu: Embedded 13 pages/cpu @c62bc000 s21208 r8192 d23848 u53248

[ 0.000000] Built 1 zonelists in Zone order, mobility grouping on. Total pages: 28162

[ 0.000000] Kernel command line: wt_board=WT2022 console=ttyS0,115200 rootfstype=squashfs,ubifs rootwait=1 cma=8M LX_MEM=0x7f00000 mma_heap=mma_heap_na 。。。。。)

[ 0.000000] PID hash table entries: 512 (order: -1, 2048 bytes)

[ 0.000000] Dentry cache hash table entries: 16384 (order: 4, 65536 bytes)

[ 0.000000] Inode-cache hash table entries: 8192 (order: 3, 32768 bytes)

[ 0.000000] Memory: 96736K/113664K available (2467K kernel code, 222K rwdata, 1212K rodata, 164K init, 174K bss, 8736K reserved, 8192K cma-reserved)

从内存最顶端往下预留,最开始为3MB的MMAP,然后是16M的MMA,最后是8M的CMA区域,留给kernel的可用区域在0x26400000以下

接着看一下SSD202 kernel的虚拟映射表:

[ 0.000000] Virtual kernel memory layout:
[ 0.000000] vector : 0xffff0000 - 0xffff1000 ( 4 kB)
[ 0.000000] fixmap : 0xffc00000 - 0xfff00000 (3072 kB)
[ 0.000000] vmalloc : 0xc8000000 - 0xff800000 ( 888 MB)
[ 0.000000] lowmem : 0xc0000000 - 0xc7f00000 ( 127 MB)
[ 0.000000] modules : 0xbf800000 - 0xc0000000 ( 8 MB)
[ 0.000000] .text : 0xc0008000 - 0xc02710a8 (2469 kB)
[ 0.000000] .init : 0xc03c3000 - 0xc03ec000 ( 164 kB)
[ 0.000000] .data : 0xc03ec000 - 0xc0423bd8 ( 223 kB)
[ 0.000000] .bss : 0xc0425000 - 0xc04509e0 ( 175 kB)

vector为中断向量映射区,位于内存最高端区域

fixmap为固定映射区,即虚拟地址固定,主要在kernel初始化阶段使用,比如console,dtb等以及热补丁应用

vmalloc,虚拟内存申请的地址范围,用于给vmalloc/ioremap动态分配内存

lowmem是线性映射区,虚拟地址与物理地址之间只相差一个固定偏移(虚拟地址 = 物理地址 - PHYS_OFFSET + PAGE_OFFSET,本例中物理地址0x20000000对应虚拟地址0xc0000000)

vmalloc区域和lowmem区域之间有一个1MB的hole,可以防止vmalloc越界

.text、.init、.data、.bss都属于lowmem区域,也即ZONE_NORMAL;

vector、fixmap、vmalloc位于lowmem之上的高端虚拟地址区域(注意:这里是虚拟地址空间的划分,并不等同于物理内存的ZONE_HIGHMEM;本例中127MB物理内存全部落在lowmem线性映射区内)。

modules区域(0xbf800000 - 0xc0000000)位于PAGE_OFFSET(0xc0000000)之下,用于加载内核模块,虽然地址低于0xc0000000,但仍由内核使用,并不属于用户进程的地址空间

以上预留的MMA,MMAP,CMA等空间都在lowmem区

关于CMA

Contiguous Memory Allocator, CMA,连续内存分配器,用于分配连续的大块内存

CMA分配器,会Reserve一片物理内存区域:

  • 设备驱动不用时,内存管理系统将该区域用于分配和管理可移动类型页面;
  • 设备驱动使用时,用于连续内存分配,此时已经分配的页面需要进行迁移;
  • CMA并不进行内存管理,CMA area的内存最终还是要并入伙伴系统进行管理
  • cma_alloc用来从指定的CMA area上分配count个连续的page frame,按照align对齐
  • 此外,CMA分配器还可以与DMA子系统集成在一起,使用DMA的设备驱动程序无需使用单独的CMA API

    在SSD202中,cma相关的日志如下:

    ------USB HOST Controller 使用

    [ 0.000000] cma: Reserved 8 MiB at 0x26400000 = 0x26c00000 - 8M

    [ 1.371962] Sstar-ehci-2 soc:Sstar-ehci-2: EHCI Host Controller

    [ 1.377889] Sstar-ehci-2 soc:Sstar-ehci-2: new USB bus registered, assigned bus number 1

    [ 1.385990] cma: cma_alloc(cma c0435ef0, count 1, align 0)

    [ 1.386037] cma: cma_alloc(): returned c63bd840

    [ 1.386052] cma: cma_alloc(cma c0435ef0, count 1, align 0)

    [ 1.386068] cma: cma_alloc(): returned c63bd860

    [ 1.386083] cma: cma_alloc(cma c0435ef0, count 1, align 0)

    [ 1.386099] cma: cma_alloc(): returned c63bd880

    ------DMA 使用

    [ 1.748775] MSYS: DMEM request: [BDMA]:0x00000840

    [ 1.753324] cma: cma_alloc(cma c0435ef0, count 1, align 0)

    [ 1.753367] cma: cma_alloc(): returned c63bd900

    [ 1.753382] MSYS: DMEM request: [BDMA]:0x00000840 success, CPU phy:@0x26448000, virt:@0xC6448000

    [ 6.942669] MSYS: DMEM request: [emac0_buff]:0x00000812

    [ 6.947755] cma: cma_alloc(cma c0435ef0, count 1, align 0)

    [ 6.947946] cma: cma_alloc(): returned c63bd920

    ------ETH PHY 使用

    [ 6.947962] MSYS: DMEM request: [emac0_buff]:0x00000812 success, CPU phy:@0x26449000, virt:@0xC6449000

    [ 7.902325] >> [sdmmc] ms_sdmmc_probe

    [ 7.906510] cma: cma_alloc(cma c0435ef0, count 1, align 0)

    [ 7.906630] cma: cma_alloc(): returned c63bd940

    关于测试cma代码,借用宋老师的测试用例

    /* * kernel module helper for testing CMA * * Licensed under GPLv2 or later. */#define DEBUG#include <linux/module.h>#include <linux/device.h>#include <linux/fs.h>#include <linux/miscdevice.h>#include <linux/dma-mapping.h>#define CMA_NUM 10static struct device *cma_dev;static dma_addr_t dma_phys[CMA_NUM];static void *dma_virt[CMA_NUM];/* any read request will free coherent memory, eg. * cat /dev/cma_test */static ssize_tcma_test_read(struct file *file, char __user *buf, size_t count, loff_t *ppos){int i;for (i = 0; i < CMA_NUM; i++){if (dma_virt[i]){dma_free_coherent(cma_dev, (i + 1) * SZ_1M, dma_virt[i], dma_phys[i]);_dev_info(cma_dev, "free virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]);dma_virt[i] = NULL;break;}}return 0; }/* * any write request will alloc coherent memory, eg. * echo 0 > /dev/cma_test */static ssize_tcma_test_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos){int i;int ret;unsigned int ctc = 0;for (i = 0; i < CMA_NUM; i++){if (!dma_virt[i]){dma_virt[i] = dma_alloc_coherent(cma_dev, (i + 1) * SZ_1M, &dma_phys[i], GFP_KERNEL);if (dma_virt[i]){void *p;/* touch every page in the allocated memory */for (p = dma_virt[i]; p < dma_virt[i] + (i + 1) * SZ_1M; p += PAGE_SIZE)*(u32 *)p = ctc++;_dev_info(cma_dev, "[%d] alloc virt: %p phys: %p\n", i, dma_virt[i], (void *)dma_phys[i]);}else{dev_err(cma_dev, "[%d] no mem in CMA area\n", i);ret = -ENOMEM;}break;}}return count; }static const struct file_operations cma_test_fops = {.owner = THIS_MODULE,.read = cma_test_read,.write = cma_test_write,};static struct miscdevice cma_test_misc = {.name = "cma_test",.fops = &cma_test_fops,};static int __init cma_test_init(void){int i = 0;int ret = 0;ret = misc_register(&cma_test_misc);if (unlikely(ret)){pr_err("failed to register cma test misc device!\n");return ret;}cma_dev = cma_test_misc.this_device;cma_dev->coherent_dma_mask = ~0;for (i = 0; i < CMA_NUM; i++)dma_virt[i] = 0;_dev_info(cma_dev, "registered.\n");return ret; 
}module_init(cma_test_init);static void __exit cma_test_exit(void){misc_deregister(&cma_test_misc); }module_exit(cma_test_exit);MODULE_LICENSE("GPL");MODULE_AUTHOR("Barry Song <[email protected]>");MODULE_DESCRIPTION("kernel module to help the test of CMA");MODULE_ALIAS("CMA test");

    insmod cma-test.ko 加载模块

    root@wireless-tag:/# insmod cma-test
    [ 3971.556944] devtmpfs: create node [cma_test] dev-name [(null)]
    [ 3971.562783] misc cma_test: registered.

    echo 1 > /dev/cma_test 开始分配空间,第N次运行分配N MB(即依次分配1MB、2MB、3MB……)

    [ 3992.269833] cma: cma_alloc(cma c0435ef0, count 256, align 4)
    [ 3992.269986] cma: cma_alloc(): returned c63be800
    [ 3992.270497] misc cma_test: [0] alloc virt: c64c0000 phys: 264c0000
    [ 3993.008489] random: fast init done

    cat /dev/cma_test 释放空间,每运行一次释放一块已分配的空间(编号最小的那一块)

    [ 4022.896707] cma: cma_release(page c63be800)
    [ 4022.896887] misc cma_test: free virt: c64c0000 phys: 264c0000

    根据内存分配关系,cma物理区域为0x26400000到0x26c00000,日志显示物理地址从0x264c0000开始增长,刚好在cma区

    因为CMA一共只有8M空间,且启动阶段已被其他驱动占用了一小部分,前3次共分配1+2+3=6MB后,第4次需要连续的4MB,剩余空间将不够

    root@wireless-tag:/# echo 1 > /dev/cma_test
    [ 4187.761083] misc cma_test: [0] alloc virt: c64c0000 phys: 264c0000    --1M
    root@wireless-tag:/#
    root@wireless-tag:/# echo 1 > /dev/cma_test
    [ 4188.574042] misc cma_test: [1] alloc virt: c65c0000 phys: 265c0000    --2M
    root@wireless-tag:/# echo 1 > /dev/cma_test
    [ 4189.444204] misc cma_test: [2] alloc virt: c67c0000 phys: 267c0000    --3M
    root@wireless-tag:/# echo 1 > /dev/cma_test
    [ 4190.766380] misc cma_test: [3] no mem in CMA area

    最后介绍一下fixmap映射,关于详细fixmap可以参考 Fix-Mapped Addresses

    在此以dtb加载为例进行介绍

    由于使用openwrt系统,有以下几个点比较特别:

    1. kernel,dtb,rootfs是打包在一起的,形成一个固件

    2. dtb在打包时带有特殊标记,以便于在启动过程中自动查找dtb在固件(内存)中的位置

    dtb的加载过程在arch/arm/kernel/devtree.c的setup_machine_fdt函数中

    /**
     * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
     * @dt_phys: physical address of dt blob
     *
     * If a dtb was passed to the kernel in r2, then use it to choose the
     * correct machine_desc and to setup the system.
     *
     * Returns the matching machine_desc. Returns NULL when no usable device
     * tree is found: dt_phys is 0 or the blob at dt_phys fails verification,
     * and (with CONFIG_SS_BUILTIN_DTB) the built-in blob fails verification
     * as well.
     */
    const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
    {
        const struct machine_desc *mdesc, *mdesc_best = NULL;
        void *virt_p = NULL;

    #if defined(CONFIG_ARCH_MULTIPLATFORM) || defined(CONFIG_ARM_SINGLE_ARMV7M)
        /* Generic descriptor, passed below as the default for machine matching. */
        DT_MACHINE_START(GENERIC_DT, "Generic DT based system")
            .l2c_aux_val = 0x0,
            .l2c_aux_mask = ~0x0,
        MACHINE_END

        mdesc_best = &__mach_desc_GENERIC_DT;
    #endif

        /* Convert the physical blob address before it is verified below. */
        virt_p = phys_to_virt(dt_phys);
        early_print("to check atags dtb phys %p, virt %p\n", (void*)dt_phys, virt_p);

        if (!dt_phys || !early_init_dt_verify(virt_p))
        {
    #ifdef CONFIG_SS_BUILTIN_DTB
            /* No valid blob was passed in: try the built-in one instead. */
            if(early_init_dt_verify(builtin_dtb_start))
            {
                extern int early_atags_to_fdt(void *atag_list, void *fdt, int total_space);
                extern u32 builtin_dtb_size;
                //early_print("early_init_dt_verify() pass...\n");
                /*
                 * The message is printed when nothing was passed in
                 * (dt_phys == 0) or when early_atags_to_fdt() returns 0.
                 */
                if((!dt_phys ) || (!early_atags_to_fdt(virt_p, builtin_dtb_start, builtin_dtb_size)))
                {
                    early_print("early_atags_to_fdt() success\n");
                }
            }
            else
    #endif
            {
                return NULL;
            }
        }

        mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach);

        if (!mdesc) {
            const char *prop;
            int size;
            unsigned long dt_root;

            early_print("\nError: unrecognized/unsupported "
                        "device tree compatible list:\n[ ");

            dt_root = of_get_flat_dt_root();
            prop = of_get_flat_dt_prop(dt_root, "compatible", &size);
            /* "compatible" is walked as a sequence of NUL-terminated strings. */
            while (size > 0) {
                early_print("'%s' ", prop);
                size -= strlen(prop) + 1;
                prop += strlen(prop) + 1;
            }
            early_print("]\n\n");

            dump_machine_table(); /* does not return */
        }

        /* We really don't want to do this, but sometimes firmware provides buggy data */
        if (mdesc->dt_fixup)
            mdesc->dt_fixup();

        early_init_dt_scan_nodes();

        /* Change machine number to match the mdesc we're using */
        __machine_arch_type = mdesc->nr;

        return mdesc;
    }

    fixmap初始化在arch/arm/mm/mmu.c中,执行过程为 setup_arch --> early_fixmap_init

    /*
     * early_fixmap_init - install the early fixmap page table.
     *
     * Looks up the pmd entry covering FIXADDR_TOP, populates it with the
     * bm_pte table in init_mm, and switches pte_offset_fixmap to
     * pte_offset_early_fixmap.
     */
    void __init early_fixmap_init(void)
    {
        pmd_t *pmd;

        /*
         * The early fixmap range spans multiple pmds, for which
         * we are not prepared:
         */
        BUILD_BUG_ON((__fix_to_virt(__end_of_early_ioremap_region) >> PMD_SHIFT)
                     != FIXADDR_TOP >> PMD_SHIFT);

        pmd = fixmap_pmd(FIXADDR_TOP);
        pmd_populate_kernel(&init_mm, pmd, bm_pte);

        pte_offset_fixmap = pte_offset_early_fixmap;
    }

     

    kernel加载初始化阶段的页表建立在linux/arch/arm/kernel/head.S中,有3级页表,PGD–>PMD–>PTE

    /*
     * Setup the initial page tables. We only setup the barest
     * amount which are required to get the kernel running, which
     * generally means mapping in the kernel code.
     *
     * r8 = phys_offset, r9 = cpuid, r10 = procinfo
     *
     * Returns:
     *  r0, r3, r5-r7 corrupted
     *  r4 = physical page table address
     */
    __create_page_tables:
            pgtbl   r4, r8                          @ page table address

            /*
             * Clear the swapper page table
             * (PG_DIR_SIZE bytes, four words per loop iteration)
             */
            mov     r0, r4
            mov     r3, #0
            add     r6, r0, #PG_DIR_SIZE
    1:      str     r3, [r0], #4
            str     r3, [r0], #4
            str     r3, [r0], #4
            str     r3, [r0], #4
            teq     r0, r6
            bne     1b

    #ifdef CONFIG_ARM_LPAE
            /*
             * Build the PGD table (first level) to point to the PMD table. A PGD
             * entry is 64-bit wide.
             */
            mov     r0, r4
            add     r3, r4, #0x1000                 @ first PMD table address
            orr     r3, r3, #3                      @ PGD block type
            mov     r6, #4                          @ PTRS_PER_PGD
            mov     r7, #1 << (55 - 32)             @ L_PGD_SWAPPER
    1:
    #ifdef CONFIG_CPU_ENDIAN_BE8
            str     r7, [r0], #4                    @ set top PGD entry bits
            str     r3, [r0], #4                    @ set bottom PGD entry bits
    #else
            str     r3, [r0], #4                    @ set bottom PGD entry bits
            str     r7, [r0], #4                    @ set top PGD entry bits
    #endif
            add     r3, r3, #0x1000                 @ next PMD table
            subs    r6, r6, #1
            bne     1b

            add     r4, r4, #0x1000                 @ point to the PMD tables
    #ifdef CONFIG_CPU_ENDIAN_BE8
            add     r4, r4, #4                      @ we only write the bottom word
    #endif
    #endif

            ldr     r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags

            /*
             * Create identity mapping to cater for __enable_mmu.
             * This identity mapping will be removed by paging_init().
             */
            adr     r0, __turn_mmu_on_loc
            ldmia   r0, {r3, r5, r6}
            sub     r0, r0, r3                      @ virt->phys offset
            add     r5, r5, r0                      @ phys __turn_mmu_on
            add     r6, r6, r0                      @ phys __turn_mmu_on_end
            mov     r5, r5, lsr #SECTION_SHIFT
            mov     r6, r6, lsr #SECTION_SHIFT

    1:      orr     r3, r7, r5, lsl #SECTION_SHIFT  @ flags + kernel base
            str     r3, [r4, r5, lsl #PMD_ORDER]    @ identity mapping
            cmp     r5, r6
            addlo   r5, r5, #1                      @ next section
            blo     1b

            /*
             * Map our RAM from the start to the end of the kernel .bss section.
             */
            add     r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
            ldr     r6, =(_end - 1)
            orr     r3, r8, r7
            add     r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
    1:      str     r3, [r0], #1 << PMD_ORDER
            add     r3, r3, #1 << SECTION_SHIFT
            cmp     r0, r6
            bls     1b

    #ifdef CONFIG_XIP_KERNEL
            /*
             * Map the kernel image separately as it is not located in RAM.
             */
    #define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
            mov     r3, pc
            mov     r3, r3, lsr #SECTION_SHIFT
            orr     r3, r7, r3, lsl #SECTION_SHIFT
            add     r0, r4, #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
            str     r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
            ldr     r6, =(_edata_loc - 1)
            add     r0, r0, #1 << PMD_ORDER
            add     r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
    1:      cmp     r0, r6
            add     r3, r3, #1 << SECTION_SHIFT
            strls   r3, [r0], #1 << PMD_ORDER
            bls     1b
    #endif

            /*
             * Then map boot params address in r2 if specified.
             * We map 2 sections in case the ATAGs/DTB crosses a section boundary.
             * (The conditional "ne" instructions are skipped when the
             * section-aligned address computed from r2 is zero.)
             */
            mov     r0, r2, lsr #SECTION_SHIFT
            movs    r0, r0, lsl #SECTION_SHIFT
            subne   r3, r0, r8
            addne   r3, r3, #PAGE_OFFSET
            addne   r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
            orrne   r6, r7, r0
            strne   r6, [r3], #1 << PMD_ORDER
            addne   r6, r6, #1 << SECTION_SHIFT
            strne   r6, [r3]

    #if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8)
            sub     r4, r4, #4                      @ Fixup page table pointer
                                                    @ for 64-bit descriptors
    #endif

    #ifdef CONFIG_DEBUG_LL
    #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING)
            /*
             * Map in IO space for serial debugging.
             * This allows debug messages to be output
             * via a serial console before paging_init.
             */
            addruart r7, r3, r0

            mov     r3, r3, lsr #SECTION_SHIFT
            mov     r3, r3, lsl #PMD_ORDER

            add     r0, r4, r3
            mov     r3, r7, lsr #SECTION_SHIFT
            ldr     r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
            orr     r3, r7, r3, lsl #SECTION_SHIFT
    #ifdef CONFIG_ARM_LPAE
            mov     r7, #1 << (54 - 32)             @ XN
    #ifdef CONFIG_CPU_ENDIAN_BE8
            str     r7, [r0], #4
            str     r3, [r0], #4
    #else
            str     r3, [r0], #4
            str     r7, [r0], #4
    #endif
    #else
            orr     r3, r3, #PMD_SECT_XN
            str     r3, [r0], #4
    #endif

    #else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */
            /* we don't need any serial debugging mappings */
            ldr     r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
    #endif

    #if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)
            /*
             * If we're using the NetWinder or CATS, we also need to map
             * in the 16550-type serial port for the debug messages
             */
            add     r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER)
            orr     r3, r7, #0x7c000000
            str     r3, [r0]
    #endif
    #ifdef CONFIG_ARCH_RPC
            /*
             * Map in screen at 0x02000000 & SCREEN2_BASE
             * Similar reasons here - for debug. This is
             * only for Acorn RiscPC architectures.
             */
            add     r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER)
            orr     r3, r7, #0x02000000
            str     r3, [r0]
            add     r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER)
            str     r3, [r0]
    #endif
    #endif
    #ifdef CONFIG_ARM_LPAE
            sub     r4, r4, #0x1000                 @ point to the PGD table
    #endif
            ret     lr
    ENDPROC(__create_page_tables)

    关于swapper_pg_dir

    /*
     * swapper_pg_dir is the virtual address of the initial page table.
     * We place the page tables 16K below KERNEL_RAM_VADDR. Therefore, we must
     * make sure that KERNEL_RAM_VADDR is correctly set. Currently, we expect
     * the least significant 16 bits to be 0x8000, but we could probably
     * relax this restriction to KERNEL_RAM_VADDR >= PAGE_OFFSET + 0x4000.
     */
    #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET)

    /* Fail the build when the low 16 bits of KERNEL_RAM_VADDR are not 0x8000. */
    #if (KERNEL_RAM_VADDR & 0xffff) != 0x8000
    #error KERNEL_RAM_VADDR must start at 0xXXXX8000
    #endif

    总结

    以上是生活随笔为你收集整理的Linux虚拟内存映射分析以及CMA测试 - 以SSD202为例的全部内容,希望文章能够帮你解决所遇到的问题。

    如果觉得生活随笔网站内容还不错,欢迎将生活随笔推荐给好友。