diff --git a/arch/um/Kconfig b/arch/um/Kconfig index c89575d05021..18051b1cfce0 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -5,6 +5,7 @@ menu "UML-specific options" config UML bool default y + select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL @@ -32,6 +33,8 @@ config UML select HAVE_ARCH_VMAP_STACK select HAVE_RUST select ARCH_HAS_UBSAN + select HAVE_ARCH_TRACEHOOK + select THREAD_INFO_IN_TASK config MMU bool @@ -94,7 +97,7 @@ config MAY_HAVE_RUNTIME_DEPS config STATIC_LINK bool "Force a static link" - depends on CC_CAN_LINK_STATIC_NO_RUNTIME_DEPS || !MAY_HAVE_RUNTIME_DEPS + depends on !MAY_HAVE_RUNTIME_DEPS help This option gives you the ability to force a static link of UML. Normally, UML is linked as a shared binary. This is inconvenient for @@ -209,8 +212,8 @@ config MMAPPER config PGTABLE_LEVELS int - default 3 if 3_LEVEL_PGTABLES - default 2 + default 4 if 64BIT + default 2 if !64BIT config UML_TIME_TRAVEL_SUPPORT bool @@ -227,6 +230,21 @@ config UML_TIME_TRAVEL_SUPPORT It is safe to say Y, but you probably don't need this. +config UML_MAX_USERSPACE_ITERATIONS + int + prompt "Maximum number of unscheduled userspace iterations" + default 10000 + depends on UML_TIME_TRAVEL_SUPPORT + help + In UML's inf-cpu and ext time-travel modes, userspace can run without being + interrupted. This will eventually overwhelm the kernel and create OOM + situations (mainly due to RCU not running). This setting specifies the number + of kernel/userspace switches (minor/major page fault, signal or syscall) + for the same userspace thread before the sched_clock is advanced by a + jiffy to trigger scheduling. + + Setting it to zero disables the feature. + config KASAN_SHADOW_OFFSET hex depends on KASAN diff --git a/arch/um/Makefile b/arch/um/Makefile index 00b63bac5eff..1d36a613aad8 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ -Din6addr_loopback=kernel_in6addr_loopback \ - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ + -D__close_range=kernel__close_range KBUILD_RUSTFLAGS += -Crelocation-model=pie @@ -70,7 +71,9 @@ KBUILD_AFLAGS += $(ARCH_INCLUDE) USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \ $(ARCH_INCLUDE) $(MODE_INCLUDE) $(filter -I%,$(CFLAGS)) \ -D_FILE_OFFSET_BITS=64 -idirafter $(srctree)/include \ - -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ + -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ \ + -include $(srctree)/include/linux/compiler-version.h \ + -include $(srctree)/include/linux/kconfig.h #This will adjust *FLAGS accordingly to the platform.
include $(srctree)/$(ARCH_DIR)/Makefile-os-Linux diff --git a/arch/um/Makefile-skas b/arch/um/Makefile-skas index 67323b028999..1a27e65bcb9c 100644 --- a/arch/um/Makefile-skas +++ b/arch/um/Makefile-skas @@ -3,15 +3,15 @@ # Licensed under the GPL # -GPROF_OPT += -pg +export UM_GPROF_OPT += -pg ifdef CONFIG_CC_IS_CLANG -GCOV_OPT += -fprofile-instr-generate -fcoverage-mapping +export UM_GCOV_OPT += -fprofile-instr-generate -fcoverage-mapping else -GCOV_OPT += -fprofile-arcs -ftest-coverage +export UM_GCOV_OPT += -fprofile-arcs -ftest-coverage endif -CFLAGS-$(CONFIG_GCOV) += $(GCOV_OPT) -CFLAGS-$(CONFIG_GPROF) += $(GPROF_OPT) -LINK-$(CONFIG_GCOV) += $(GCOV_OPT) -LINK-$(CONFIG_GPROF) += $(GPROF_OPT) +CFLAGS-$(CONFIG_GCOV) += $(UM_GCOV_OPT) +CFLAGS-$(CONFIG_GPROF) += $(UM_GPROF_OPT) +LINK-$(CONFIG_GCOV) += $(UM_GCOV_OPT) +LINK-$(CONFIG_GPROF) += $(UM_GPROF_OPT) diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig index 9c9c77f1255a..1ffa088739f4 100644 --- a/arch/um/configs/i386_defconfig +++ b/arch/um/configs/i386_defconfig @@ -1,4 +1,3 @@ -CONFIG_3_LEVEL_PGTABLES=y # CONFIG_COMPACTION is not set CONFIG_BINFMT_MISC=m CONFIG_HOSTFS=y diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index a66e556012c4..35f9beeb19b3 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -161,6 +161,8 @@ static __noreturn int winch_thread(void *arg) int count; char c = 1; + os_set_pdeathsig(); + pty_fd = data->pty_fd; pipe_fd = data->pipe_fd; count = write(pipe_fd, &c, sizeof(c)); diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index 9d228878cea2..0ac149de1ac0 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -48,6 +48,7 @@ MODULE_PARM_DESC(mixer, MIXER_HELP); #ifndef MODULE static int set_dsp(char *name, int *add) { + *add = 0; dsp = name; return 0; } @@ -56,6 +57,7 @@ __uml_setup("dsp=", set_dsp, "dsp=\n" DSP_HELP); static int set_mixer(char *name, int *add) { + *add = 0; mixer = name; return 0; } diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 77c4afb8ab90..75d04fb4994a 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -336,7 +336,7 @@ static struct platform_driver uml_net_driver = { static void net_device_release(struct device *dev) { - struct uml_net *device = dev_get_drvdata(dev); + struct uml_net *device = container_of(dev, struct uml_net, pdev.dev); struct net_device *netdev = device->dev; struct uml_net_private *lp = netdev_priv(netdev); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 7f28ec1929dc..66c1a8835e36 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -779,7 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev) static void ubd_device_release(struct device *dev) { - struct ubd *ubd_dev = dev_get_drvdata(dev); + struct ubd *ubd_dev = container_of(dev, struct ubd, pdev.dev); blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); @@ -898,6 +898,8 @@ static int ubd_add(int n, char **error_out) if (err) goto out_cleanup_disk; + ubd_dev->disk = disk; + return 0; out_cleanup_disk: @@ -1499,6 +1501,7 @@ int io_thread(void *arg) { int n, count, written, res; + os_set_pdeathsig(); os_fix_helper_signals(); while(1){ diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index c992da83268d..64c09db392c1 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -815,7 +815,8 @@ static struct 
platform_driver uml_net_driver = { static void vector_device_release(struct device *dev) { - struct vector_device *device = dev_get_drvdata(dev); + struct vector_device *device = + container_of(dev, struct vector_device, pdev.dev); struct net_device *netdev = device->dev; list_del(&device->list); diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h index 6f147cd3c9f7..fcfa3b7e021b 100644 --- a/arch/um/drivers/vhost_user.h +++ b/arch/um/drivers/vhost_user.h @@ -10,6 +10,7 @@ /* Feature bits */ #define VHOST_USER_F_PROTOCOL_FEATURES 30 /* Protocol feature bits */ +#define VHOST_USER_PROTOCOL_F_MQ 0 #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 #define VHOST_USER_PROTOCOL_F_CONFIG 9 @@ -23,7 +24,8 @@ /* Supported transport features */ #define VHOST_USER_SUPPORTED_F BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES) /* Supported protocol features */ -#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ +#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_MQ) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 2b6e701776b6..cc3be48a9d6e 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -56,6 +56,7 @@ struct virtio_uml_device { int sock, req_fd, irq; u64 features; u64 protocol_features; + u64 max_vqs; u8 status; u8 registered:1; u8 suspended:1; @@ -72,8 +73,6 @@ struct virtio_uml_vq_info { bool suspended; }; -extern unsigned long long physmem_size, highmem; - #define vu_err(vu_dev, ...) dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__) /* Vhost-user protocol */ @@ -343,6 +342,17 @@ static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev, protocol_features); } +static int vhost_user_get_queue_num(struct virtio_uml_device *vu_dev, + u64 *queue_num) +{ + int rc = vhost_user_send_no_payload(vu_dev, true, + VHOST_USER_GET_QUEUE_NUM); + + if (rc) + return rc; + return vhost_user_recv_u64(vu_dev, queue_num); +} + static void vhost_user_reply(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, int response) { @@ -516,6 +526,15 @@ static int vhost_user_init(struct virtio_uml_device *vu_dev) return rc; } + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_MQ)) { + rc = vhost_user_get_queue_num(vu_dev, &vu_dev->max_vqs); + if (rc) + return rc; + } else { + vu_dev->max_vqs = U64_MAX; + } + return 0; } @@ -625,7 +644,7 @@ static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) { struct vhost_user_msg msg = { .header.request = VHOST_USER_SET_MEM_TABLE, - .header.size = sizeof(msg.payload.mem_regions), + .header.size = offsetof(typeof(msg.payload.mem_regions), regions[1]), .payload.mem_regions.num = 1, }; unsigned long reserved = uml_reserved - uml_physmem; @@ -673,13 +692,6 @@ static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) if (rc < 0) return rc; - if (highmem) { - msg.payload.mem_regions.num++; - rc = vhost_user_init_mem_region(__pa(end_iomem), highmem, - &fds[1], &msg.payload.mem_regions.regions[1]); - if (rc < 0) - return rc; - } return vhost_user_send(vu_dev, false, &msg, fds, msg.payload.mem_regions.num); @@ -897,7 +909,7 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, { struct virtio_uml_vq_info *info = vq->priv; int call_fds[2]; - int rc; + int rc, irq; /* no call FD 
needed/desired in this case */ if (vu_dev->protocol_features & @@ -914,19 +926,23 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, return rc; info->call_fd = call_fds[0]; - rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, - vu_interrupt, IRQF_SHARED, info->name, vq); - if (rc < 0) + irq = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, + vu_interrupt, IRQF_SHARED, info->name, vq); + if (irq < 0) { + rc = irq; goto close_both; + } rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]); if (rc) goto release_irq; + vu_dev->irq = irq; + goto out; release_irq: - um_free_irq(vu_dev->irq, vq); + um_free_irq(irq, vq); close_both: os_close_file(call_fds[0]); out: @@ -1023,7 +1039,9 @@ static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vq; /* not supported for now */ - if (WARN_ON(nvqs > 64)) + if (WARN(nvqs > 64 || nvqs > vu_dev->max_vqs, + "%d VQs requested, only up to 64 or %lld supported\n", + nvqs, vu_dev->max_vqs)) return -EINVAL; rc = vhost_user_set_mem_table(vu_dev); @@ -1210,6 +1228,7 @@ static int virtio_uml_probe(struct platform_device *pdev) vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID; vu_dev->pdev = pdev; vu_dev->req_fd = -1; + vu_dev->irq = UM_IRQ_ALLOC; time_travel_propagate_time(); diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 18f902da8e99..428f2c5158c2 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += bug.h generic-y += compat.h -generic-y += current.h generic-y += device.h generic-y += dma-mapping.h generic-y += emergency-restart.h diff --git a/arch/um/include/asm/current.h b/arch/um/include/asm/current.h new file mode 100644 index 000000000000..de64e032d66c --- /dev/null +++ b/arch/um/include/asm/current.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_CURRENT_H +#define __ASM_CURRENT_H + +#include +#include + +#ifndef __ASSEMBLY__ + +struct task_struct; +extern struct task_struct *cpu_tasks[NR_CPUS]; + +static __always_inline struct task_struct *get_current(void) +{ + return cpu_tasks[0]; +} + + +#define current get_current() + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_CURRENT_H */ diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 834313ecd3d6..3d516f3ca9c7 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -29,51 +29,35 @@ struct page; #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT) - -typedef struct { unsigned long pte; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; -typedef struct { unsigned long pgd; } pgd_t; -#define pte_val(p) ((p).pte) - -#define pte_get_bits(p, bits) ((p).pte & (bits)) -#define pte_set_bits(p, bits) ((p).pte |= (bits)) -#define pte_clear_bits(p, bits) ((p).pte &= ~(bits)) -#define pte_copy(to, from) ({ (to).pte = (from).pte; }) -#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) -#define pte_set_val(p, phys, prot) \ - ({ (p).pte = (phys) | pgprot_val(prot); }) - -#define pmd_val(x) ((x).pmd) -#define __pmd(x) ((pmd_t) { (x) } ) - -typedef unsigned long long phys_t; - -#else - typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pgd; } pgd_t; -#ifdef CONFIG_3_LEVEL_PGTABLES +#if CONFIG_PGTABLE_LEVELS > 2 + typedef struct { unsigned long pmd; } pmd_t; #define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) } ) -#endif + 
+#if CONFIG_PGTABLE_LEVELS > 3 + +typedef struct { unsigned long pud; } pud_t; +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) } ) + +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ #define pte_val(x) ((x).pte) - #define pte_get_bits(p, bits) ((p).pte & (bits)) #define pte_set_bits(p, bits) ((p).pte |= (bits)) #define pte_clear_bits(p, bits) ((p).pte &= ~(bits)) #define pte_copy(to, from) ((to).pte = (from).pte) -#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) +#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEEDSYNC)) #define pte_set_val(p, phys, prot) (p).pte = (phys | pgprot_val(prot)) typedef unsigned long phys_t; -#endif - typedef struct { unsigned long pgprot; } pgprot_t; typedef struct page *pgtable_t; diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index de5e31c64793..04fb4e6969a4 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -31,7 +31,7 @@ do { \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) -#ifdef CONFIG_3_LEVEL_PGTABLES +#if CONFIG_PGTABLE_LEVELS > 2 #define __pmd_free_tlb(tlb, pmd, address) \ do { \ @@ -39,6 +39,15 @@ do { \ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ } while (0) +#if CONFIG_PGTABLE_LEVELS > 3 + +#define __pud_free_tlb(tlb, pud, address) \ +do { \ + pagetable_pud_dtor(virt_to_ptdesc(pud)); \ + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \ +} while (0) + +#endif #endif #endif diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index 8256ecc5b919..ab0c8dd86564 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -31,7 +31,7 @@ printk("%s:%d: bad pgd %p(%08lx).\n", __FILE__, __LINE__, &(e), \ pgd_val(e)) -static inline int pgd_newpage(pgd_t pgd) { return 0; } +static inline int pgd_needsync(pgd_t pgd) { return 0; } static inline void pgd_mkuptodate(pgd_t pgd) { } #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-4level.h similarity index 60% rename from arch/um/include/asm/pgtable-3level.h rename to arch/um/include/asm/pgtable-4level.h index 8a5032ec231f..0d279caee93c 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-4level.h @@ -4,21 +4,25 @@ * Derived from include/asm-i386/pgtable.h */ -#ifndef __UM_PGTABLE_3LEVEL_H -#define __UM_PGTABLE_3LEVEL_H +#ifndef __UM_PGTABLE_4LEVEL_H +#define __UM_PGTABLE_4LEVEL_H -#include +#include -/* PGDIR_SHIFT determines what a third-level page table entry can map */ +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ -#ifdef CONFIG_64BIT -#define PGDIR_SHIFT 30 -#else -#define PGDIR_SHIFT 31 -#endif +#define PGDIR_SHIFT 39 #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) +/* PUD_SHIFT determines the size of the area a third-level page table can + * map + */ + +#define PUD_SHIFT 30 +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + /* PMD_SHIFT determines the size of the area a second-level page table can * map */ @@ -32,13 +36,9 @@ */ #define PTRS_PER_PTE 512 -#ifdef CONFIG_64BIT #define PTRS_PER_PMD 512 +#define PTRS_PER_PUD 512 #define PTRS_PER_PGD 512 -#else -#define PTRS_PER_PMD 1024 -#define PTRS_PER_PGD 1024 -#endif #define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) @@ -48,11 +48,14 @@ #define pmd_ERROR(e) \ printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), \ pmd_val(e)) 
+#define pud_ERROR(e) \ + printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), \ + pud_val(e)) #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), \ pgd_val(e)) -#define pud_none(x) (!(pud_val(x) & ~_PAGE_NEWPAGE)) +#define pud_none(x) (!(pud_val(x) & ~_PAGE_NEEDSYNC)) #define pud_bad(x) ((pud_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pud_present(x) (pud_val(x) & _PAGE_PRESENT) #define pud_populate(mm, pud, pmd) \ @@ -60,23 +63,40 @@ #define set_pud(pudptr, pudval) (*(pudptr) = (pudval)) -static inline int pgd_newpage(pgd_t pgd) +#define p4d_none(x) (!(p4d_val(x) & ~_PAGE_NEEDSYNC)) +#define p4d_bad(x) ((p4d_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +#define p4d_present(x) (p4d_val(x) & _PAGE_PRESENT) +#define p4d_populate(mm, p4d, pud) \ + set_p4d(p4d, __p4d(_PAGE_TABLE + __pa(pud))) + +#define set_p4d(p4dptr, p4dval) (*(p4dptr) = (p4dval)) + + +static inline int pgd_needsync(pgd_t pgd) { - return(pgd_val(pgd) & _PAGE_NEWPAGE); + return pgd_val(pgd) & _PAGE_NEEDSYNC; } -static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; } +static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEEDSYNC; } #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) static inline void pud_clear (pud_t *pud) { - set_pud(pud, __pud(_PAGE_NEWPAGE)); + set_pud(pud, __pud(_PAGE_NEEDSYNC)); +} + +static inline void p4d_clear (p4d_t *p4d) +{ + set_p4d(p4d, __p4d(_PAGE_NEEDSYNC)); } #define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK) #define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & PAGE_MASK)) +#define p4d_page(p4d) phys_to_page(p4d_val(p4d) & PAGE_MASK) +#define p4d_pgtable(p4d) ((pud_t *) __va(p4d_val(p4d) & PAGE_MASK)) + static inline unsigned long pte_pfn(pte_t pte) { return phys_to_pfn(pte_val(pte)); @@ -97,4 +117,3 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) } #endif - diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index faab5a2a4b06..0bd60afcc37d 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -11,8 +11,7 @@ #include #define _PAGE_PRESENT 0x001 -#define _PAGE_NEWPAGE 0x002 -#define _PAGE_NEWPROT 0x004 +#define _PAGE_NEEDSYNC 0x002 #define _PAGE_RW 0x020 #define _PAGE_USER 0x040 #define _PAGE_ACCESSED 0x080 @@ -24,10 +23,12 @@ /* We borrow bit 10 to store the exclusive marker in swap PTEs. 
*/ #define _PAGE_SWP_EXCLUSIVE 0x400 -#ifdef CONFIG_3_LEVEL_PGTABLES -#include -#else +#if CONFIG_PGTABLE_LEVELS == 4 +#include +#elif CONFIG_PGTABLE_LEVELS == 2 #include +#else +#error "Unsupported number of page table levels" #endif extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; @@ -78,22 +79,22 @@ extern unsigned long end_iomem; */ #define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) -#define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE)) +#define pte_clear(mm, addr, xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEEDSYNC)) -#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE)) +#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEEDSYNC)) #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0) +#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEEDSYNC; } while (0) -#define pmd_newpage(x) (pmd_val(x) & _PAGE_NEWPAGE) -#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEWPAGE) +#define pmd_needsync(x) (pmd_val(x) & _PAGE_NEEDSYNC) +#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEEDSYNC) -#define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE) -#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE) +#define pud_needsync(x) (pud_val(x) & _PAGE_NEEDSYNC) +#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEEDSYNC) -#define p4d_newpage(x) (p4d_val(x) & _PAGE_NEWPAGE) -#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE) +#define p4d_needsync(x) (p4d_val(x) & _PAGE_NEEDSYNC) +#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEEDSYNC) #define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT) #define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK) @@ -144,14 +145,9 @@ static inline int pte_young(pte_t pte) return pte_get_bits(pte, _PAGE_ACCESSED); } -static inline int pte_newpage(pte_t pte) +static inline int pte_needsync(pte_t pte) { - return pte_get_bits(pte, _PAGE_NEWPAGE); -} - -static inline int pte_newprot(pte_t pte) -{ - return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); + return pte_get_bits(pte, _PAGE_NEEDSYNC); } /* @@ -160,12 +156,6 @@ static inline int pte_newprot(pte_t pte) * ================================= */ -static inline pte_t pte_mknewprot(pte_t pte) -{ - pte_set_bits(pte, _PAGE_NEWPROT); - return(pte); -} - static inline pte_t pte_mkclean(pte_t pte) { pte_clear_bits(pte, _PAGE_DIRTY); @@ -180,19 +170,14 @@ static inline pte_t pte_mkold(pte_t pte) static inline pte_t pte_wrprotect(pte_t pte) { - if (likely(pte_get_bits(pte, _PAGE_RW))) - pte_clear_bits(pte, _PAGE_RW); - else - return pte; - return(pte_mknewprot(pte)); + pte_clear_bits(pte, _PAGE_RW); + return pte; } static inline pte_t pte_mkread(pte_t pte) { - if (unlikely(pte_get_bits(pte, _PAGE_USER))) - return pte; pte_set_bits(pte, _PAGE_USER); - return(pte_mknewprot(pte)); + return pte; } static inline pte_t pte_mkdirty(pte_t pte) @@ -209,23 +194,19 @@ static inline pte_t pte_mkyoung(pte_t pte) static inline pte_t pte_mkwrite_novma(pte_t pte) { - if (unlikely(pte_get_bits(pte, _PAGE_RW))) - return pte; pte_set_bits(pte, _PAGE_RW); - return(pte_mknewprot(pte)); + return pte; } static inline pte_t pte_mkuptodate(pte_t pte) { - pte_clear_bits(pte, _PAGE_NEWPAGE); - if(pte_present(pte)) - pte_clear_bits(pte, _PAGE_NEWPROT); - return(pte); + pte_clear_bits(pte, _PAGE_NEEDSYNC); + return pte; } -static inline pte_t pte_mknewpage(pte_t pte) +static inline pte_t pte_mkneedsync(pte_t pte) { - pte_set_bits(pte, 
_PAGE_NEWPAGE); + pte_set_bits(pte, _PAGE_NEEDSYNC); return(pte); } @@ -233,13 +214,11 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) { pte_copy(*pteptr, pteval); - /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so - * fix_range knows to unmap it. _PAGE_NEWPROT is specific to - * mapped pages. + /* If it's a swap entry, it needs to be marked _PAGE_NEEDSYNC so + * update_pte_range knows to unmap it. */ - *pteptr = pte_mknewpage(*pteptr); - if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr); + *pteptr = pte_mkneedsync(*pteptr); } #define PFN_PTE_SHIFT PAGE_SHIFT @@ -279,7 +258,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { - return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEWPAGE); + return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEEDSYNC); } /* @@ -294,8 +273,6 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) ({ pte_t pte; \ \ pte_set_val(pte, page_to_phys(page), (pgprot)); \ - if (pte_present(pte)) \ - pte_mknewprot(pte_mknewpage(pte)); \ pte;}) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) @@ -329,7 +306,7 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); * <--------------- offset ----------------> E < type -> 0 0 0 1 0 * * E is the exclusive marker that is not stored in swap entries. - * _PAGE_NEWPAGE (bit 1) is always set to 1 in set_pte(). + * _PAGE_NEEDSYNC (bit 1) is always set to 1 in set_pte(). */ #define __swp_type(x) (((x).val >> 5) & 0x1f) #define __swp_offset(x) ((x).val >> 11) diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index bce4595798da..5d6356eafffe 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -20,10 +20,7 @@ struct task_struct; struct mm_struct; struct thread_struct { - struct pt_regs regs; struct pt_regs *segv_regs; - void *fault_addr; - jmp_buf *fault_catcher; struct task_struct *prev_sched; struct arch_thread arch; jmp_buf switch_buf; @@ -33,12 +30,14 @@ struct thread_struct { void *arg; } thread; } request; + + /* Contains variable sized FP registers */ + struct pt_regs regs; }; #define INIT_THREAD \ { \ .regs = EMPTY_REGS, \ - .fault_addr = NULL, \ .prev_sched = NULL, \ .arch = INIT_ARCH_THREAD, \ .request = { } \ diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index c7b4b49826a2..f9ad06fcc991 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -17,35 +17,17 @@ #include struct thread_info { - struct task_struct *task; /* main task structure */ unsigned long flags; /* low level flags */ __u32 cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ - struct thread_info *real_thread; /* Points to non-IRQ stack */ - unsigned long aux_fp_regs[FP_SIZE]; /* auxiliary fp_regs to save/restore - them out-of-band */ }; #define INIT_THREAD_INFO(tsk) \ { \ - .task = &tsk, \ .flags = 0, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .real_thread = NULL, \ -} - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - unsigned long mask = THREAD_SIZE - 1; - void *p; - - asm volatile ("" : "=r" (p) : "0" (&ti)); - ti = (struct thread_info *) (((unsigned long)p) & ~mask); - return ti; } #endif diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h index 
db997976b6ea..13a3009942be 100644 --- a/arch/um/include/asm/tlbflush.h +++ b/arch/um/include/asm/tlbflush.h @@ -9,8 +9,8 @@ #include /* - * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls - * from the process handling the MM (which can be the kernel itself). + * In UML, we need to sync the TLB over by using mmap/munmap syscalls from + * the process handling the MM (which can be the kernel itself). * * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes * we catch all PTE transitions where memory that was unusable becomes usable. diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index 06292fca5a4d..ea65f151bf48 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -30,25 +30,23 @@ #include -struct cpu_task { - void *task; -}; +struct task_struct; +extern struct task_struct *cpu_tasks[]; -extern struct cpu_task cpu_tasks[]; +extern unsigned long long physmem_size; extern unsigned long high_physmem; extern unsigned long uml_physmem; extern unsigned long uml_reserved; extern unsigned long end_vm; extern unsigned long start_vm; -extern unsigned long long highmem; extern unsigned long brk_start; extern unsigned long host_task_size; extern unsigned long stub_start; -extern int linux_main(int argc, char **argv); +extern int linux_main(int argc, char **argv, char **envp); extern void uml_finishsetup(void); struct siginfo; diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 579ed946a3a9..73f3a4792ed8 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -6,7 +6,6 @@ DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); -DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); DEFINE(UM_GFP_KERNEL, GFP_KERNEL); DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC); @@ -15,17 +14,3 @@ DEFINE(UM_THREAD_SIZE, THREAD_SIZE); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); - -#ifdef CONFIG_PRINTK -DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK); -#endif -#ifdef CONFIG_UML_X86 -DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); -#endif -#ifdef CONFIG_64BIT -DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT); -#endif -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT -DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT); -#endif - diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index d8ffd2db168e..f21dc8517538 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -60,7 +60,6 @@ extern unsigned long from_irq_stack(int nested); extern int singlestepping(void); extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); -extern void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs); extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); extern void fatal_sigsegv(void) __attribute__ ((noreturn)); diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h index 11a723a58545..adfa08062f88 100644 --- a/arch/um/include/shared/mem_user.h +++ b/arch/um/include/shared/mem_user.h @@ -47,10 +47,9 @@ extern int iomem_size; #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) extern unsigned long find_iomem(char *driver, unsigned long *len_out); -extern void mem_total_pages(unsigned long physmem, unsigned long iomem, - unsigned long 
highmem); +extern void mem_total_pages(unsigned long physmem, unsigned long iomem); extern void setup_physmem(unsigned long start, unsigned long usable, - unsigned long len, unsigned long long highmem); + unsigned long len); extern void map_memory(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x); diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 9a039d6f1f74..5babad8c5f75 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -145,7 +145,6 @@ extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); extern int os_get_ifname(int fd, char *namebuf); extern int os_set_slip(int fd); extern int os_mode_fd(int fd, int mode); -extern int os_fsync_file(int fd); extern int os_seek_file(int fd, unsigned long long offset); extern int os_open_file(const char *file, struct openflags flags, int mode); @@ -199,15 +198,11 @@ extern int create_mem_file(unsigned long long len); extern void report_enomem(void); /* process.c */ -extern unsigned long os_process_pc(int pid); -extern int os_process_parent(int pid); extern void os_alarm_process(int pid); -extern void os_stop_process(int pid); extern void os_kill_process(int pid, int reap_child); extern void os_kill_ptraced_process(int pid, int reap_child); extern int os_getpid(void); -extern int os_getpgrp(void); extern void init_new_thread_signals(void); @@ -220,6 +215,8 @@ extern int os_drop_memory(void *addr, int length); extern int can_drop_memory(void); extern int os_mincore(void *addr, unsigned long len); +void os_set_pdeathsig(void); + /* execvp.c */ extern int execvp_noalloc(char *buf, const char *file, char *const argv[]); /* helper.c */ @@ -244,7 +241,6 @@ extern void block_signals(void); extern void unblock_signals(void); extern int um_set_signals(int enable); extern int um_set_signals_trace(int enable); -extern int os_is_signal_stack(void); extern void deliver_alarm(void); extern void register_pm_wake_signal(void); extern void block_signals_hard(void); @@ -283,13 +279,11 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, int phys_fd, unsigned long long offset); int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len); -int protect(struct mm_id *mm_idp, unsigned long addr, - unsigned long len, unsigned int prot); /* skas/process.c */ extern int is_skas_winch(int pid, int fd, void *data); extern int start_userspace(unsigned long stub_stack); -extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs); +extern void userspace(struct uml_pt_regs *regs); extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)); extern void switch_threads(jmp_buf *me, jmp_buf *you); extern int start_idle_thread(void *stack, jmp_buf *switch_buf); @@ -329,9 +323,6 @@ extern int __ignore_sigio_fd(int fd); /* tty.c */ extern int get_pty(void); -/* sys-$ARCH/task_size.c */ -extern unsigned long os_get_top_address(void); - long syscall(long number, ...); /* irqflags tracing */ diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h index a0450326521c..7d81b2339a48 100644 --- a/arch/um/include/shared/registers.h +++ b/arch/um/include/shared/registers.h @@ -8,12 +8,6 @@ #include -extern int save_i387_registers(int pid, unsigned long *fp_regs); -extern int restore_i387_registers(int pid, unsigned long *fp_regs); -extern int save_fp_registers(int pid, unsigned long *fp_regs); -extern int restore_fp_registers(int pid, unsigned long *fp_regs); -extern int save_fpx_registers(int pid, 
unsigned long *fp_regs); -extern int restore_fpx_registers(int pid, unsigned long *fp_regs); extern int init_pid_registers(int pid); extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs); extern int get_fp_registers(int pid, unsigned long *regs); diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h index 2b6b44759dfa..81a4cace032c 100644 --- a/arch/um/include/shared/skas/stub-data.h +++ b/arch/um/include/shared/skas/stub-data.h @@ -12,6 +12,17 @@ #include #include +struct stub_init_data { + unsigned long stub_start; + + int stub_code_fd; + unsigned long stub_code_offset; + int stub_data_fd; + unsigned long stub_data_offset; + + unsigned long segv_handler; +}; + #define STUB_NEXT_SYSCALL(s) \ ((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len)) @@ -19,7 +30,6 @@ enum stub_syscall_type { STUB_SYSCALL_UNSET = 0, STUB_SYSCALL_MMAP, STUB_SYSCALL_MUNMAP, - STUB_SYSCALL_MPROTECT, }; struct stub_syscall { diff --git a/arch/um/include/shared/timetravel.h b/arch/um/include/shared/timetravel.h index c8db2f213dba..7c2b277b7eb0 100644 --- a/arch/um/include/shared/timetravel.h +++ b/arch/um/include/shared/timetravel.h @@ -12,14 +12,13 @@ enum time_travel_mode { TT_MODE_EXTERNAL, }; -#if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \ - defined(CONFIG_UML_TIME_TRAVEL_SUPPORT) +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) extern enum time_travel_mode time_travel_mode; extern int time_travel_should_print_bc_msg; #else #define time_travel_mode TT_MODE_OFF #define time_travel_should_print_bc_msg 0 -#endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */ +#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ void _time_travel_print_bc_msg(void); static inline void time_travel_print_bc_msg(void) diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index bbab79c0c074..139eb78a4767 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -38,7 +38,7 @@ extern void panic(const char *fmt, ...) #define UM_KERN_DEBUG KERN_DEBUG #define UM_KERN_CONT KERN_CONT -#ifdef UML_CONFIG_PRINTK +#if IS_ENABLED(CONFIG_PRINTK) #define printk(...) _printk(__VA_ARGS__) extern int _printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/arch/um/kernel/dtb.c b/arch/um/kernel/dtb.c index 8d78ced9e08f..15c342426489 100644 --- a/arch/um/kernel/dtb.c +++ b/arch/um/kernel/dtb.c @@ -31,6 +31,7 @@ void uml_dtb_init(void) static int __init uml_dtb_setup(char *line, int *add) { + *add = 0; dtb = line; return 0; } diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 3385d653ebd0..a36b7918a011 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -116,8 +116,6 @@ SECTIONS .fini_array : { *(.fini_array) } .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) - . 
= ALIGN(KERNEL_STACK_SIZE); - *(.data..init_irqstack) DATA_DATA *(.data.* .gnu.linkonce.d.*) SORT(CONSTRUCTORS) @@ -178,3 +176,6 @@ SECTIONS DISCARDS } + +ASSERT(__syscall_stub_end - __syscall_stub_start <= PAGE_SIZE, + "STUB code must not be larger than one page"); diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 47b8cb1a1156..99dba827461c 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -34,6 +34,7 @@ int __init read_initrd(void) static int __init uml_initrd_setup(char *line, int *add) { + *add = 0; initrd = line; return 0; } diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 534e91797f89..338450741aac 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -674,115 +674,3 @@ void __init init_IRQ(void) /* Initialize EPOLL Loop */ os_setup_epoll(); } - -/* - * IRQ stack entry and exit: - * - * Unlike i386, UML doesn't receive IRQs on the normal kernel stack - * and switch over to the IRQ stack after some preparation. We use - * sigaltstack to receive signals on a separate stack from the start. - * These two functions make sure the rest of the kernel won't be too - * upset by being on a different stack. The IRQ stack has a - * thread_info structure at the bottom so that current et al continue - * to work. - * - * to_irq_stack copies the current task's thread_info to the IRQ stack - * thread_info and sets the tasks's stack to point to the IRQ stack. - * - * from_irq_stack copies the thread_info struct back (flags may have - * been modified) and resets the task's stack pointer. - * - * Tricky bits - - * - * What happens when two signals race each other? UML doesn't block - * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal - * could arrive while a previous one is still setting up the - * thread_info. - * - * There are three cases - - * The first interrupt on the stack - sets up the thread_info and - * handles the interrupt - * A nested interrupt interrupting the copying of the thread_info - - * can't handle the interrupt, as the stack is in an unknown state - * A nested interrupt not interrupting the copying of the - * thread_info - doesn't do any setup, just handles the interrupt - * - * The first job is to figure out whether we interrupted stack setup. - * This is done by xchging the signal mask with thread_info->pending. - * If the value that comes back is zero, then there is no setup in - * progress, and the interrupt can be handled. If the value is - * non-zero, then there is stack setup in progress. In order to have - * the interrupt handled, we leave our signal in the mask, and it will - * be handled by the upper handler after it has set up the stack. - * - * Next is to figure out whether we are the outer handler or a nested - * one. As part of setting up the stack, thread_info->real_thread is - * set to non-NULL (and is reset to NULL on exit). This is the - * nesting indicator. If it is non-NULL, then the stack is already - * set up and the handler can run. - */ - -static unsigned long pending_mask; - -unsigned long to_irq_stack(unsigned long *mask_out) -{ - struct thread_info *ti; - unsigned long mask, old; - int nested; - - mask = xchg(&pending_mask, *mask_out); - if (mask != 0) { - /* - * If any interrupts come in at this point, we want to - * make sure that their bits aren't lost by our - * putting our bit in. So, this loop accumulates bits - * until xchg returns the same value that we put in. 
- * When that happens, there were no new interrupts, - * and pending_mask contains a bit for each interrupt - * that came in. - */ - old = *mask_out; - do { - old |= mask; - mask = xchg(&pending_mask, old); - } while (mask != old); - return 1; - } - - ti = current_thread_info(); - nested = (ti->real_thread != NULL); - if (!nested) { - struct task_struct *task; - struct thread_info *tti; - - task = cpu_tasks[ti->cpu].task; - tti = task_thread_info(task); - - *ti = *tti; - ti->real_thread = tti; - task->stack = ti; - } - - mask = xchg(&pending_mask, 0); - *mask_out |= mask | nested; - return 0; -} - -unsigned long from_irq_stack(int nested) -{ - struct thread_info *ti, *to; - unsigned long mask; - - ti = current_thread_info(); - - pending_mask = 1; - - to = ti->real_thread; - current->stack = to; - ti->real_thread = NULL; - *to = *ti; - - mask = xchg(&pending_mask, 0); - return mask & ~1; -} - diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index a5b4fe2ad931..53248ed04771 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -51,8 +50,6 @@ EXPORT_SYMBOL(empty_zero_page); pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* Initialized at boot time, and readonly after that */ -unsigned long long highmem; -EXPORT_SYMBOL(highmem); int kmalloc_ok = 0; /* Used during early boot */ @@ -98,7 +95,7 @@ static void __init one_page_table_init(pmd_t *pmd) static void __init one_md_table_init(pud_t *pud) { -#ifdef CONFIG_3_LEVEL_PGTABLES +#if CONFIG_PGTABLE_LEVELS > 2 pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); if (!pmd_table) panic("%s: Failed to allocate %lu bytes align=%lx\n", @@ -109,6 +106,19 @@ static void __init one_md_table_init(pud_t *pud) #endif } +static void __init one_ud_table_init(p4d_t *p4d) +{ +#if CONFIG_PGTABLE_LEVELS > 3 + pud_t *pud_table = (pud_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!pud_table) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + + set_p4d(p4d, __p4d(_KERNPG_TABLE + (unsigned long) __pa(pud_table))); + BUG_ON(pud_table != pud_offset(p4d, 0)); +#endif +} + static void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) { @@ -126,6 +136,8 @@ static void __init fixrange_init(unsigned long start, unsigned long end, for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { p4d = p4d_offset(pgd, vaddr); + if (p4d_none(*p4d)) + one_ud_table_init(p4d); pud = pud_offset(p4d, vaddr); if (pud_none(*pud)) one_md_table_init(pud); diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index fb2adfb49945..a74f17b033c4 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -22,19 +22,14 @@ static int physmem_fd = -1; unsigned long high_physmem; EXPORT_SYMBOL(high_physmem); -extern unsigned long long physmem_size; - -void __init mem_total_pages(unsigned long physmem, unsigned long iomem, - unsigned long highmem) +void __init mem_total_pages(unsigned long physmem, unsigned long iomem) { - unsigned long phys_pages, highmem_pages; - unsigned long iomem_pages, total_pages; + unsigned long phys_pages, iomem_pages, total_pages; - phys_pages = physmem >> PAGE_SHIFT; - iomem_pages = iomem >> PAGE_SHIFT; - highmem_pages = highmem >> PAGE_SHIFT; + phys_pages = physmem >> PAGE_SHIFT; + iomem_pages = iomem >> PAGE_SHIFT; - total_pages = phys_pages + iomem_pages + highmem_pages; + total_pages = phys_pages + iomem_pages; max_mapnr = total_pages; } @@ -64,13 +59,12 @@ void 
map_memory(unsigned long virt, unsigned long phys, unsigned long len, * @reserve_end: end address of the physical kernel memory. * @len: Length of total physical memory that should be mapped/made * available, in bytes. - * @highmem: Number of highmem bytes that should be mapped/made available. * - * Creates an unlinked temporary file of size (len + highmem) and memory maps + * Creates an unlinked temporary file of size (len) and memory maps * it on the last executable image address (uml_reserved). * * The offset is needed as the length of the total physical memory - * (len + highmem) includes the size of the memory used be the executable image, + * (len) includes the size of the memory used by the executable image, * but the mapped-to address is the last address of the executable image * (uml_reserved == end address of executable image). * * @@ -78,24 +72,24 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len, * of all user space processes/kernel tasks. */ void __init setup_physmem(unsigned long start, unsigned long reserve_end, - unsigned long len, unsigned long long highmem) + unsigned long len) { unsigned long reserve = reserve_end - start; - long map_size = len - reserve; + unsigned long map_size = len - reserve; int err; - if(map_size <= 0) { + if (len <= reserve) { os_warn("Too few physical memory! Needed=%lu, given=%lu\n", reserve, len); exit(1); } - physmem_fd = create_mem_file(len + highmem); + physmem_fd = create_mem_file(len); err = os_map_memory((void *) reserve_end, physmem_fd, reserve, map_size, 1, 1, 1); if (err < 0) { - os_warn("setup_physmem - mapping %ld bytes of memory at 0x%p " + os_warn("setup_physmem - mapping %lu bytes of memory at 0x%p " "failed - errno = %d\n", map_size, (void *) reserve_end, err); exit(1); @@ -107,9 +101,8 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end, */ os_seek_file(physmem_fd, __pa(__syscall_stub_start)); os_write_file(physmem_fd, __syscall_stub_start, PAGE_SIZE); - os_fsync_file(physmem_fd); - memblock_add(__pa(start), len + highmem); + memblock_add(__pa(start), len); memblock_reserve(__pa(start), reserve); min_low_pfn = PFN_UP(__pa(reserve_end)); @@ -137,10 +130,6 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out) region = region->next; } } - else if (phys < __pa(end_iomem) + highmem) { - fd = physmem_fd; - *offset_out = phys - iomem_size; - } return fd; } @@ -149,6 +138,8 @@ EXPORT_SYMBOL(phys_mapping); static int __init uml_mem_setup(char *line, int *add) { char *retptr; + + *add = 0; physmem_size = memparse(line,&retptr); return 0; } diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index be2856af6d4c..30bdc0a87dc8 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -43,7 +43,8 @@ * cares about its entry, so it's OK if another processor is modifying its * entry. */ -struct cpu_task cpu_tasks[NR_CPUS] = { [0 ...
NR_CPUS - 1] = { NULL } }; +struct task_struct *cpu_tasks[NR_CPUS]; +EXPORT_SYMBOL(cpu_tasks); void free_stack(unsigned long stack, int order) { @@ -64,7 +65,7 @@ unsigned long alloc_stack(int order, int atomic) static inline void set_current(struct task_struct *task) { - cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) { task }); + cpu_tasks[task_thread_info(task)->cpu] = task; } struct task_struct *__switch_to(struct task_struct *from, struct task_struct *to) @@ -116,7 +117,7 @@ void new_thread_handler(void) * callback returns only if the kernel thread execs a process */ fn(arg); - userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs); + userspace(¤t->thread.regs.regs); } /* Called magically, see new_thread_handler above */ @@ -133,7 +134,7 @@ static void fork_handler(void) current->thread.prev_sched = NULL; - userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs); + userspace(¤t->thread.regs.regs); } int copy_thread(struct task_struct * p, const struct kernel_clone_args *args) @@ -187,6 +188,13 @@ void initial_thread_cb(void (*proc)(void *), void *arg) kmalloc_ok = save_kmalloc_ok; } +int arch_dup_task_struct(struct task_struct *dst, + struct task_struct *src) +{ + memcpy(dst, src, arch_task_struct_size); + return 0; +} + void um_idle_sleep(void) { if (time_travel_mode != TT_MODE_OFF) @@ -287,11 +295,3 @@ unsigned long __get_wchan(struct task_struct *p) return 0; } - -int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu) -{ - int cpu = current_thread_info()->cpu; - - return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu); -} - diff --git a/arch/um/kernel/skas/.gitignore b/arch/um/kernel/skas/.gitignore new file mode 100644 index 000000000000..c3409ced0f38 --- /dev/null +++ b/arch/um/kernel/skas/.gitignore @@ -0,0 +1,2 @@ +stub_exe +stub_exe.dbg diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index 6f86d53e3d69..3384be42691f 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -3,14 +3,48 @@ # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) # -obj-y := stub.o mmu.o process.o syscall.o uaccess.o +obj-y := stub.o mmu.o process.o syscall.o uaccess.o \ + stub_exe_embed.o + +# Stub executable + +stub_exe_objs-y := stub_exe.o + +stub_exe_objs := $(foreach F,$(stub_exe_objs-y),$(obj)/$F) + +# Object file containing the ELF executable +$(obj)/stub_exe_embed.o: $(src)/stub_exe_embed.S $(obj)/stub_exe + +$(obj)/stub_exe.dbg: $(stub_exe_objs) FORCE + $(call if_changed,stub_exe) + +$(obj)/stub_exe: OBJCOPYFLAGS := -S +$(obj)/stub_exe: $(obj)/stub_exe.dbg FORCE + $(call if_changed,objcopy) + +quiet_cmd_stub_exe = STUB_EXE $@ + cmd_stub_exe = $(CC) -nostdlib -o $@ \ + $(filter-out $(UM_GPROF_OPT) $(UM_GCOV_OPT),$(KBUILD_CFLAGS)) $(STUB_EXE_LDFLAGS) \ + $(filter %.o,$^) + +STUB_EXE_LDFLAGS = -Wl,-n -static + +targets += stub_exe.dbg stub_exe $(stub_exe_objs-y) + +# end # stub.o is in the stub, so it can't be built with profiling # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work -> # disable it CFLAGS_stub.o := $(CFLAGS_NO_HARDENING) -UNPROFILE_OBJS := stub.o +CFLAGS_stub_exe.o := $(CFLAGS_NO_HARDENING) + +# Clang will call memset() from __builtin_alloca() when stack variable +# initialization is enabled, which is used in stub_exe.c. 
+CFLAGS_stub_exe.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized) + +UNPROFILE_OBJS := stub.o stub_exe.o KCOV_INSTRUMENT := n include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 886ed5e65674..0eb5a1d3ba70 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -40,35 +40,13 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) goto out_free; } - /* - * Ensure the new MM is clean and nothing unwanted is mapped. - * - * TODO: We should clear the memory up to STUB_START to ensure there is - * nothing mapped there, i.e. we (currently) have: - * - * |- user memory -|- unused -|- stub -|- unused -| - * ^ TASK_SIZE ^ STUB_START - * - * Meaning we have two unused areas where we may still have valid - * mappings from our internal clone(). That isn't really a problem as - * userspace is not going to access them, but it is definitely not - * correct. - * - * However, we are "lucky" and if rseq is configured, then on 32 bit - * it will fall into the first empty range while on 64 bit it is going - * to use an anonymous mapping in the second range. As such, things - * continue to work for now as long as we don't start unmapping these - * areas. - * - * Change this to STUB_START once we have a clean userspace. - */ - unmap(new_id, 0, TASK_SIZE); + /* Ensure the new MM is clean and nothing unwanted is mapped */ + unmap(new_id, 0, STUB_START); return 0; out_free: - if (new_id->stack != 0) - free_pages(new_id->stack, ilog2(STUB_DATA_PAGES)); + free_pages(new_id->stack, ilog2(STUB_DATA_PAGES)); out: return ret; } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 68657988c8d1..05dcdc057af9 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -22,15 +22,13 @@ static int __init start_kernel_proc(void *unused) { block_signals_trace(); - cpu_tasks[0].task = current; - start_kernel(); return 0; } extern int userspace_pid[]; -extern char cpu0_irqstack[]; +static char cpu0_irqstack[THREAD_SIZE] __aligned(THREAD_SIZE); int __init start_uml(void) { diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c index 5d52ffa682dc..796fc266d3bb 100644 --- a/arch/um/kernel/skas/stub.c +++ b/arch/um/kernel/skas/stub.c @@ -35,16 +35,6 @@ static __always_inline int syscall_handler(struct stub_data *d) return -1; } break; - case STUB_SYSCALL_MPROTECT: - res = stub_syscall3(__NR_mprotect, - sc->mem.addr, sc->mem.length, - sc->mem.prot); - if (res) { - d->err = res; - d->syscall_data_len = i; - return -1; - } - break; default: d->err = -95; /* EOPNOTSUPP */ d->syscall_data_len = i; diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c new file mode 100644 index 000000000000..23c99b285e82 --- /dev/null +++ b/arch/um/kernel/skas/stub_exe.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include + +void _start(void); + +noinline static void real_init(void) +{ + struct stub_init_data init_data; + unsigned long res; + struct { + void *ss_sp; + int ss_flags; + size_t ss_size; + } stack = { + .ss_size = STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, + }; + struct { + void *sa_handler_; + unsigned long sa_flags; + void *sa_restorer; + unsigned long long sa_mask; + } sa = { + /* Need to set SA_RESTORER (but the handler never returns) */ + .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000, + /* no need to mask any signals */ + .sa_mask = 0, + }; + + /* set a nice name */ + stub_syscall2(__NR_prctl, PR_SET_NAME, 
(unsigned long)"uml-userspace"); + + /* Make sure this process dies if the kernel dies */ + stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL); + + /* read information from STDIN and close it */ + res = stub_syscall3(__NR_read, 0, + (unsigned long)&init_data, sizeof(init_data)); + if (res != sizeof(init_data)) + stub_syscall1(__NR_exit, 10); + + stub_syscall1(__NR_close, 0); + + /* map stub code + data */ + res = stub_syscall6(STUB_MMAP_NR, + init_data.stub_start, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_EXEC, MAP_FIXED | MAP_SHARED, + init_data.stub_code_fd, init_data.stub_code_offset); + if (res != init_data.stub_start) + stub_syscall1(__NR_exit, 11); + + res = stub_syscall6(STUB_MMAP_NR, + init_data.stub_start + UM_KERN_PAGE_SIZE, + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, + init_data.stub_data_fd, init_data.stub_data_offset); + if (res != init_data.stub_start + UM_KERN_PAGE_SIZE) + stub_syscall1(__NR_exit, 12); + + /* setup signal stack inside stub data */ + stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE; + stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0); + + /* register SIGSEGV handler */ + sa.sa_handler_ = (void *) init_data.segv_handler; + res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0, + sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 13); + + stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + + stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP); + + stub_syscall1(__NR_exit, 14); + + __builtin_unreachable(); +} + +__attribute__((naked)) void _start(void) +{ + /* + * Since the stack after exec() starts at the top-most address, + * but that's exactly where we also want to map the stub data + * and code, this must: + * - push the stack by 1 code and STUB_DATA_PAGES data pages + * - call real_init() + * This way, real_init() can use the stack normally, while the + * original stack further down (higher address) will become + * inaccessible after the mmap() calls above. 
+ */ + stub_start(real_init); +} diff --git a/arch/um/kernel/skas/stub_exe_embed.S b/arch/um/kernel/skas/stub_exe_embed.S new file mode 100644 index 000000000000..6d8914fbe8f1 --- /dev/null +++ b/arch/um/kernel/skas/stub_exe_embed.S @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include + +__INITDATA + +SYM_DATA_START(stub_exe_start) + .incbin "arch/um/kernel/skas/stub_exe" +SYM_DATA_END_LABEL(stub_exe_start, SYM_L_GLOBAL, stub_exe_end) + +__FINIT diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 4bb8622dc512..13ee5666668d 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -32,12 +32,6 @@ void show_stack(struct task_struct *task, unsigned long *stack, struct pt_regs *segv_regs = current->thread.segv_regs; int i; - if (!segv_regs && os_is_signal_stack()) { - pr_err("Received SIGSEGV in SIGSEGV handler," - " aborting stack trace!\n"); - return; - } - if (!stack) stack = get_stack_pointer(task, segv_regs); @@ -52,5 +46,5 @@ void show_stack(struct task_struct *task, unsigned long *stack, } printk("%sCall Trace:\n", loglvl); - dump_trace(current, &stackops, (void *)loglvl); + dump_trace(task ?: current, &stackops, (void *)loglvl); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 29b27b90581f..1394568c0210 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -25,6 +25,8 @@ #include #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +#include + enum time_travel_mode time_travel_mode; EXPORT_SYMBOL_GPL(time_travel_mode); @@ -47,6 +49,15 @@ static u16 time_travel_shm_id; static struct um_timetravel_schedshm *time_travel_shm; static union um_timetravel_schedshm_client *time_travel_shm_client; +unsigned long tt_extra_sched_jiffies; + +notrace unsigned long long sched_clock(void) +{ + return (unsigned long long)(jiffies - INITIAL_JIFFIES + + tt_extra_sched_jiffies) + * (NSEC_PER_SEC / HZ); +} + static void time_travel_set_time(unsigned long long ns) { if (unlikely(ns < time_travel_time)) @@ -443,6 +454,11 @@ static void time_travel_periodic_timer(struct time_travel_event *e) { time_travel_add_event(&time_travel_timer_event, time_travel_time + time_travel_timer_interval); + + /* clock tick; decrease extra jiffies by keeping sched_clock constant */ + if (tt_extra_sched_jiffies > 0) + tt_extra_sched_jiffies -= 1; + deliver_alarm(); } @@ -594,6 +610,10 @@ EXPORT_SYMBOL_GPL(time_travel_add_irq_event); static void time_travel_oneshot_timer(struct time_travel_event *e) { + /* clock tick; decrease extra jiffies by keeping sched_clock constant */ + if (tt_extra_sched_jiffies > 0) + tt_extra_sched_jiffies -= 1; + deliver_alarm(); } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 548af31d4111..cf7e0d4407f2 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -23,9 +23,6 @@ struct vm_ops { int phys_fd, unsigned long long offset); int (*unmap)(struct mm_id *mm_idp, unsigned long virt, unsigned long len); - int (*mprotect)(struct mm_id *mm_idp, - unsigned long virt, unsigned long len, - unsigned int prot); }; static int kern_map(struct mm_id *mm_idp, @@ -44,15 +41,6 @@ static int kern_unmap(struct mm_id *mm_idp, return os_unmap_memory((void *)virt, len); } -static int kern_mprotect(struct mm_id *mm_idp, - unsigned long virt, unsigned long len, - unsigned int prot) -{ - return os_protect_memory((void *)virt, len, - prot & UM_PROT_READ, prot & UM_PROT_WRITE, - 1); -} - void report_enomem(void) { printk(KERN_ERR "UML ran out of memory on the host side! 
" @@ -65,33 +53,37 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, struct vm_ops *ops) { pte_t *pte; - int r, w, x, prot, ret = 0; + int ret = 0; pte = pte_offset_kernel(pmd, addr); do { - r = pte_read(*pte); - w = pte_write(*pte); - x = pte_exec(*pte); - if (!pte_young(*pte)) { - r = 0; - w = 0; - } else if (!pte_dirty(*pte)) - w = 0; + if (!pte_needsync(*pte)) + continue; - prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | - (x ? UM_PROT_EXEC : 0)); - if (pte_newpage(*pte)) { - if (pte_present(*pte)) { - __u64 offset; - unsigned long phys = pte_val(*pte) & PAGE_MASK; - int fd = phys_mapping(phys, &offset); + if (pte_present(*pte)) { + __u64 offset; + unsigned long phys = pte_val(*pte) & PAGE_MASK; + int fd = phys_mapping(phys, &offset); + int r, w, x, prot; + + r = pte_read(*pte); + w = pte_write(*pte); + x = pte_exec(*pte); + if (!pte_young(*pte)) { + r = 0; + w = 0; + } else if (!pte_dirty(*pte)) + w = 0; + + prot = (r ? UM_PROT_READ : 0) | + (w ? UM_PROT_WRITE : 0) | + (x ? UM_PROT_EXEC : 0); + + ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE, + prot, fd, offset); + } else + ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); - ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE, - prot, fd, offset); - } else - ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); - } else if (pte_newprot(*pte)) - ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot); *pte = pte_mkuptodate(*pte); } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); return ret; @@ -109,7 +101,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end); if (!pmd_present(*pmd)) { - if (pmd_newpage(*pmd)) { + if (pmd_needsync(*pmd)) { ret = ops->unmap(ops->mm_idp, addr, next - addr); pmd_mkuptodate(*pmd); @@ -132,7 +124,7 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr, do { next = pud_addr_end(addr, end); if (!pud_present(*pud)) { - if (pud_newpage(*pud)) { + if (pud_needsync(*pud)) { ret = ops->unmap(ops->mm_idp, addr, next - addr); pud_mkuptodate(*pud); @@ -155,7 +147,7 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, do { next = p4d_addr_end(addr, end); if (!p4d_present(*p4d)) { - if (p4d_newpage(*p4d)) { + if (p4d_needsync(*p4d)) { ret = ops->unmap(ops->mm_idp, addr, next - addr); p4d_mkuptodate(*p4d); @@ -180,18 +172,16 @@ int um_tlb_sync(struct mm_struct *mm) if (mm == &init_mm) { ops.mmap = kern_map; ops.unmap = kern_unmap; - ops.mprotect = kern_mprotect; } else { ops.mmap = map; ops.unmap = unmap; - ops.mprotect = protect; } pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, mm->context.sync_tlb_range_to); if (!pgd_present(*pgd)) { - if (pgd_newpage(*pgd)) { + if (pgd_needsync(*pgd)) { ret = ops.unmap(ops.mm_idp, addr, next - addr); pgd_mkuptodate(*pgd); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 97c8df9c4401..cdaee3e94273 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -201,7 +201,6 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, struct uml_pt_regs *regs) { - jmp_buf *catcher; int si_code; int err; int is_write = FAULT_WRITE(fi); @@ -246,15 +245,8 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, address = 0; } - catcher = current->thread.fault_catcher; if (!err) goto out; - else if (catcher != NULL) { - current->thread.fault_addr = (void *) address; - UML_LONGJMP(catcher, 1); - } - else if (current->thread.fault_addr != NULL) - 
panic("fault_addr set but no fault catcher"); else if (!is_user && arch_fixup(ip, regs)) goto out; @@ -310,14 +302,6 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) } } -void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs) -{ - if (current->thread.fault_catcher != NULL) - UML_LONGJMP(current->thread.fault_catcher, 1); - else - relay_signal(sig, si, regs); -} - void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { do_IRQ(WINCH_IRQ, regs); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index e8e8b54b3037..8037a967225d 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -65,9 +65,6 @@ struct cpuinfo_um boot_cpu_data = { EXPORT_SYMBOL(boot_cpu_data); -union thread_union cpu0_irqstack - __section(".data..init_irqstack") = - { .thread_info = INIT_THREAD_INFO(init_task) }; /* Changed in setup_arch, which is called in early boot */ static char host_info[(__NEW_UTS_LEN + 1) * 5]; @@ -131,7 +128,7 @@ static int have_root __initdata; static int have_console __initdata; /* Set in uml_mem_setup and modified in linux_main */ -long long physmem_size = 64 * 1024 * 1024; +unsigned long long physmem_size = 64 * 1024 * 1024; EXPORT_SYMBOL(physmem_size); static const char *usage_string = @@ -167,19 +164,6 @@ __uml_setup("root=", uml_root_setup, " root=/dev/ubd5\n\n" ); -static int __init no_skas_debug_setup(char *line, int *add) -{ - os_warn("'debug' is not necessary to gdb UML in skas mode - run\n"); - os_warn("'gdb linux'\n"); - - return 0; -} - -__uml_setup("debug", no_skas_debug_setup, -"debug\n" -" this flag is not needed to run gdb on UML in skas mode\n\n" -); - static int __init uml_console_setup(char *line, int *add) { have_console = 1; @@ -257,6 +241,8 @@ static struct notifier_block panic_exit_notifier = { void uml_finishsetup(void) { + cpu_tasks[0] = &init_task; + atomic_notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); @@ -302,7 +288,24 @@ static void parse_cache_line(char *line) } } -int __init linux_main(int argc, char **argv) +static unsigned long get_top_address(char **envp) +{ + unsigned long top_addr = (unsigned long) &top_addr; + int i; + + /* The earliest variable should be after the program name in ELF */ + for (i = 0; envp[i]; i++) { + if ((unsigned long) envp[i] > top_addr) + top_addr = (unsigned long) envp[i]; + } + + top_addr &= ~(UM_KERN_PAGE_SIZE - 1); + top_addr += UM_KERN_PAGE_SIZE; + + return top_addr; +} + +int __init linux_main(int argc, char **argv, char **envp) { unsigned long avail, diff; unsigned long virtmem_size, max_physmem; @@ -324,20 +327,23 @@ int __init linux_main(int argc, char **argv) if (have_console == 0) add_arg(DEFAULT_COMMAND_LINE_CONSOLE); - host_task_size = os_get_top_address(); - /* reserve a few pages for the stubs (taking care of data alignment) */ - /* align the data portion */ - BUILD_BUG_ON(!is_power_of_2(STUB_DATA_PAGES)); - stub_start = (host_task_size - 1) & ~(STUB_DATA_PAGES * PAGE_SIZE - 1); + host_task_size = get_top_address(envp); + /* reserve a few pages for the stubs */ + stub_start = host_task_size - STUB_DATA_PAGES * PAGE_SIZE; /* another page for the code portion */ stub_start -= PAGE_SIZE; host_task_size = stub_start; + /* Limit TASK_SIZE to what is addressable by the page table */ + task_size = host_task_size; + if (task_size > (unsigned long long) PTRS_PER_PGD * PGDIR_SIZE) + task_size = PTRS_PER_PGD * PGDIR_SIZE; + /* * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps * out */ - task_size = 
host_task_size & PGDIR_MASK; + task_size = task_size & PGDIR_MASK; /* OS sanity checks that need to happen before the kernel runs */ os_early_checks(); @@ -366,18 +372,15 @@ int __init linux_main(int argc, char **argv) setup_machinename(init_utsname()->machine); - highmem = 0; + physmem_size = (physmem_size + PAGE_SIZE - 1) & PAGE_MASK; iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; + max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; - /* - * Zones have to begin on a 1 << MAX_PAGE_ORDER page boundary, - * so this makes sure that's true for highmem - */ - max_physmem &= ~((1 << (PAGE_SHIFT + MAX_PAGE_ORDER)) - 1); if (physmem_size + iomem_size > max_physmem) { - highmem = physmem_size + iomem_size - max_physmem; - physmem_size -= highmem; + physmem_size = max_physmem - iomem_size; + os_info("Physical memory size shrunk to %llu bytes\n", + physmem_size); } high_physmem = uml_physmem + physmem_size; @@ -398,6 +401,8 @@ int __init linux_main(int argc, char **argv) os_info("Kernel virtual memory size shrunk to %lu bytes\n", virtmem_size); + arch_task_struct_size = sizeof(struct task_struct) + host_fp_size; + os_flush_stdout(); return start_uml(); @@ -412,9 +417,9 @@ void __init setup_arch(char **cmdline_p) { u8 rng_seed[32]; - stack_protections((unsigned long) &init_thread_info); - setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); - mem_total_pages(physmem_size, iomem_size, highmem); + stack_protections((unsigned long) init_task.stack); + setup_physmem(uml_physmem, uml_reserved, physmem_size); + mem_total_pages(physmem_size, iomem_size); uml_dtb_init(); read_initrd(); diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 5c92d58a78e8..a409d4b66114 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -77,8 +77,6 @@ SECTIONS .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) - . 
= ALIGN(KERNEL_STACK_SIZE); - *(.data..init_irqstack) DATA_DATA *(.gnu.linkonce.d*) CONSTRUCTORS diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 544e0b344c75..049dfa5bc9c6 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -12,6 +12,8 @@ obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \ CFLAGS_signal.o += -Wframe-larger-than=4096 +CFLAGS_main.o += -Wno-frame-larger-than + obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \ diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index f1d03cf3957f..a0d01c68ce3e 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -255,12 +255,6 @@ void os_close_file(int fd) { close(fd); } -int os_fsync_file(int fd) -{ - if (fsync(fd) < 0) - return -errno; - return 0; -} int os_seek_file(int fd, unsigned long long offset) { diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index f98ff79cdbf7..0afcdeb8995b 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -108,6 +109,21 @@ int __init main(int argc, char **argv, char **envp) char **new_argv; int ret, i, err; + /* Disable randomization and re-exec if it was changed successfully */ + ret = personality(PER_LINUX | ADDR_NO_RANDOMIZE); + if (ret >= 0 && (ret & (PER_LINUX | ADDR_NO_RANDOMIZE)) != + (PER_LINUX | ADDR_NO_RANDOMIZE)) { + char buf[4096] = {}; + ssize_t ret; + + ret = readlink("/proc/self/exe", buf, sizeof(buf)); + if (ret < 0 || ret >= sizeof(buf)) { + perror("readlink failure"); + exit(1); + } + execve(buf, argv, envp); + } + set_stklim(); setup_env_path(); @@ -140,7 +156,7 @@ int __init main(int argc, char **argv, char **envp) #endif change_sig(SIGPIPE, 0); - ret = linux_main(argc, argv); + ret = linux_main(argc, argv, envp); /* * Disable SIGPROF - I have no idea why libc doesn't do this or turn @@ -182,6 +198,7 @@ int __init main(int argc, char **argv, char **envp) } extern void *__real_malloc(int); +extern void __real_free(void *); /* workaround for -Wmissing-prototypes warnings */ void *__wrap_malloc(int size); @@ -219,10 +236,6 @@ void *__wrap_calloc(int n, int size) return ptr; } -extern void __real_free(void *); - -extern unsigned long high_physmem; - void __wrap_free(void *ptr) { unsigned long addr = (unsigned long) ptr; diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index cf44d386f23c..72f302f4d197 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -39,10 +39,22 @@ void kasan_map_memory(void *start, size_t len) strerror(errno)); exit(1); } + + if (madvise(start, len, MADV_DONTDUMP)) { + os_info("Couldn't set MAD_DONTDUMP on shadow memory: %s\n.", + strerror(errno)); + exit(1); + } + + if (madvise(start, len, MADV_DONTFORK)) { + os_info("Couldn't set MADV_DONTFORK on shadow memory: %s\n.", + strerror(errno)); + exit(1); + } } /* Set by make_tempfile() during early boot. */ -static char *tempdir = NULL; +char *tempdir = NULL; /* Check if dir is on tmpfs. Return 0 if yes, -1 if no or error. 
*/ static int __init check_tmpfs(const char *dir) diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index e52dd37ddadc..9f086f939420 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -12,94 +12,18 @@ #include #include #include +#include #include #include #include #include #include -#define ARBITRARY_ADDR -1 -#define FAILURE_PID -1 - -#define STAT_PATH_LEN sizeof("/proc/#######/stat\0") -#define COMM_SCANF "%*[^)])" - -unsigned long os_process_pc(int pid) -{ - char proc_stat[STAT_PATH_LEN], buf[256]; - unsigned long pc = ARBITRARY_ADDR; - int fd, err; - - sprintf(proc_stat, "/proc/%d/stat", pid); - fd = open(proc_stat, O_RDONLY, 0); - if (fd < 0) { - printk(UM_KERN_ERR "os_process_pc - couldn't open '%s', " - "errno = %d\n", proc_stat, errno); - goto out; - } - CATCH_EINTR(err = read(fd, buf, sizeof(buf))); - if (err < 0) { - printk(UM_KERN_ERR "os_process_pc - couldn't read '%s', " - "err = %d\n", proc_stat, errno); - goto out_close; - } - os_close_file(fd); - pc = ARBITRARY_ADDR; - if (sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d " - "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " - "%*d %*d %*d %*d %*d %lu", &pc) != 1) - printk(UM_KERN_ERR "os_process_pc - couldn't find pc in '%s'\n", - buf); - out_close: - close(fd); - out: - return pc; -} - -int os_process_parent(int pid) -{ - char stat[STAT_PATH_LEN]; - char data[256]; - int parent = FAILURE_PID, n, fd; - - if (pid == -1) - return parent; - - snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); - fd = open(stat, O_RDONLY, 0); - if (fd < 0) { - printk(UM_KERN_ERR "Couldn't open '%s', errno = %d\n", stat, - errno); - return parent; - } - - CATCH_EINTR(n = read(fd, data, sizeof(data))); - close(fd); - - if (n < 0) { - printk(UM_KERN_ERR "Couldn't read '%s', errno = %d\n", stat, - errno); - return parent; - } - - parent = FAILURE_PID; - n = sscanf(data, "%*d " COMM_SCANF " %*c %d", &parent); - if (n != 1) - printk(UM_KERN_ERR "Failed to scan '%s'\n", data); - - return parent; -} - void os_alarm_process(int pid) { kill(pid, SIGALRM); } -void os_stop_process(int pid) -{ - kill(pid, SIGSTOP); -} - void os_kill_process(int pid, int reap_child) { kill(pid, SIGKILL); @@ -130,11 +54,6 @@ int os_getpid(void) return syscall(__NR_getpid); } -int os_getpgrp(void) -{ - return getpgrp(); -} - int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x) { @@ -285,3 +204,8 @@ void init_new_thread_signals(void) set_handler(SIGIO); signal(SIGWINCH, SIG_IGN); } + +void os_set_pdeathsig(void) +{ + prctl(PR_SET_PDEATHSIG, SIGKILL); +} diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c index bd80b921add0..d7ca148807b2 100644 --- a/arch/um/os-Linux/registers.c +++ b/arch/um/os-Linux/registers.c @@ -10,11 +10,12 @@ #include #include #include +#include /* This is set once at boot time and not changed thereafter */ static unsigned long exec_regs[MAX_REG_NR]; -static unsigned long exec_fp_regs[FP_SIZE]; +static unsigned long *exec_fp_regs; int init_pid_registers(int pid) { @@ -24,7 +25,11 @@ int init_pid_registers(int pid) if (err < 0) return -errno; - arch_init_registers(pid); + err = arch_init_registers(pid); + if (err < 0) + return err; + + exec_fp_regs = malloc(host_fp_size); get_fp_registers(pid, exec_fp_regs); return 0; } @@ -34,5 +39,5 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs) memcpy(regs, exec_regs, sizeof(exec_regs)); if (fp_regs) - memcpy(fp_regs, exec_fp_regs, sizeof(exec_fp_regs)); + 
memcpy(fp_regs, exec_fp_regs, host_fp_size); } diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 9e71794839e8..9aac8def4d63 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -55,6 +55,7 @@ static int write_sigio_thread(void *unused) int i, n, respond_fd; char c; + os_set_pdeathsig(); os_fix_helper_signals(); fds = ¤t_poll; while (1) { diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index b11ed66c8bb0..9ea7269ffb77 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -26,7 +26,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGFPE] = relay_signal, [SIGILL] = relay_signal, [SIGWINCH] = winch, - [SIGBUS] = bus_handler, + [SIGBUS] = relay_signal, [SIGSEGV] = segv_handler, [SIGIO] = sigio_handler, }; @@ -65,7 +65,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGALRM_MASK (1 << SIGALRM_BIT) int signals_enabled; -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) static int signals_blocked, signals_blocked_pending; #endif static unsigned int signals_pending; @@ -75,7 +75,7 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { int enabled = signals_enabled; -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) if ((signals_blocked || __atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) && (sig == SIGIO)) { @@ -190,43 +190,8 @@ static void hard_handler(int sig, siginfo_t *si, void *p) { ucontext_t *uc = p; mcontext_t *mc = &uc->uc_mcontext; - unsigned long pending = 1UL << sig; - do { - int nested, bail; - - /* - * pending comes back with one bit set for each - * interrupt that arrived while setting up the stack, - * plus a bit for this interrupt, plus the zero bit is - * set if this is a nested interrupt. - * If bail is true, then we interrupted another - * handler setting up the stack. In this case, we - * have to return, and the upper handler will deal - * with this interrupt. - */ - bail = to_irq_stack(&pending); - if (bail) - return; - - nested = pending & 1; - pending &= ~1; - - while ((sig = ffs(pending)) != 0){ - sig--; - pending &= ~(1 << sig); - (*handlers[sig])(sig, (struct siginfo *)si, mc); - } - - /* - * Again, pending comes back with a mask of signals - * that arrived while tearing down the stack. If this - * is non-zero, we just go back, set up the stack - * again, and handle the new interrupts. 
- */ - if (!nested) - pending = from_irq_stack(nested); - } while (pending); + (*handlers[sig])(sig, (struct siginfo *)si, mc); } void set_handler(int sig) @@ -297,7 +262,7 @@ void unblock_signals(void) return; signals_enabled = 1; -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) deliver_time_travel_irqs(); #endif @@ -389,7 +354,7 @@ int um_set_signals_trace(int enable) return ret; } -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) void mark_sigio_pending(void) { /* @@ -487,11 +452,3 @@ void unblock_signals_hard(void) unblocking = false; } #endif - -int os_is_signal_stack(void) -{ - stack_t ss; - sigaltstack(NULL, &ss); - - return ss.ss_flags & SS_ONSTACK; -} diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index 9a13ac23c606..d7f1814b0e5a 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -217,24 +217,3 @@ int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len) return 0; } - -int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, - unsigned int prot) -{ - struct stub_syscall *sc; - - /* Compress with previous syscall if that is possible */ - sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr); - if (sc && sc->mem.prot == prot) { - sc->mem.length += len; - return 0; - } - - sc = syscall_stub_alloc(mm_idp); - sc->syscall = STUB_SYSCALL_MPROTECT; - sc->mem.addr = addr; - sc->mem.length = len; - sc->mem.prot = prot; - - return 0; -} diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index b6f656bcffb1..f683cfc9e51a 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -10,8 +10,11 @@ #include #include #include +#include +#include #include #include +#include #include #include #include @@ -141,16 +144,10 @@ void wait_stub_done(int pid) extern unsigned long current_stub_stack(void); -static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux_fp_regs) +static void get_skas_faultinfo(int pid, struct faultinfo *fi) { int err; - err = get_fp_registers(pid, aux_fp_regs); - if (err < 0) { - printk(UM_KERN_ERR "save_fp_registers returned %d\n", - err); - fatal_sigsegv(); - } err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); if (err) { printk(UM_KERN_ERR "Failed to continue stub, pid = %d, " @@ -164,18 +161,11 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux * the stub stack page. We just have to copy it. */ memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); - - err = put_fp_registers(pid, aux_fp_regs); - if (err < 0) { - printk(UM_KERN_ERR "put_fp_registers returned %d\n", - err); - fatal_sigsegv(); - } } -static void handle_segv(int pid, struct uml_pt_regs *regs, unsigned long *aux_fp_regs) +static void handle_segv(int pid, struct uml_pt_regs *regs) { - get_skas_faultinfo(pid, ®s->faultinfo, aux_fp_regs); + get_skas_faultinfo(pid, ®s->faultinfo); segv(regs->faultinfo, 0, 1, NULL); } @@ -189,69 +179,131 @@ static void handle_trap(int pid, struct uml_pt_regs *regs) extern char __syscall_stub_start[]; -/** - * userspace_tramp() - userspace trampoline - * @stack: pointer to the new userspace stack page - * - * The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed. - * This function will run on a temporary stack page. 
- * It ptrace()'es itself, then - * Two pages are mapped into the userspace address space: - * - STUB_CODE (with EXEC), which contains the skas stub code - * - STUB_DATA (with R/W), which contains a data page that is used to transfer certain data between the UML userspace process and the UML kernel. - * Also for the userspace process a SIGSEGV handler is installed to catch pagefaults in the userspace process. - * And last the process stops itself to give control to the UML kernel for this userspace process. - * - * Return: Always zero, otherwise the current userspace process is ended with non null exit() call - */ +static int stub_exe_fd; + static int userspace_tramp(void *stack) { - struct sigaction sa; - void *addr; - int fd; + char *const argv[] = { "uml-userspace", NULL }; + int pipe_fds[2]; unsigned long long offset; - unsigned long segv_handler = STUB_CODE + - (unsigned long) stub_segv_handler - - (unsigned long) __syscall_stub_start; + struct stub_init_data init_data = { + .stub_start = STUB_START, + .segv_handler = STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) __syscall_stub_start, + }; + struct iomem_region *iomem; + int ret; - ptrace(PTRACE_TRACEME, 0, 0, 0); + init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start), + &offset); + init_data.stub_code_offset = MMAP_OFFSET(offset); - signal(SIGTERM, SIG_DFL); - signal(SIGWINCH, SIG_IGN); + init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset); + init_data.stub_data_offset = MMAP_OFFSET(offset); - fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); - addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, - PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); - if (addr == MAP_FAILED) { - os_info("mapping mmap stub at 0x%lx failed, errno = %d\n", - STUB_CODE, errno); - exit(1); + /* Set CLOEXEC on all FDs and then unset on all memory related FDs */ + close_range(0, ~0U, CLOSE_RANGE_CLOEXEC); + + fcntl(init_data.stub_data_fd, F_SETFD, 0); + for (iomem = iomem_regions; iomem; iomem = iomem->next) + fcntl(iomem->fd, F_SETFD, 0); + + /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */ + if (pipe(pipe_fds)) + exit(2); + + if (dup2(pipe_fds[0], 0) < 0) + exit(3); + close(pipe_fds[0]); + + /* Write init_data and close write side */ + ret = write(pipe_fds[1], &init_data, sizeof(init_data)); + close(pipe_fds[1]); + + if (ret != sizeof(init_data)) + exit(4); + + execveat(stub_exe_fd, "", argv, NULL, AT_EMPTY_PATH); + + exit(5); +} + +extern char stub_exe_start[]; +extern char stub_exe_end[]; + +extern char *tempdir; + +#define STUB_EXE_NAME_TEMPLATE "/uml-userspace-XXXXXX" + +#ifndef MFD_EXEC +#define MFD_EXEC 0x0010U +#endif + +static int __init init_stub_exe_fd(void) +{ + size_t written = 0; + char *tmpfile = NULL; + + stub_exe_fd = memfd_create("uml-userspace", + MFD_EXEC | MFD_CLOEXEC | MFD_ALLOW_SEALING); + + if (stub_exe_fd < 0) { + printk(UM_KERN_INFO "Could not create executable memfd, using temporary file!"); + + tmpfile = malloc(strlen(tempdir) + + strlen(STUB_EXE_NAME_TEMPLATE) + 1); + if (tmpfile == NULL) + panic("Failed to allocate memory for stub binary name"); + + strcpy(tmpfile, tempdir); + strcat(tmpfile, STUB_EXE_NAME_TEMPLATE); + + stub_exe_fd = mkstemp(tmpfile); + if (stub_exe_fd < 0) + panic("Could not create temporary file for stub binary: %d", + -errno); } - fd = phys_mapping(uml_to_phys(stack), &offset); - addr = mmap((void *) STUB_DATA, - STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_SHARED, fd, offset); - if (addr == 
MAP_FAILED) { - os_info("mapping segfault stack at 0x%lx failed, errno = %d\n", - STUB_DATA, errno); - exit(1); + while (written < stub_exe_end - stub_exe_start) { + ssize_t res = write(stub_exe_fd, stub_exe_start + written, + stub_exe_end - stub_exe_start - written); + if (res < 0) { + if (errno == EINTR) + continue; + + if (tmpfile) + unlink(tmpfile); + panic("Failed write stub binary: %d", -errno); + } + + written += res; } - set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO; - sa.sa_sigaction = (void *) segv_handler; - sa.sa_restorer = NULL; - if (sigaction(SIGSEGV, &sa, NULL) < 0) { - os_info("%s - setting SIGSEGV handler failed - errno = %d\n", - __func__, errno); - exit(1); + if (!tmpfile) { + fcntl(stub_exe_fd, F_ADD_SEALS, + F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL); + } else { + if (fchmod(stub_exe_fd, 00500) < 0) { + unlink(tmpfile); + panic("Could not make stub binary executable: %d", + -errno); + } + + close(stub_exe_fd); + stub_exe_fd = open(tmpfile, O_RDONLY | O_CLOEXEC | O_NOFOLLOW); + if (stub_exe_fd < 0) { + unlink(tmpfile); + panic("Could not reopen stub binary: %d", -errno); + } + + unlink(tmpfile); + free(tmpfile); } - kill(os_getpid(), SIGSTOP); return 0; } +__initcall(init_stub_exe_fd); int userspace_pid[NR_CPUS]; @@ -270,7 +322,7 @@ int start_userspace(unsigned long stub_stack) { void *stack; unsigned long sp; - int pid, status, n, flags, err; + int pid, status, n, err; /* setup a temporary stack page */ stack = mmap(NULL, UM_KERN_PAGE_SIZE, @@ -286,10 +338,10 @@ int start_userspace(unsigned long stub_stack) /* set stack pointer to the end of the stack page, so it can grow downwards */ sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; - flags = CLONE_FILES | SIGCHLD; - /* clone into new userspace process */ - pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); + pid = clone(userspace_tramp, (void *) sp, + CLONE_VFORK | CLONE_VM | SIGCHLD, + (void *)stub_stack); if (pid < 0) { err = -errno; printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", @@ -336,7 +388,10 @@ int start_userspace(unsigned long stub_stack) return err; } -void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) +int unscheduled_userspace_iterations; +extern unsigned long tt_extra_sched_jiffies; + +void userspace(struct uml_pt_regs *regs) { int err, status, op, pid = userspace_pid[0]; siginfo_t si; @@ -345,6 +400,29 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) interrupt_end(); while (1) { + /* + * When we are in time-travel mode, userspace can theoretically + * do a *lot* of work without being scheduled. The problem with + * this is that it will prevent kernel bookkeeping (primarily + * the RCU) from running and this can for example cause OOM + * situations. + * + * This code accounts a jiffie against the scheduling clock + * after the defined userspace iterations in the same thread. + * By doing so the situation is effectively prevented. 
+ */ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) { +#ifdef CONFIG_UML_MAX_USERSPACE_ITERATIONS + if (CONFIG_UML_MAX_USERSPACE_ITERATIONS && + unscheduled_userspace_iterations++ > + CONFIG_UML_MAX_USERSPACE_ITERATIONS) { + tt_extra_sched_jiffies += 1; + unscheduled_userspace_iterations = 0; + } +#endif + } + time_travel_print_bc_msg(); current_mm_sync(); @@ -435,11 +513,11 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) case SIGSEGV: if (PTRACE_FULL_FAULTINFO) { get_skas_faultinfo(pid, - ®s->faultinfo, aux_fp_regs); + ®s->faultinfo); (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, regs); } - else handle_segv(pid, regs, aux_fp_regs); + else handle_segv(pid, regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs); @@ -487,6 +565,8 @@ void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) void switch_threads(jmp_buf *me, jmp_buf *you) { + unscheduled_userspace_iterations = 0; + if (UML_SETJMP(me) == 0) UML_LONGJMP(you, 1); } @@ -570,6 +650,7 @@ static bool noreboot; static int __init noreboot_cmd_param(char *str, int *add) { + *add = 0; noreboot = true; return 0; } diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index e09d65b05d1c..eb523ab1e218 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -358,6 +358,8 @@ char *get_umid(void) static int __init set_uml_dir(char *name, int *add) { + *add = 0; + if (*name == '\0') { os_warn("uml_dir can't be an empty string\n"); return 0; diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 1dca4ffbd572..4193e04d7e4a 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -52,8 +52,8 @@ void setup_machinename(char *machine_out) struct utsname host; uname(&host); -#ifdef UML_CONFIG_UML_X86 -# ifndef UML_CONFIG_64BIT +#if IS_ENABLED(CONFIG_UML_X86) +# if !IS_ENABLED(CONFIG_64BIT) if (!strcmp(host.machine, "x86_64")) { strcpy(machine_out, "i686"); return; diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 186f13268401..986045d5e638 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -10,6 +10,7 @@ config UML_X86 def_bool y select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 select DCACHE_WORD_ACCESS + select HAVE_EFFICIENT_UNALIGNED_ACCESS config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" @@ -28,17 +29,6 @@ config X86_64 def_bool 64BIT select MODULES_USE_ELF_RELA -config 3_LEVEL_PGTABLES - bool "Three-level pagetables" if !64BIT - default 64BIT - help - Three-level pagetables will let UML have more than 4G of physical - memory. All the memory that can't be mapped directly will be treated - as high memory. - - However, this it experimental on 32-bit architectures, so if unsure say - N (on x86-64 it's automatically enabled, instead, as it's safe there). 
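
Illustration (not part of the patch): the iteration accounting added to userspace() above only has an effect because the new sched_clock() in arch/um/kernel/time.c is driven by jiffies plus tt_extra_sched_jiffies. A condensed sketch of the combined logic — the helper name is hypothetical, the variable names follow the patch:

```c
extern unsigned long tt_extra_sched_jiffies;	/* arch/um/kernel/time.c */
static int unscheduled_userspace_iterations;	/* os-Linux/skas/process.c */

static void tt_account_unscheduled_switch(void)	/* hypothetical helper */
{
#ifdef CONFIG_UML_MAX_USERSPACE_ITERATIONS
	/*
	 * Every kernel/userspace switch of the same thread bumps the counter;
	 * once the configured limit is exceeded, one jiffy is charged to
	 * sched_clock(), which is enough to let the scheduler (and RCU)
	 * make progress again.
	 */
	if (CONFIG_UML_MAX_USERSPACE_ITERATIONS &&
	    unscheduled_userspace_iterations++ >
			CONFIG_UML_MAX_USERSPACE_ITERATIONS) {
		tt_extra_sched_jiffies += 1;
		unscheduled_userspace_iterations = 0;
	}
#endif
}
```

When a real timer tick is delivered, time_travel_periodic_timer()/time_travel_oneshot_timer() above drop one extra jiffy again, so the charged jiffy and the tick do not both advance sched_clock().
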
- config ARCH_HAS_SC_SIGNALS def_bool !64BIT diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 36e67fc97c22..b42c31cd2390 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -10,7 +10,7 @@ else endif obj-y = bugs_$(BITS).o delay.o fault.o \ - ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ + ptrace.o ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ stub_segv.o \ sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ mem_$(BITS).o subarch.o os-Linux/ diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 6052200fe925..62ed5d68a978 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -8,6 +8,8 @@ #include #include +#define CORE_DUMP_USE_REGSET + #ifdef CONFIG_X86_32 #define R_386_NONE 0 diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h index 2fef3da55533..2641d28d115c 100644 --- a/arch/x86/um/asm/ptrace.h +++ b/arch/x86/um/asm/ptrace.h @@ -2,6 +2,16 @@ #ifndef __UM_X86_PTRACE_H #define __UM_X86_PTRACE_H +/* This is here because signal.c needs the REGSET_FP_LEGACY definition */ +enum { + REGSET_GENERAL, +#ifdef CONFIG_X86_32 + REGSET_FP_LEGACY, +#endif + REGSET_FP, + REGSET_XSTATE, +}; + #include #ifndef CONFIG_X86_32 #define __FRAME_OFFSETS /* Needed to get the R* macros */ diff --git a/arch/x86/um/os-Linux/Makefile b/arch/x86/um/os-Linux/Makefile index 5249bbc30dcd..77a308aaa5ec 100644 --- a/arch/x86/um/os-Linux/Makefile +++ b/arch/x86/um/os-Linux/Makefile @@ -3,7 +3,7 @@ # Licensed under the GPL # -obj-y = registers.o task_size.o mcontext.o +obj-y = registers.o mcontext.o obj-$(CONFIG_X86_32) += tls.o diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c index f3638dd09cec..76eaeb93928c 100644 --- a/arch/x86/um/os-Linux/registers.c +++ b/arch/x86/um/os-Linux/registers.c @@ -16,132 +16,57 @@ #include #include #include +#include -static int have_xstate_support; - -int save_i387_registers(int pid, unsigned long *fp_regs) -{ - if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) - return -errno; - return 0; -} - -int save_fp_registers(int pid, unsigned long *fp_regs) -{ -#ifdef PTRACE_GETREGSET - struct iovec iov; - - if (have_xstate_support) { - iov.iov_base = fp_regs; - iov.iov_len = FP_SIZE * sizeof(unsigned long); - if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0) - return -errno; - return 0; - } else -#endif - return save_i387_registers(pid, fp_regs); -} - -int restore_i387_registers(int pid, unsigned long *fp_regs) -{ - if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) - return -errno; - return 0; -} - -int restore_fp_registers(int pid, unsigned long *fp_regs) -{ -#ifdef PTRACE_SETREGSET - struct iovec iov; - if (have_xstate_support) { - iov.iov_base = fp_regs; - iov.iov_len = FP_SIZE * sizeof(unsigned long); - if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0) - return -errno; - return 0; - } else -#endif - return restore_i387_registers(pid, fp_regs); -} - -#ifdef __i386__ -int have_fpx_regs = 1; -int save_fpx_registers(int pid, unsigned long *fp_regs) -{ - if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0) - return -errno; - return 0; -} - -int restore_fpx_registers(int pid, unsigned long *fp_regs) -{ - if (ptrace(PTRACE_SETFPXREGS, pid, 0, fp_regs) < 0) - return -errno; - return 0; -} +unsigned long host_fp_size; int get_fp_registers(int pid, unsigned long *regs) { - if (have_fpx_regs) - return save_fpx_registers(pid, regs); - else - return save_fp_registers(pid, regs); + struct iovec iov = { + .iov_base = regs, + .iov_len = host_fp_size, + }; + + if 
(ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0) + return -errno; + return 0; } int put_fp_registers(int pid, unsigned long *regs) { - if (have_fpx_regs) - return restore_fpx_registers(pid, regs); - else - return restore_fp_registers(pid, regs); + struct iovec iov = { + .iov_base = regs, + .iov_len = host_fp_size, + }; + + if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0) + return -errno; + return 0; } -void arch_init_registers(int pid) +int arch_init_registers(int pid) { - struct user_fpxregs_struct fpx_regs; - int err; + struct iovec iov = { + /* Just use plenty of space, it does not cost us anything */ + .iov_len = 2 * 1024 * 1024, + }; + int ret; - err = ptrace(PTRACE_GETFPXREGS, pid, 0, &fpx_regs); - if (!err) - return; + iov.iov_base = mmap(NULL, iov.iov_len, PROT_WRITE | PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (iov.iov_base == MAP_FAILED) + return -ENOMEM; - if (errno != EIO) - panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", - errno); + /* GDB has x86_xsave_length, which uses x86_cpuid_count */ + ret = ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov); + if (ret) + ret = -errno; + munmap(iov.iov_base, 2 * 1024 * 1024); - have_fpx_regs = 0; + host_fp_size = iov.iov_len; + + return ret; } -#else - -int get_fp_registers(int pid, unsigned long *regs) -{ - return save_fp_registers(pid, regs); -} - -int put_fp_registers(int pid, unsigned long *regs) -{ - return restore_fp_registers(pid, regs); -} - -void arch_init_registers(int pid) -{ -#ifdef PTRACE_GETREGSET - void * fp_regs; - struct iovec iov; - - fp_regs = malloc(FP_SIZE * sizeof(unsigned long)); - if(fp_regs == NULL) - return; - - iov.iov_base = fp_regs; - iov.iov_len = FP_SIZE * sizeof(unsigned long); - if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0) - have_xstate_support = 1; - - free(fp_regs); -#endif -} -#endif unsigned long get_thread_reg(int reg, jmp_buf *buf) { diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c deleted file mode 100644 index 1dc9adc20b1c..000000000000 --- a/arch/x86/um/os-Linux/task_size.c +++ /dev/null @@ -1,151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include - -#ifdef __i386__ - -static jmp_buf buf; - -static void segfault(int sig) -{ - longjmp(buf, 1); -} - -static int page_ok(unsigned long page) -{ - unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT); - unsigned long n = ~0UL; - void *mapped = NULL; - int ok = 0; - - /* - * First see if the page is readable. If it is, it may still - * be a VDSO, so we go on to see if it's writable. If not - * then try mapping memory there. If that fails, then we're - * still in the kernel area. As a sanity check, we'll fail if - * the mmap succeeds, but gives us an address different from - * what we wanted. - */ - if (setjmp(buf) == 0) - n = *address; - else { - mapped = mmap(address, UM_KERN_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (mapped == MAP_FAILED) - return 0; - if (mapped != address) - goto out; - } - - /* - * Now, is it writeable? If so, then we're in user address - * space. If not, then try mprotecting it and try the write - * again. 
- */ - if (setjmp(buf) == 0) { - *address = n; - ok = 1; - goto out; - } else if (mprotect(address, UM_KERN_PAGE_SIZE, - PROT_READ | PROT_WRITE) != 0) - goto out; - - if (setjmp(buf) == 0) { - *address = n; - ok = 1; - } - - out: - if (mapped != NULL) - munmap(mapped, UM_KERN_PAGE_SIZE); - return ok; -} - -unsigned long os_get_top_address(void) -{ - struct sigaction sa, old; - unsigned long bottom = 0; - /* - * A 32-bit UML on a 64-bit host gets confused about the VDSO at - * 0xffffe000. It is mapped, is readable, can be reprotected writeable - * and written. However, exec discovers later that it can't be - * unmapped. So, just set the highest address to be checked to just - * below it. This might waste some address space on 4G/4G 32-bit - * hosts, but shouldn't hurt otherwise. - */ - unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT; - unsigned long test, original; - - printf("Locating the bottom of the address space ... "); - fflush(stdout); - - /* - * We're going to be longjmping out of the signal handler, so - * SA_DEFER needs to be set. - */ - sa.sa_handler = segfault; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_NODEFER; - if (sigaction(SIGSEGV, &sa, &old)) { - perror("os_get_top_address"); - exit(1); - } - - /* Manually scan the address space, bottom-up, until we find - * the first valid page (or run out of them). - */ - for (bottom = 0; bottom < top; bottom++) { - if (page_ok(bottom)) - break; - } - - /* If we've got this far, we ran out of pages. */ - if (bottom == top) { - fprintf(stderr, "Unable to determine bottom of address " - "space.\n"); - exit(1); - } - - printf("0x%lx\n", bottom << UM_KERN_PAGE_SHIFT); - printf("Locating the top of the address space ... "); - fflush(stdout); - - original = bottom; - - /* This could happen with a 4G/4G split */ - if (page_ok(top)) - goto out; - - do { - test = bottom + (top - bottom) / 2; - if (page_ok(test)) - bottom = test; - else - top = test; - } while (top - bottom > 1); - -out: - /* Restore the old SIGSEGV handling */ - if (sigaction(SIGSEGV, &old, NULL)) { - perror("os_get_top_address"); - exit(1); - } - top <<= UM_KERN_PAGE_SHIFT; - printf("0x%lx\n", top); - - return top; -} - -#else - -unsigned long os_get_top_address(void) -{ - /* The old value of CONFIG_TOP_ADDR */ - return 0x7fc0002000; -} - -#endif diff --git a/arch/x86/um/ptrace.c b/arch/x86/um/ptrace.c new file mode 100644 index 000000000000..57c504fd5626 --- /dev/null +++ b/arch/x86/um/ptrace.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_32 +/* + * FPU tag word conversions. + */ + +static inline unsigned short twd_i387_to_fxsr(unsigned short twd) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. 
*/ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; +} + +static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave) +{ + struct _fpxreg *st = NULL; + unsigned long twd = (unsigned long) fxsave->twd; + unsigned long tag; + unsigned long ret = 0xffff0000; + int i; + +#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16) + + for (i = 0; i < 8; i++) { + if (twd & 0x1) { + st = (struct _fpxreg *) FPREG_ADDR(fxsave, i); + + switch (st->exponent & 0x7fff) { + case 0x7fff: + tag = 2; /* Special */ + break; + case 0x0000: + if (!st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3]) { + tag = 1; /* Zero */ + } else { + tag = 2; /* Special */ + } + break; + default: + if (st->significand[3] & 0x8000) + tag = 0; /* Valid */ + else + tag = 2; /* Special */ + break; + } + } else { + tag = 3; /* Empty */ + } + ret |= (tag << (2 * i)); + twd = twd >> 1; + } + return ret; +} + +/* Get/set the old 32bit i387 registers (pre-FPX) */ +static int fpregs_legacy_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp; + int i; + + membuf_store(&to, (unsigned long)fxsave->cwd | 0xffff0000ul); + membuf_store(&to, (unsigned long)fxsave->swd | 0xffff0000ul); + membuf_store(&to, twd_fxsr_to_i387(fxsave)); + membuf_store(&to, fxsave->fip); + membuf_store(&to, fxsave->fcs | ((unsigned long)fxsave->fop << 16)); + membuf_store(&to, fxsave->foo); + membuf_store(&to, fxsave->fos); + + for (i = 0; i < 8; i++) + membuf_write(&to, (void *)fxsave->st_space + i * 16, 10); + + return 0; +} + +static int fpregs_legacy_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp; + const struct user_i387_struct *from; + struct user_i387_struct buf; + int i; + + if (ubuf) { + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + from = &buf; + } else { + from = kbuf; + } + + fxsave->cwd = (unsigned short)(from->cwd & 0xffff); + fxsave->swd = (unsigned short)(from->swd & 0xffff); + fxsave->twd = twd_i387_to_fxsr((unsigned short)(from->twd & 0xffff)); + fxsave->fip = from->fip; + fxsave->fop = (unsigned short)((from->fcs & 0xffff0000ul) >> 16); + fxsave->fcs = (from->fcs & 0xffff); + fxsave->foo = from->foo; + fxsave->fos = from->fos; + + for (i = 0; i < 8; i++) { + memcpy((void *)fxsave->st_space + i * 16, + (void *)from->st_space + i * 10, 10); + } + + return 0; +} +#endif + +static int genregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int reg; + + for (reg = 0; to.left; reg++) + membuf_store(&to, getreg(target, reg * sizeof(unsigned long))); + return 0; +} + +static int genregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + + if (kbuf) { + const unsigned long *k = kbuf; + + while (count >= sizeof(*k) && !ret) { + ret = putreg(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const unsigned long __user *u = ubuf; + + while (count >= sizeof(*u) && !ret) { + unsigned long word; + + ret = __get_user(word, u++); + if (ret) + break; + ret = 
putreg(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } + } + return ret; +} + +static int generic_fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + return regset->n; +} + +static int generic_fpregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + void *fpregs = task_pt_regs(target)->regs.fp; + + membuf_write(&to, fpregs, regset->size * regset->n); + return 0; +} + +static int generic_fpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + void *fpregs = task_pt_regs(target)->regs.fp; + + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + fpregs, 0, regset->size * regset->n); +} + +static struct user_regset uml_regsets[] __ro_after_init = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(struct user_regs_struct) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .regset_get = genregs_get, + .set = genregs_set + }, +#ifdef CONFIG_X86_32 + /* Old FP registers, they are needed in signal frames */ + [REGSET_FP_LEGACY] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_i387_ia32_struct) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .active = generic_fpregs_active, + .regset_get = fpregs_legacy_get, + .set = fpregs_legacy_set, + }, +#endif + [REGSET_FP] = { +#ifdef CONFIG_X86_32 + .core_note_type = NT_PRXFPREG, + .n = sizeof(struct user32_fxsr_struct) / sizeof(long), +#else + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_i387_struct) / sizeof(long), +#endif + .size = sizeof(long), + .align = sizeof(long), + .active = generic_fpregs_active, + .regset_get = generic_fpregs_get, + .set = generic_fpregs_set, + }, + [REGSET_XSTATE] = { + .core_note_type = NT_X86_XSTATE, + .size = sizeof(long), + .align = sizeof(long), + .active = generic_fpregs_active, + .regset_get = generic_fpregs_get, + .set = generic_fpregs_set, + }, + /* TODO: Add TLS regset for 32bit */ +}; + +static const struct user_regset_view user_uml_view = { +#ifdef CONFIG_X86_32 + .name = "i386", .e_machine = EM_386, +#else + .name = "x86_64", .e_machine = EM_X86_64, +#endif + .regsets = uml_regsets, .n = ARRAY_SIZE(uml_regsets) +}; + +const struct user_regset_view * +task_user_regset_view(struct task_struct *tsk) +{ + return &user_uml_view; +} + +static int __init init_regset_xstate_info(void) +{ + uml_regsets[REGSET_XSTATE].n = + host_fp_size / uml_regsets[REGSET_XSTATE].size; + + return 0; +} +arch_initcall(init_regset_xstate_info); diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index b0a71c6cdc6e..3af3cb821524 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -168,65 +169,6 @@ int peek_user(struct task_struct *child, long addr, long data) return put_user(tmp, (unsigned long __user *) data); } -static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) -{ - int err, n, cpu = task_cpu(child); - struct user_i387_struct fpregs; - - err = save_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); - if (err) - return err; - - n = copy_to_user(buf, &fpregs, sizeof(fpregs)); - if(n > 0) - return -EFAULT; - - return n; -} - -static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) -{ - int n, cpu = task_cpu(child); - struct user_i387_struct fpregs; - - n = copy_from_user(&fpregs, 
buf, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return restore_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); -} - -static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) -{ - int err, n, cpu = task_cpu(child); - struct user_fxsr_struct fpregs; - - err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); - if (err) - return err; - - n = copy_to_user(buf, &fpregs, sizeof(fpregs)); - if(n > 0) - return -EFAULT; - - return n; -} - -static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) -{ - int n, cpu = task_cpu(child); - struct user_fxsr_struct fpregs; - - n = copy_from_user(&fpregs, buf, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return restore_fpx_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); -} - long subarch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { @@ -234,17 +176,25 @@ long subarch_ptrace(struct task_struct *child, long request, void __user *datap = (void __user *) data; switch (request) { case PTRACE_GETFPREGS: /* Get the child FPU state. */ - ret = get_fpregs(datap, child); - break; + return copy_regset_to_user(child, task_user_regset_view(child), + REGSET_FP_LEGACY, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_SETFPREGS: /* Set the child FPU state. */ - ret = set_fpregs(datap, child); - break; + return copy_regset_from_user(child, task_user_regset_view(child), + REGSET_FP_LEGACY, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_GETFPXREGS: /* Get the child FPU state. */ - ret = get_fpxregs(datap, child); - break; + return copy_regset_to_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_fxsr_struct), + datap); case PTRACE_SETFPXREGS: /* Set the child FPU state. */ - ret = set_fpxregs(datap, child); - break; + return copy_regset_from_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_fxsr_struct), + datap); default: ret = -EIO; } diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index aa68d83d3f44..e0d4120a45c8 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -8,6 +8,7 @@ #include #include #include +#include #define __FRAME_OFFSETS #include #include @@ -188,36 +189,6 @@ int peek_user(struct task_struct *child, long addr, long data) return put_user(tmp, (unsigned long *) data); } -static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) -{ - int err, n, cpu = ((struct thread_info *) child->stack)->cpu; - struct user_i387_struct fpregs; - - err = save_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); - if (err) - return err; - - n = copy_to_user(buf, &fpregs, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return n; -} - -static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) -{ - int n, cpu = ((struct thread_info *) child->stack)->cpu; - struct user_i387_struct fpregs; - - n = copy_from_user(&fpregs, buf, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return restore_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); -} - long subarch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { @@ -226,11 +197,15 @@ long subarch_ptrace(struct task_struct *child, long request, switch (request) { case PTRACE_GETFPREGS: /* Get the child FPU state. 
*/ - ret = get_fpregs(datap, child); - break; + return copy_regset_to_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_SETFPREGS: /* Set the child FPU state. */ - ret = set_fpregs(datap, child); - break; + return copy_regset_from_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_ARCH_PRCTL: /* XXX Calls ptrace on the host - needs some SMP thinking */ ret = arch_prctl(child, data, (void __user *) addr); diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h index 6ca4ecabc55b..2dd4ca6713f8 100644 --- a/arch/x86/um/shared/sysdep/ptrace.h +++ b/arch/x86/um/shared/sysdep/ptrace.h @@ -56,12 +56,16 @@ struct syscall_args { UPT_SYSCALL_ARG5(r), \ UPT_SYSCALL_ARG6(r) } } ) +extern unsigned long host_fp_size; + struct uml_pt_regs { unsigned long gp[MAX_REG_NR]; - unsigned long fp[MAX_FP_NR]; struct faultinfo faultinfo; long syscall; int is_user; + + /* Dynamically sized FP registers (holds an XSTATE) */ + unsigned long fp[]; }; #define EMPTY_UML_PT_REGS { } @@ -72,4 +76,6 @@ struct uml_pt_regs { extern int user_context(unsigned long sp); +extern int arch_init_registers(int pid); + #endif /* __SYSDEP_X86_PTRACE_H */ diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h index 0c4989842fbe..2392470cac4d 100644 --- a/arch/x86/um/shared/sysdep/ptrace_32.h +++ b/arch/x86/um/shared/sysdep/ptrace_32.h @@ -6,8 +6,6 @@ #ifndef __SYSDEP_I386_PTRACE_H #define __SYSDEP_I386_PTRACE_H -#define MAX_FP_NR HOST_FPX_SIZE - #define UPT_SYSCALL_ARG1(r) UPT_BX(r) #define UPT_SYSCALL_ARG2(r) UPT_CX(r) #define UPT_SYSCALL_ARG3(r) UPT_DX(r) @@ -15,6 +13,4 @@ #define UPT_SYSCALL_ARG5(r) UPT_DI(r) #define UPT_SYSCALL_ARG6(r) UPT_BP(r) -extern void arch_init_registers(int pid); - #endif diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h index 0dc223aa1c2d..e73573ac871f 100644 --- a/arch/x86/um/shared/sysdep/ptrace_64.h +++ b/arch/x86/um/shared/sysdep/ptrace_64.h @@ -8,8 +8,6 @@ #ifndef __SYSDEP_X86_64_PTRACE_H #define __SYSDEP_X86_64_PTRACE_H -#define MAX_FP_NR HOST_FP_SIZE - #define REGS_R8(r) ((r)[HOST_R8]) #define REGS_R9(r) ((r)[HOST_R9]) #define REGS_R10(r) ((r)[HOST_R10]) @@ -57,6 +55,4 @@ #define UPT_SYSCALL_ARG5(r) UPT_R8(r) #define UPT_SYSCALL_ARG6(r) UPT_R9(r) -extern void arch_init_registers(int pid); - #endif diff --git a/arch/x86/um/shared/sysdep/ptrace_user.h b/arch/x86/um/shared/sysdep/ptrace_user.h index 1d1a824fa652..98da23120538 100644 --- a/arch/x86/um/shared/sysdep/ptrace_user.h +++ b/arch/x86/um/shared/sysdep/ptrace_user.h @@ -11,12 +11,6 @@ #define REGS_IP_INDEX HOST_IP #define REGS_SP_INDEX HOST_SP -#ifdef __i386__ -#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE) -#else -#define FP_SIZE HOST_FP_SIZE -#endif - /* * glibc before 2.27 does not include PTRACE_SYSEMU_SINGLESTEP in its enum, * ensure we have a definition by (re-)defining it here. 
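
Aside (not part of the patch): host_fp_size is discovered the same way a debugger would do it — hand PTRACE_GETREGSET an over-sized iovec and read back the trimmed iov_len, as arch_init_registers() above does. A standalone host-side sketch of that probe, assuming a forked child stopped under PTRACE_TRACEME; all names are local to the example:

```c
#include <elf.h>		/* NT_X86_XSTATE */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>

int main(void)
{
	/* "Just use plenty of space, it does not cost us anything" */
	size_t len = 2 * 1024 * 1024;
	struct iovec iov = { .iov_base = malloc(len), .iov_len = len };
	pid_t pid;

	if (!iov.iov_base)
		return 1;

	pid = fork();
	if (pid < 0)
		return 1;
	if (pid == 0) {
		/* child: become traceable and stop */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}
	waitpid(pid, NULL, 0);

	/* the kernel trims iov_len to the actual XSTATE image size */
	if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0)
		printf("host XSTATE size: %zu bytes\n", iov.iov_len);
	else
		perror("PTRACE_GETREGSET");

	kill(pid, SIGKILL);
	waitpid(pid, NULL, 0);
	free(iov.iov_base);
	return 0;
}
```

The probed size is what the patch stores in host_fp_size; it sizes both the fp[] flexible array at the end of struct uml_pt_regs and, via init_regset_xstate_info(), the REGSET_XSTATE regset.
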
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h index 0b44a86dd346..390988132c0a 100644 --- a/arch/x86/um/shared/sysdep/stub_32.h +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -112,11 +112,23 @@ static __always_inline void *get_stub_data(void) unsigned long ret; asm volatile ( - "movl %%esp,%0 ;" - "andl %1,%0" + "call _here_%=;" + "_here_%=:" + "popl %0;" + "andl %1, %0 ;" + "addl %2, %0 ;" : "=a" (ret) - : "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1))); + : "g" (~(UM_KERN_PAGE_SIZE - 1)), + "g" (UM_KERN_PAGE_SIZE)); return (void *)ret; } + +#define stub_start(fn) \ + asm volatile ( \ + "subl %0,%%esp ;" \ + "movl %1, %%eax ; " \ + "call *%%eax ;" \ + :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \ + "i" (&fn)) #endif diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h index 67f44284f1aa..294affbec742 100644 --- a/arch/x86/um/shared/sysdep/stub_64.h +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -28,6 +28,17 @@ static __always_inline long stub_syscall0(long syscall) return ret; } +static __always_inline long stub_syscall1(long syscall, long arg1) +{ + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1) : __syscall_clobber ); + + return ret; +} + static __always_inline long stub_syscall2(long syscall, long arg1, long arg2) { long ret; @@ -106,11 +117,21 @@ static __always_inline void *get_stub_data(void) unsigned long ret; asm volatile ( - "movq %%rsp,%0 ;" - "andq %1,%0" + "lea 0(%%rip), %0;" + "andq %1, %0 ;" + "addq %2, %0 ;" : "=a" (ret) - : "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1))); + : "g" (~(UM_KERN_PAGE_SIZE - 1)), + "g" (UM_KERN_PAGE_SIZE)); return (void *)ret; } + +#define stub_start(fn) \ + asm volatile ( \ + "subq %0,%%rsp ;" \ + "movq %1,%%rax ;" \ + "call *%%rax ;" \ + :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \ + "i" (&fn)) #endif diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 2cc8c2309022..75087e85b6fd 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -16,145 +16,24 @@ #include #include +#include +#include + #ifdef CONFIG_X86_32 - -/* - * FPU tag word conversions. - */ - -static inline unsigned short twd_i387_to_fxsr(unsigned short twd) -{ - unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - - /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. 
*/ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - return tmp; -} - -static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave) -{ - struct _fpxreg *st = NULL; - unsigned long twd = (unsigned long) fxsave->twd; - unsigned long tag; - unsigned long ret = 0xffff0000; - int i; - -#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16) - - for (i = 0; i < 8; i++) { - if (twd & 0x1) { - st = (struct _fpxreg *) FPREG_ADDR(fxsave, i); - - switch (st->exponent & 0x7fff) { - case 0x7fff: - tag = 2; /* Special */ - break; - case 0x0000: - if ( !st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3] ) { - tag = 1; /* Zero */ - } else { - tag = 2; /* Special */ - } - break; - default: - if (st->significand[3] & 0x8000) { - tag = 0; /* Valid */ - } else { - tag = 2; /* Special */ - } - break; - } - } else { - tag = 3; /* Empty */ - } - ret |= (tag << (2 * i)); - twd = twd >> 1; - } - return ret; -} - -static int convert_fxsr_to_user(struct _fpstate __user *buf, - struct user_fxsr_struct *fxsave) -{ - unsigned long env[7]; - struct _fpreg __user *to; - struct _fpxreg *from; - int i; - - env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul; - env[1] = (unsigned long)fxsave->swd | 0xffff0000ul; - env[2] = twd_fxsr_to_i387(fxsave); - env[3] = fxsave->fip; - env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); - env[5] = fxsave->foo; - env[6] = fxsave->fos; - - if (__copy_to_user(buf, env, 7 * sizeof(unsigned long))) - return 1; - - to = &buf->_st[0]; - from = (struct _fpxreg *) &fxsave->st_space[0]; - for (i = 0; i < 8; i++, to++, from++) { - unsigned long __user *t = (unsigned long __user *)to; - unsigned long *f = (unsigned long *)from; - - if (__put_user(*f, t) || - __put_user(*(f + 1), t + 1) || - __put_user(from->exponent, &to->exponent)) - return 1; - } - return 0; -} - -static int convert_fxsr_from_user(struct user_fxsr_struct *fxsave, - struct _fpstate __user *buf) -{ - unsigned long env[7]; - struct _fpxreg *to; - struct _fpreg __user *from; - int i; - - if (copy_from_user( env, buf, 7 * sizeof(long))) - return 1; - - fxsave->cwd = (unsigned short)(env[0] & 0xffff); - fxsave->swd = (unsigned short)(env[1] & 0xffff); - fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); - fxsave->fip = env[3]; - fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16); - fxsave->fcs = (env[4] & 0xffff); - fxsave->foo = env[5]; - fxsave->fos = env[6]; - - to = (struct _fpxreg *) &fxsave->st_space[0]; - from = &buf->_st[0]; - for (i = 0; i < 8; i++, to++, from++) { - unsigned long *t = (unsigned long *)to; - unsigned long __user *f = (unsigned long __user *)from; - - if (__get_user(*t, f) || - __get_user(*(t + 1), f + 1) || - __get_user(to->exponent, &from->exponent)) - return 1; - } - return 0; -} - -extern int have_fpx_regs; - +struct _xstate_64 { + struct _fpstate_64 fpstate; + struct _header xstate_hdr; + struct _ymmh_state ymmh; + /* New processor state extensions go here: */ +}; +#else +#define _xstate_64 _xstate #endif static int copy_sc_from_user(struct pt_regs *regs, struct sigcontext __user *from) { + struct _xstate_64 __user *from_fp64; struct sigcontext sc; int err; @@ -203,35 +82,27 @@ static int copy_sc_from_user(struct pt_regs *regs, #undef GETREG #ifdef CONFIG_X86_32 - if (have_fpx_regs) { - struct user_fxsr_struct fpx; - int pid = userspace_pid[current_thread_info()->cpu]; - - err = 
-			&((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0],
-			sizeof(struct user_fxsr_struct));
-		if (err)
-			return 1;
-
-		err = convert_fxsr_from_user(&fpx, (void *)sc.fpstate);
-		if (err)
-			return 1;
-
-		err = restore_fpx_registers(pid, (unsigned long *) &fpx);
-		if (err < 0) {
-			printk(KERN_ERR "copy_sc_from_user - "
-			       "restore_fpx_registers failed, errno = %d\n",
-			       -err);
-			return 1;
-		}
-	} else
+	from_fp64 = ((void __user *)sc.fpstate) +
+		    offsetof(struct _fpstate_32, _fxsr_env);
+#else
+	from_fp64 = (void __user *)sc.fpstate;
 #endif
-	{
-		err = copy_from_user(regs->regs.fp, (void *)sc.fpstate,
-				     sizeof(struct _xstate));
-		if (err)
-			return 1;
-	}
+
+	err = copy_from_user(regs->regs.fp, from_fp64, host_fp_size);
+	if (err)
+		return 1;
+
+#ifdef CONFIG_X86_32
+	/* Data is duplicated and this copy is the important one */
+	err = copy_regset_from_user(current,
+				    task_user_regset_view(current),
+				    REGSET_FP_LEGACY, 0,
+				    sizeof(struct user_i387_struct),
+				    (void __user *)sc.fpstate);
+	if (err < 0)
+		return err;
+#endif
+
 	return 0;
 }
@@ -239,6 +110,7 @@ static int copy_sc_to_user(struct sigcontext __user *to,
			   struct _xstate __user *to_fp, struct pt_regs *regs,
			   unsigned long mask)
 {
+	struct _xstate_64 __user *to_fp64;
 	struct sigcontext sc;
 	struct faultinfo * fi = &current->thread.arch.faultinfo;
 	int err;
@@ -290,35 +162,46 @@ static int copy_sc_to_user(struct sigcontext __user *to,
 		return 1;
 
 #ifdef CONFIG_X86_32
-	if (have_fpx_regs) {
-		int pid = userspace_pid[current_thread_info()->cpu];
-		struct user_fxsr_struct fpx;
+	err = copy_regset_to_user(current,
+				  task_user_regset_view(current),
+				  REGSET_FP_LEGACY, 0,
+				  sizeof(struct _fpstate_32), to_fp);
+	if (err < 0)
+		return err;
-		err = save_fpx_registers(pid, (unsigned long *) &fpx);
-		if (err < 0){
-			printk(KERN_ERR "copy_sc_to_user - save_fpx_registers "
-			       "failed, errno = %d\n", err);
-			return 1;
-		}
+	__put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic);
-		err = convert_fxsr_to_user(&to_fp->fpstate, &fpx);
-		if (err)
-			return 1;
+	BUILD_BUG_ON(offsetof(struct _xstate, xstate_hdr) !=
+		     offsetof(struct _xstate_64, xstate_hdr) +
+		     offsetof(struct _fpstate_32, _fxsr_env));
+	to_fp64 = (void __user *)to_fp +
+		  offsetof(struct _fpstate_32, _fxsr_env);
+#else
+	to_fp64 = to_fp;
+#endif /* CONFIG_X86_32 */
-		err |= __put_user(fpx.swd, &to_fp->fpstate.status);
-		err |= __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic);
-		if (err)
-			return 1;
+	if (copy_to_user(to_fp64, regs->regs.fp, host_fp_size))
+		return 1;
-		if (copy_to_user(&to_fp->fpstate._fxsr_env[0], &fpx,
-				 sizeof(struct user_fxsr_struct)))
-			return 1;
-	} else
+	/*
+	 * Put magic/size values for userspace. We do not bother to verify them
+	 * later on, however, userspace needs them should it try to read the
+	 * XSTATE data. And ptrace does not fill in these parts.
+	 */
+	BUILD_BUG_ON(sizeof(int) != FP_XSTATE_MAGIC2_SIZE);
+#ifdef CONFIG_X86_32
+	__put_user(offsetof(struct _fpstate_32, _fxsr_env) +
+		   host_fp_size + FP_XSTATE_MAGIC2_SIZE,
+		   &to_fp64->fpstate.sw_reserved.extended_size);
+#else
+	__put_user(host_fp_size + FP_XSTATE_MAGIC2_SIZE,
+		   &to_fp64->fpstate.sw_reserved.extended_size);
 #endif
-	{
-		if (copy_to_user(to_fp, regs->regs.fp, sizeof(struct _xstate)))
-			return 1;
-	}
+	__put_user(host_fp_size, &to_fp64->fpstate.sw_reserved.xstate_size);
+
+	__put_user(FP_XSTATE_MAGIC1, &to_fp64->fpstate.sw_reserved.magic1);
+	__put_user(FP_XSTATE_MAGIC2,
+		   (int __user *)((void __user *)to_fp64 + host_fp_size));
 	return 0;
 }
@@ -336,34 +219,15 @@ static int copy_ucontext_to_user(struct ucontext __user *uc,
 	return err;
 }
-struct sigframe
-{
-	char __user *pretcode;
-	int sig;
-	struct sigcontext sc;
-	struct _xstate fpstate;
-	unsigned long extramask[_NSIG_WORDS-1];
-	char retcode[8];
-};
-
-struct rt_sigframe
-{
-	char __user *pretcode;
-	int sig;
-	struct siginfo __user *pinfo;
-	void __user *puc;
-	struct siginfo info;
-	struct ucontext uc;
-	struct _xstate fpstate;
-	char retcode[8];
-};
-
 int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig,
			  struct pt_regs *regs, sigset_t *mask)
 {
+	size_t math_size = offsetof(struct _fpstate_32, _fxsr_env) +
+			   host_fp_size + FP_XSTATE_MAGIC2_SIZE;
 	struct sigframe __user *frame;
 	void __user *restorer;
 	int err = 0, sig = ksig->sig;
+	unsigned long fp_to;
 	/* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */
 	stack_top = ((stack_top + 4) & -16UL) - 4;
@@ -371,13 +235,21 @@ int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig,
 	if (!access_ok(frame, sizeof(*frame)))
 		return 1;
+	/* Add required space for math frame */
+	frame = (struct sigframe __user *)((unsigned long)frame - math_size);
+
 	restorer = frame->retcode;
 	if (ksig->ka.sa.sa_flags & SA_RESTORER)
 		restorer = ksig->ka.sa.sa_restorer;
-	err |= __put_user(restorer, &frame->pretcode);
+	err |= __put_user(restorer, (void __user * __user *)&frame->pretcode);
 	err |= __put_user(sig, &frame->sig);
-	err |= copy_sc_to_user(&frame->sc, &frame->fpstate, regs, mask->sig[0]);
+
+	fp_to = (unsigned long)frame + sizeof(*frame);
+
+	err |= copy_sc_to_user(&frame->sc,
+			       (struct _xstate __user *)fp_to,
+			       regs, mask->sig[0]);
 	if (_NSIG_WORDS > 1)
 		err |= __copy_to_user(&frame->extramask, &mask->sig[1],
				      sizeof(frame->extramask));
@@ -407,26 +279,35 @@ int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig,
 int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
			  struct pt_regs *regs, sigset_t *mask)
 {
+	size_t math_size = offsetof(struct _fpstate_32, _fxsr_env) +
+			   host_fp_size + FP_XSTATE_MAGIC2_SIZE;
 	struct rt_sigframe __user *frame;
 	void __user *restorer;
 	int err = 0, sig = ksig->sig;
+	unsigned long fp_to;
 	stack_top &= -8UL;
 	frame = (struct rt_sigframe __user *) stack_top - 1;
 	if (!access_ok(frame, sizeof(*frame)))
 		return 1;
+	/* Add required space for math frame */
+	frame = (struct rt_sigframe __user *)((unsigned long)frame - math_size);
+
 	restorer = frame->retcode;
 	if (ksig->ka.sa.sa_flags & SA_RESTORER)
 		restorer = ksig->ka.sa.sa_restorer;
-	err |= __put_user(restorer, &frame->pretcode);
+	err |= __put_user(restorer, (void __user * __user *)&frame->pretcode);
 	err |= __put_user(sig, &frame->sig);
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
+	err |= __put_user(&frame->info, (void __user * __user *)&frame->pinfo);
+	err |= __put_user(&frame->uc, (void __user * __user *)&frame->puc);
 	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
-	err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask,
-				     PT_REGS_SP(regs));
+
+	fp_to = (unsigned long)frame + sizeof(*frame);
+
+	err |= copy_ucontext_to_user(&frame->uc, (struct _xstate __user *)fp_to,
+				     mask, PT_REGS_SP(regs));
 	/*
 	 * This is movl $,%eax ; int $0x80
@@ -478,27 +359,24 @@ SYSCALL_DEFINE0(sigreturn)
 #else
-struct rt_sigframe
-{
-	char __user *pretcode;
-	struct ucontext uc;
-	struct siginfo info;
-	struct _xstate fpstate;
-};
-
 int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
			  struct pt_regs *regs, sigset_t *set)
 {
+	unsigned long math_size = host_fp_size + FP_XSTATE_MAGIC2_SIZE;
 	struct rt_sigframe __user *frame;
 	int err = 0, sig = ksig->sig;
 	unsigned long fp_to;
 	frame = (struct rt_sigframe __user *)
		round_down(stack_top - sizeof(struct rt_sigframe), 16);
+
+	/* Add required space for math frame */
+	frame = (struct rt_sigframe __user *)((unsigned long)frame - math_size);
+
 	/* Subtract 128 for a red zone and 8 for proper alignment */
 	frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
-	if (!access_ok(frame, sizeof(*frame)))
+	if (!access_ok(frame, sizeof(*frame) + math_size))
 		goto out;
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
@@ -509,12 +387,14 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
 	/* Create the ucontext. */
 	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(NULL, &frame->uc.uc_link);
 	err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs));
-	err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
-			       set->sig[0]);
-	fp_to = (unsigned long)&frame->fpstate;
+	fp_to = (unsigned long)frame + sizeof(*frame);
+
+	err |= copy_sc_to_user(&frame->uc.uc_mcontext,
+			       (struct _xstate __user *)fp_to,
+			       regs, set->sig[0]);
 	err |= __put_user(fp_to, &frame->uc.uc_mcontext.fpstate);
 	if (sizeof(*set) == 16) {
@@ -531,7 +411,7 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
 	 */
 	/* x86-64 should always use SA_RESTORER. */
 	if (ksig->ka.sa.sa_flags & SA_RESTORER)
-		err |= __put_user((void *)ksig->ka.sa.sa_restorer,
+		err |= __put_user((void __user *)ksig->ka.sa.sa_restorer,
				  &frame->pretcode);
 	else
 		/* could use a vstub here */
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index 1c77d9946199..d6e1cd9956bf 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -20,9 +20,6 @@ void foo(void);
 void foo(void)
 {
 #ifdef __i386__
-	DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct));
-	DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct));
-
 	DEFINE(HOST_IP, EIP);
 	DEFINE(HOST_SP, UESP);
 	DEFINE(HOST_EFLAGS, EFL);
@@ -41,11 +38,6 @@ void foo(void)
 	DEFINE(HOST_GS, GS);
 	DEFINE(HOST_ORIG_AX, ORIG_EAX);
 #else
-#ifdef FP_XSTATE_MAGIC1
-	DEFINE_LONGS(HOST_FP_SIZE, 2696);
-#else
-	DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
-#endif
 	DEFINE_LONGS(HOST_BX, RBX);
 	DEFINE_LONGS(HOST_CX, RCX);
 	DEFINE_LONGS(HOST_DI, RDI);
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 6a77ea6434ff..7478d11dacb7 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -56,7 +56,6 @@ CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
 quiet_cmd_vdso = VDSO $@
       cmd_vdso = $(CC) -nostdlib -o $@ \
		       $(CC_FLAGS_LTO) $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
-		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
-		 sh $(src)/checkundef.sh '$(NM)' '$@'
+		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
-VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv -z noexecstack
+VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv -z noexecstack -Wl,--no-undefined
diff --git a/arch/x86/um/vdso/checkundef.sh b/arch/x86/um/vdso/checkundef.sh
deleted file mode 100644
index 8e3ea6bb956f..000000000000
--- a/arch/x86/um/vdso/checkundef.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-nm="$1"
-file="$2"
-$nm "$file" | grep '^ *U' > /dev/null 2>&1
-if [ $? -eq 1 ]; then
-	exit 0
-else
-	echo "$file: undefined symbols found" >&2
-	exit 1
-fi
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 6d1cf2436ead..7e51d2cec64b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -57,6 +57,7 @@ static int __init hostfs_args(char *options, int *add)
 {
 	char *ptr;
+	*add = 0;
 	ptr = strchr(options, ',');
 	if (ptr != NULL)
 		*ptr++ = '\0';
@@ -471,8 +472,8 @@ static int hostfs_write_begin(struct file *file, struct address_space *mapping,
 	*foliop = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
			mapping_gfp_mask(mapping));
-	if (!*foliop)
-		return -ENOMEM;
+	if (IS_ERR(*foliop))
+		return PTR_ERR(*foliop);
 	return 0;
 }
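
The last hostfs hunk above switches hostfs_write_begin() from a NULL check to IS_ERR()/PTR_ERR(), because __filemap_get_folio() reports failure through an ERR_PTR() value rather than by returning NULL. Below is a minimal, illustrative sketch of that idiom, not the hostfs code itself; the helper name example_get_write_folio() and its signature are hypothetical, while __filemap_get_folio(), FGP_WRITEBEGIN, mapping_gfp_mask(), IS_ERR() and PTR_ERR() are the same interfaces used in the hunk.

#include <linux/err.h>
#include <linux/pagemap.h>

/*
 * Illustrative helper: look up (or create) the folio a ->write_begin
 * implementation would operate on, propagating the real error code.
 */
static int example_get_write_folio(struct address_space *mapping,
				   pgoff_t index, struct folio **foliop)
{
	struct folio *folio;

	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);	/* e.g. -ENOMEM; a NULL test would miss this */

	*foliop = folio;
	return 0;
}

With the ERR_PTR() convention, the old "if (!*foliop)" test could never trigger, so an error pointer would have been treated as success; checking IS_ERR() and returning PTR_ERR() both detects the failure and preserves the specific error code.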