> + out:
> + up_write(&mm->mmap_sem);
> + return ret;
> +}
> +
> +/**
> + * cr_vma_read_pages - read in the saved pages to restore a vma
> + * @ctx: restart context
> + * @hh: vma descriptor (header) read from the checkpoint image
> + *
> + * Does nothing when @hh->nr_pages is zero.  Otherwise reads the page
> + * addresses and then the page contents via cr_vma_read_pages_vaddrs()
> + * and cr_vma_read_pages_contents().  A vma without VM_WRITE is made
> + * writable for the duration of the read and has its protection put
> + * back afterwards, on failure as well as on success.
> + *
> + * Returns a negative error code on failure; on success, the result of
> + * the last helper that ran (the visible caller only tests for < 0).
> + */
> +static int cr_vma_read_pages(struct cr_ctx *ctx, struct cr_hdr_vma *hh)
> +{
> +	struct mm_struct *mm = current->mm;
> +	int readonly = !(hh->vm_flags & VM_WRITE);
> +	int ret;
> +
> +	if (!hh->nr_pages)
> +		return 0;
> +
> +	/* in the unlikely case that this vma is read-only */
> +	if (readonly) {
> +		ret = cr_vma_set_writable(mm, hh->vm_start, hh->vm_end, 1);
> +		if (ret < 0)
> +			return ret;	/* nothing was changed, nothing to undo */
> +	}
> +
> +	ret = cr_vma_read_pages_vaddrs(ctx, hh->nr_pages);
> +	if (ret < 0)
> +		goto out;
> +	ret = cr_vma_read_pages_contents(ctx, hh->nr_pages);
> +	if (ret < 0)
> +		goto out;
> +
> +	cr_pgarr_reset(ctx);	/* reset page-array chain */
> +
> + out:
> +	/* restore original protection for this vma, also after a failure */
> +	if (readonly) {
> +		int err = cr_vma_set_writable(mm, hh->vm_start, hh->vm_end, 0);
> +
> +		/* a successful restore must not hide an earlier read error */
> +		if (ret >= 0)
> +			ret = err;
> +	}
> +	return ret;
> +}
> +
> +/**
> + * cr_calc_map_prot_bits - convert vm_flags to mmap protection
> + * @orig_vm_flags: source vm_flags
> + *
> + * Returns the PROT_* bits matching the VM_READ, VM_WRITE and VM_EXEC
> + * bits set in @orig_vm_flags.
> + *
> + * NOTE(review): PROT_SEM is tested directly against the vm_flags value,
> + * although it is a PROT_* constant rather than a VM_* one -- confirm
> + * that this is intended.
> + */
> +static unsigned long cr_calc_map_prot_bits(unsigned long orig_vm_flags)
> +{
> +	unsigned long prot;
> +
> +	prot  = (orig_vm_flags & VM_READ)  ? PROT_READ  : 0;
> +	prot |= (orig_vm_flags & VM_WRITE) ? PROT_WRITE : 0;
> +	prot |= (orig_vm_flags & VM_EXEC)  ? PROT_EXEC  : 0;
> +	/* only (?) with IPC-SHM */
> +	prot |= (orig_vm_flags & PROT_SEM) ? PROT_SEM   : 0;
> +
> +	return prot;
> +}
> +
> +/**
> + * cr_calc_map_flags_bits - convert vm_flags to mmap flags
> + * @orig_vm_flags: source vm_flags
> + *
> + * MAP_FIXED is always set.  Exactly one of MAP_SHARED / MAP_PRIVATE is
> + * chosen from VM_MAYSHARE; VM_GROWSDOWN, VM_DENYWRITE and VM_EXECUTABLE
> + * are carried over to their MAP_* counterparts.
> + */
> +static unsigned long cr_calc_map_flags_bits(unsigned long orig_vm_flags)
> +{
> +	unsigned long map_flags = MAP_FIXED;
> +
> +	/* sharing mode first: the two are mutually exclusive */
> +	map_flags |= (orig_vm_flags & VM_MAYSHARE) ? MAP_SHARED : MAP_PRIVATE;
> +
> +	if (orig_vm_flags & VM_GROWSDOWN)
> +		map_flags |= MAP_GROWSDOWN;
> +	if (orig_vm_flags & VM_DENYWRITE)
> +		map_flags |= MAP_DENYWRITE;
> +	if (orig_vm_flags & VM_EXECUTABLE)
> +		map_flags |= MAP_EXECUTABLE;
> +
> +	return map_flags;
> +}
> +
> +/**
> + * cr_read_vma - read one vma descriptor and re-create the mapping
> + * @ctx: restart context
> + * @mm: memory descriptor whose mmap_sem is taken around the mmap call
> + *
> + * Reads a CR_HDR_VMA header, validates it, maps the region with
> + * do_mmap_pgoff() at the recorded start address (for CR_VMA_FILE, backed
> + * by the file obtained from cr_read_open_fname()), and then reads the
> + * saved pages with cr_vma_read_pages().
> + *
> + * The header buffer obtained from cr_hbuf_get() is handed back with
> + * cr_hbuf_put() on every exit path, including errors.
> + *
> + * Returns 0 on success or a negative error code.
> + */
> +static int cr_read_vma(struct cr_ctx *ctx, struct mm_struct *mm)
> +{
> +	struct cr_hdr_vma *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	unsigned long vm_size, vm_start, vm_flags, vm_prot, vm_pgoff;
> +	unsigned long addr;
> +	unsigned long flags;
> +	struct file *file = NULL;
> +	int parent, ret;
> +
> +	parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_VMA);
> +	if (parent < 0) {
> +		ret = parent;
> +		goto out;
> +	}
> +	if (parent != 0) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	cr_debug("vma %#lx-%#lx type %d nr_pages %d\n",
> +		 (unsigned long) hh->vm_start, (unsigned long) hh->vm_end,
> +		 (int) hh->vma_type, (int) hh->nr_pages);
> +
> +	if (hh->vm_end < hh->vm_start || hh->nr_pages < 0) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	vm_start = hh->vm_start;
> +	vm_pgoff = hh->vm_pgoff;
> +	vm_size = hh->vm_end - hh->vm_start;
> +	vm_prot = cr_calc_map_prot_bits(hh->vm_flags);
> +	vm_flags = cr_calc_map_flags_bits(hh->vm_flags);
> +
> +	switch (hh->vma_type) {
> +
> +	case CR_VMA_ANON:		/* anonymous private mapping */
> +		/* vm_pgoff for anonymous mapping is the "global" page
> +		   offset (namely from addr 0x0), so we force a zero */
> +		vm_pgoff = 0;
> +		break;
> +
> +	case CR_VMA_FILE:		/* private mapping from a file */
> +		/* O_RDWR only needed if both (VM_WRITE|VM_SHARED) are set */
> +		flags = hh->vm_flags;
> +		if ((flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))
> +			flags = O_RDWR;
> +		else
> +			flags = O_RDONLY;
> +		file = cr_read_open_fname(ctx, flags, 0);
> +		if (IS_ERR(file)) {
> +			ret = PTR_ERR(file);
> +			goto out;
> +		}
> +		break;
> +
> +	default:
> +		ret = -EINVAL;
> +		goto out;
> +
> +	}
> +
> +	down_write(&mm->mmap_sem);
> +	addr = do_mmap_pgoff(file, vm_start, vm_size,
> +			     vm_prot, vm_flags, vm_pgoff);
> +	up_write(&mm->mmap_sem);
> +	cr_debug("size %#lx prot %#lx flag %#lx pgoff %#lx => %#lx\n",
> +		 vm_size, vm_prot, vm_flags, vm_pgoff, addr);
> +
> +	/* the file (if opened) is now referenced by the vma */
> +	if (file)
> +		filp_close(file, NULL);
> +
> +	if (IS_ERR((void *) addr)) {
> +		ret = PTR_ERR((void *) addr);
> +		goto out;
> +	}
> +
> +	/*
> +	 * CR_VMA_ANON: read in memory as is
> +	 * CR_VMA_FILE: read in memory as is
> +	 * (more to follow ...)
> +	 */
> +
> +	ret = 0;
> +	switch (hh->vma_type) {
> +	case CR_VMA_ANON:
> +	case CR_VMA_FILE:
> +		/* standard case: read the data into the memory */
> +		ret = cr_vma_read_pages(ctx, hh);
> +		break;
> +	}
> +
> +	if (ret < 0)
> +		goto out;
> +
> +	cr_debug("vma retval %d\n", ret);
> +	ret = 0;
> +
> + out:
> +	/* single exit: the header buffer is released on every path */
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +	return ret;
> +}
> +
> +/*
> + * cr_destroy_mm - unmap every vma currently linked on @mm->mmap
> + *
> + * Walks the vma list and calls do_munmap() on the full range of each
> + * entry.  The successor is sampled before the unmap.  The visible caller,
> + * cr_read_mm(), holds mm->mmap_sem for writing around this call.
> + *
> + * Returns 0, or the first negative value returned by do_munmap().
> + */
> +static int cr_destroy_mm(struct mm_struct *mm)
> +{
> +	struct vm_area_struct *vma, *next;
> +	int ret;
> +
> +	for (vma = mm->mmap; vma; vma = next) {
> +		next = vma->vm_next;
> +		ret = do_munmap(mm, vma->vm_start, vma->vm_end-vma->vm_start);
> +		if (ret < 0) {
> +			pr_debug("CR: restart failed do_munmap (%d)\n", ret);
> +			return ret;
> +		}
> +	}
> +	return 0;
> +}
> +
> +/**
> + * cr_read_mm - restore the memory layout of the current task
> + * @ctx: restart context
> + *
> + * Reads a CR_HDR_MM header, unmaps every existing vma of current->mm,
> + * installs the recorded code/data/brk/stack/arg/env boundaries, then
> + * reads hh->map_count vma records with cr_read_vma() and finally the
> + * architecture context with cr_read_mm_context().
> + *
> + * The header buffer obtained from cr_hbuf_get() is handed back with
> + * cr_hbuf_put() on every exit path, including errors.
> + *
> + * Returns the result of cr_read_mm_context() on success, or a negative
> + * error code from the first step that failed.
> + */
> +int cr_read_mm(struct cr_ctx *ctx)
> +{
> +	struct cr_hdr_mm *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	struct mm_struct *mm;
> +	int nr, parent, ret;
> +
> +	parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_MM);
> +	if (parent < 0) {
> +		ret = parent;
> +		goto out;
> +	}
> +#if 0	/* activate when containers are used */
> +	if (parent != task_pid_vnr(current)) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +#endif
> +	cr_debug("map_count %d\n", hh->map_count);
> +
> +	/* XXX need more sanity checks */
> +	if (hh->start_code > hh->end_code ||
> +	    hh->start_data > hh->end_data || hh->map_count < 0) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	mm = current->mm;
> +
> +	/* point of no return -- destruct current mm */
> +	down_write(&mm->mmap_sem);
> +	ret = cr_destroy_mm(mm);
> +	if (ret < 0) {
> +		up_write(&mm->mmap_sem);
> +		goto out;
> +	}
> +	mm->start_code = hh->start_code;
> +	mm->end_code = hh->end_code;
> +	mm->start_data = hh->start_data;
> +	mm->end_data = hh->end_data;
> +	mm->start_brk = hh->start_brk;
> +	mm->brk = hh->brk;
> +	mm->start_stack = hh->start_stack;
> +	mm->arg_start = hh->arg_start;
> +	mm->arg_end = hh->arg_end;
> +	mm->env_start = hh->env_start;
> +	mm->env_end = hh->env_end;
> +	up_write(&mm->mmap_sem);
> +
> +
> +	/* FIX: need also mm->flags */
> +
> +	for (nr = hh->map_count; nr; nr--) {
> +		ret = cr_read_vma(ctx, mm);
> +		if (ret < 0)
> +			goto out;
> +	}
> +
> +	ret = cr_read_mm_context(ctx, mm, hh->objref);
> +
> + out:
> +	/* single exit: the header buffer is released on every path */
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +	return ret;
> +}
> diff --git a/include/asm-x86/ckpt_hdr.h b/include/asm-x86/ckpt_hdr.h
> index 6bc61ac..f8eee6a 100644
> --- a/include/asm-x86/ckpt_hdr.h
> +++ b/include/asm-x86/ckpt_hdr.h
> @@ -74,4 +74,8 @@ struct cr_hdr_mm_context {
> __s16 nldt;
> } __attribute__((aligned(8)));
>
> +
> +/* misc prototypes from kernel (not defined elsewhere) */
> +asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount);
> +
> #endif /* __ASM_X86_CKPT_HDR__H */
> diff --git a/include/linux/ckpt.h b/include/linux/ckpt.h
> index 5c62a90..9305e7b 100644
> --- a/include/linux/ckpt.h
> +++ b/include/linux/ckpt.h
> @@ -59,6 +59,8 @@ int cr_write_fname(struct cr_ctx *ctx, struct path *path, struct path *root);
> int cr_read_obj(struct cr_ctx *ctx, struct cr_hdr *h, void *buf, int n);
> int cr_read_obj_type(struct cr_ctx *ctx, void *buf, int n, int type);
> int cr_read_string(struct cr_ctx *ctx, void *str, int len);
> +int cr_read_fname(struct cr_ctx *ctx, void *fname, int n);
> +struct file *cr_read_open_fname(struct cr_ctx *ctx, int flags, int mode);
>
> int cr_write_mm(struct cr_ctx *ctx, struct task_struct *t);
> int cr_read_mm(struct cr_ctx *ctx);
> diff --git a/include/linux/ckpt_hdr.h b/include/linux/ckpt_hdr.h
> index ac77d7d..f064cbb 100644
> --- a/include/linux/ckpt_hdr.h
> +++ b/include/linux/ckpt_hdr.h
> @@ -102,7 +102,7 @@ enum vm_type {
> struct cr_hdr_vma {
> __u32 vma_type;
> __u32 _padding;
> - __s64 nr_pages;
> + __s64 nr_pages; /* number of pages saved */
>
> __u64 vm_start;
> __u64 vm_end;
> --
> 1.5.4.3
>
> _______________________________________________
> Containers mailing list
>
Containers@lists.linux-foundation.org
>
https://lists.linux-foundation.org/mailman/listinfo/containers