  1. /*
  2.  * kexec.c - kexec system call
  3.  * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
  4.  *
  5.  * This source code is licensed under the GNU General Public License,
  6.  * Version 2.  See the file COPYING for more details.
  7.  */
  8.  
  9. #include <linux/capability.h>
  10. #include <linux/mm.h>
  11. #include <linux/file.h>
  12. #include <linux/slab.h>
  13. #include <linux/fs.h>
  14. #include <linux/kexec.h>
  15. #include <linux/mutex.h>
  16. #include <linux/list.h>
  17. #include <linux/highmem.h>
  18. #include <linux/syscalls.h>
  19. #include <linux/reboot.h>
  20. #include <linux/ioport.h>
  21. #include <linux/hardirq.h>
  22. #include <linux/elf.h>
  23. #include <linux/elfcore.h>
  24. #include <linux/utsrelease.h>
  25. #include <linux/utsname.h>
  26. #include <linux/numa.h>
  27. #include <linux/suspend.h>
  28. #include <linux/device.h>
  29. #include <linux/freezer.h>
  30. #include <linux/pm.h>
  31. #include <linux/cpu.h>
  32. #include <linux/console.h>
  33. #include <linux/vmalloc.h>
  34.  
  35. #include <asm/page.h>
  36. #include <asm/uaccess.h>
  37. #include <asm/io.h>
  38. #include <asm/system.h>
  39. #include <asm/sections.h>
  40. #include <asm/unistd.h>
  41.  
  42. MODULE_LICENSE("GPL");
  43.  
  44. /* Syscall table */
  45. void **sys_call_table;
  46.  
  47. /* original and new reboot syscall */
  48. asmlinkage long (*original_reboot)(int magic1, int magic2, unsigned int cmd, void __user *arg);
  49. extern asmlinkage long reboot(int magic1, int magic2, unsigned int cmd, void __user *arg);
  50.  
  51. /* Per cpu memory for storing cpu states in case of system crash. */
  52. note_buf_t *crash_notes;
  53.  
  54. /* vmcoreinfo stuff */
  55. unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
  56. u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
  57. size_t vmcoreinfo_size;
  58. size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
  59.  
  60. /* Location of the reserved area for the crash kernel */
  61. struct resource crashk_res = {
  62.     .name  = "Crash kernel",
  63.     .start = 0,
  64.     .end   = 0,
  65.     .flags = IORESOURCE_BUSY | IORESOURCE_MEM
  66. };
  67.  
  68. /*
  69.  * When kexec transitions to the new kernel there is a one-to-one
  70.  * mapping between physical and virtual addresses.  On processors
  71.  * where you can disable the MMU this is trivial, and easy.  For
  72.  * others it is still a simple predictable page table to setup.
  73.  *
  74.  * In that environment kexec copies the new kernel to its final
  75.  * resting place.  This means I can only support memory whose
  76.  * physical address can fit in an unsigned long.  In particular
  77.  * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
  78.  * If the assembly stub has more restrictive requirements
  79.  * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
  80.  * defined more restrictively in <asm/kexec.h>.
  81.  *
  82.  * The code for the transition from the current kernel to
  83.  * the new kernel is placed in the control_code_buffer, whose size
  84.  * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
  85.  * page of memory is necessary, but some architectures require more.
  86.  * Because this memory must be identity mapped in the transition from
  87.  * virtual to physical addresses it must live in the range
  88.  * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
  89.  * modifiable.
  90.  *
  91.  * The assembly stub in the control code buffer is passed a linked list
  92.  * of descriptor pages detailing the source pages of the new kernel,
  93.  * and the destination addresses of those source pages.  As this data
  94.  * structure is not used in the context of the current OS, it must
  95.  * be self-contained.
  96.  *
  97.  * The code has been made to work with highmem pages and will use a
  98.  * destination page in its final resting place (if it happens
  99.  * to allocate it).  The end product of this is that most of the
  100.  * physical address space, and most of RAM can be used.
  101.  *
  102.  * Future directions include:
  103.  *  - allocating a page table with the control code buffer identity
  104.  *    mapped, to simplify machine_kexec and make kexec_on_panic more
  105.  *    reliable.
  106.  */
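/*
 * A rough sketch, kept out of the build under #if 0, of how a
 * relocation stub might consume the descriptor list built by this
 * file.  Each kimage_entry_t is a physical page address tagged with
 * one of the IND_* flags in its low bits.  Illustrative only: the
 * real stub runs with the MMU off or identity mapped, so it works on
 * physical addresses directly rather than via phys_to_virt().
 */
#if 0
static void example_walk_kimage_entries(kimage_entry_t *ptr)
{
    unsigned long dest = 0;
    kimage_entry_t entry;

    for (entry = *ptr; !(entry & IND_DONE); entry = *ptr) {
        if (entry & IND_DESTINATION) {
            dest = entry & PAGE_MASK;   /* start of a new run */
            ptr++;
        } else if (entry & IND_INDIRECTION) {
            /* continue in the next indirection page */
            ptr = phys_to_virt(entry & PAGE_MASK);
        } else if (entry & IND_SOURCE) {
            /* copy one source page to the current destination */
            copy_page(phys_to_virt(dest),
                      phys_to_virt(entry & PAGE_MASK));
            dest += PAGE_SIZE;
            ptr++;
        } else {
            ptr++;  /* empty slot, skip it */
        }
    }
}
#endif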
  107.  
  108. /*
  109.  * KIMAGE_NO_DEST is an impossible destination address..., for
  110.  * allocating pages whose destination address we do not care about.
  111.  */
  112. #define KIMAGE_NO_DEST (-1UL)
  113.  
  114. static int kimage_is_destination_range(struct kimage *image,
  115.                        unsigned long start, unsigned long end);
  116. static struct page *kimage_alloc_page(struct kimage *image,
  117.                        gfp_t gfp_mask,
  118.                        unsigned long dest);
  119.  
  120. static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
  121.                         unsigned long nr_segments,
  122.                             struct kexec_segment __user *segments)
  123. {
  124.     size_t segment_bytes;
  125.     struct kimage *image;
  126.     unsigned long i;
  127.     int result;
  128.  
  129.     /* Allocate a controlling structure */
  130.     result = -ENOMEM;
  131.     image = kzalloc(sizeof(*image), GFP_KERNEL);
  132.     if (!image)
  133.         goto out;
  134.  
  135.     image->head = 0;
  136.     image->entry = &image->head;
  137.     image->last_entry = &image->head;
  138.     image->control_page = ~0; /* By default this does not apply */
  139.     image->start = entry;
  140.     image->type = KEXEC_TYPE_DEFAULT;
  141.  
  142.     /* Initialize the list of control pages */
  143.     INIT_LIST_HEAD(&image->control_pages);
  144.  
  145.     /* Initialize the list of destination pages */
  146.     INIT_LIST_HEAD(&image->dest_pages);
  147.  
  148.     /* Initialize the list of unuseable pages */
  149.     INIT_LIST_HEAD(&image->unuseable_pages);
  150.  
  151.     /* Read in the segments */
  152.     image->nr_segments = nr_segments;
  153.     segment_bytes = nr_segments * sizeof(*segments);
  154.     result = copy_from_user(image->segment, segments, segment_bytes);
  155.     if (result)
  156.         goto out;
  157.  
  158.     /*
  159.      * Verify we have good destination addresses.  The caller is
  160.      * responsible for making certain we don't attempt to load
  161.      * the new image into invalid or reserved areas of RAM.  This
  162.      * just verifies it is an address we can use.
  163.      *
  164.      * Since the kernel does everything in page size chunks ensure
  165.      * the destination addresses are page aligned.  Too many
  166.      * special cases crop up when we don't do this.  The most
  167.      * insidious is getting overlapping destination addresses
  168.      * simply because addresses are changed to page size
  169.      * granularity.
  170.      */
  171.     result = -EADDRNOTAVAIL;
  172.     for (i = 0; i < nr_segments; i++) {
  173.         unsigned long mstart, mend;
  174.  
  175.         mstart = image->segment[i].mem;
  176.         mend   = mstart + image->segment[i].memsz;
  177.         if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
  178.             goto out;
  179.         if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
  180.             goto out;
  181.     }
  182.  
  183.     /* Verify our destination addresses do not overlap.
  184.      * If we allowed overlapping destination addresses
  185.      * through, very weird things can happen with no
  186.      * easy explanation as one segment stops on another.
  187.      */
  188.     result = -EINVAL;
  189.     for (i = 0; i < nr_segments; i++) {
  190.         unsigned long mstart, mend;
  191.         unsigned long j;
  192.  
  193.         mstart = image->segment[i].mem;
  194.         mend   = mstart + image->segment[i].memsz;
  195.         for (j = 0; j < i; j++) {
  196.             unsigned long pstart, pend;
  197.             pstart = image->segment[j].mem;
  198.             pend   = pstart + image->segment[j].memsz;
  199.             /* Do the segments overlap ? */
  200.             if ((mend > pstart) && (mstart < pend))
  201.                 goto out;
  202.         }
  203.     }
  204.  
  205.     /* Ensure our buffer sizes are strictly less than
  206.      * our memory sizes.  This should always be the case,
  207.      * and it is easier to check up front than to be surprised
  208.      * later on.
  209.      */
  210.     result = -EINVAL;
  211.     for (i = 0; i < nr_segments; i++) {
  212.         if (image->segment[i].bufsz > image->segment[i].memsz)
  213.             goto out;
  214.     }
  215.  
  216.     result = 0;
  217. out:
  218.     if (result == 0)
  219.         *rimage = image;
  220.     else
  221.         kfree(image);
  222.  
  223.     return result;
  224.  
  225. }
  226.  
  227. static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
  228.                 unsigned long nr_segments,
  229.                 struct kexec_segment __user *segments)
  230. {
  231.     int result;
  232.     struct kimage *image;
  233.  
  234.     /* Allocate and initialize a controlling structure */
  235.     image = NULL;
  236.     result = do_kimage_alloc(&image, entry, nr_segments, segments);
  237.     if (result)
  238.         goto out;
  239.  
  240.     *rimage = image;
  241.  
  242.     /*
  243.      * Find a location for the control code buffer, and add it to
  244.      * the vector of segments so that its pages will also be
  245.      * counted as destination pages.
  246.      */
  247.     result = -ENOMEM;
  248.     image->control_code_page = kimage_alloc_control_pages(image,
  249.                        get_order(KEXEC_CONTROL_PAGE_SIZE));
  250.     if (!image->control_code_page) {
  251.         printk(KERN_ERR "Could not allocate control_code_buffer\n");
  252.         goto out;
  253.     }
  254.  
  255.     image->swap_page = kimage_alloc_control_pages(image, 0);
  256.     if (!image->swap_page) {
  257.         printk(KERN_ERR "Could not allocate swap buffer\n");
  258.         goto out;
  259.     }
  260.  
  261.     result = 0;
  262.  out:
  263.     if (result == 0)
  264.         *rimage = image;
  265.     else
  266.         kfree(image);
  267.  
  268.     return result;
  269. }
  270.  
  271. static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
  272.                 unsigned long nr_segments,
  273.                 struct kexec_segment __user *segments)
  274. {
  275.     int result;
  276.     struct kimage *image;
  277.     unsigned long i;
  278.  
  279.     image = NULL;
  280.     /* Verify we have a valid entry point */
  281.     if ((entry < crashk_res.start) || (entry > crashk_res.end)) {
  282.         result = -EADDRNOTAVAIL;
  283.         goto out;
  284.     }
  285.  
  286.     /* Allocate and initialize a controlling structure */
  287.     result = do_kimage_alloc(&image, entry, nr_segments, segments);
  288.     if (result)
  289.         goto out;
  290.  
  291.     /* Enable the special crash kernel control page
  292.      * allocation policy.
  293.      */
  294.     image->control_page = crashk_res.start;
  295.     image->type = KEXEC_TYPE_CRASH;
  296.  
  297.     /*
  298.      * Verify we have good destination addresses.  Normally
  299.      * the caller is responsible for making certain we don't
  300.      * attempt to load the new image into invalid or reserved
  301.      * areas of RAM.  But crash kernels are preloaded into a
  302.      * reserved area of RAM.  We must ensure the addresses
  303.      * are in the reserved area otherwise preloading the
  304.      * kernel could corrupt things.
  305.      */
  306.     result = -EADDRNOTAVAIL;
  307.     for (i = 0; i < nr_segments; i++) {
  308.         unsigned long mstart, mend;
  309.  
  310.         mstart = image->segment[i].mem;
  311.         mend = mstart + image->segment[i].memsz - 1;
  312.         /* Ensure we are within the crash kernel limits */
  313.         if ((mstart < crashk_res.start) || (mend > crashk_res.end))
  314.             goto out;
  315.     }
  316.  
  317.     /*
  318.      * Find a location for the control code buffer, and add it to
  319.      * the vector of segments so that its pages will also be
  320.      * counted as destination pages.
  321.      */
  322.     result = -ENOMEM;
  323.     image->control_code_page = kimage_alloc_control_pages(image,
  324.                        get_order(KEXEC_CONTROL_PAGE_SIZE));
  325.     if (!image->control_code_page) {
  326.         printk(KERN_ERR "Could not allocate control_code_buffer\n");
  327.         goto out;
  328.     }
  329.  
  330.     result = 0;
  331. out:
  332.     if (result == 0)
  333.         *rimage = image;
  334.     else
  335.         kfree(image);
  336.  
  337.     return result;
  338. }
  339.  
  340. static int kimage_is_destination_range(struct kimage *image,
  341.                     unsigned long start,
  342.                     unsigned long end)
  343. {
  344.     unsigned long i;
  345.  
  346.     for (i = 0; i < image->nr_segments; i++) {
  347.         unsigned long mstart, mend;
  348.  
  349.         mstart = image->segment[i].mem;
  350.         mend = mstart + image->segment[i].memsz;
  351.         if ((end > mstart) && (start < mend))
  352.             return 1;
  353.     }
  354.  
  355.     return 0;
  356. }
  357.  
  358. static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
  359. {
  360.     struct page *pages;
  361.  
  362.     pages = alloc_pages(gfp_mask, order);
  363.     if (pages) {
  364.         unsigned int count, i;
  365.         pages->mapping = NULL;
  366.         set_page_private(pages, order);
  367.         count = 1 << order;
  368.         for (i = 0; i < count; i++)
  369.             SetPageReserved(pages + i);
  370.     }
  371.  
  372.     return pages;
  373. }
  374.  
  375. static void kimage_free_pages(struct page *page)
  376. {
  377.     unsigned int order, count, i;
  378.  
  379.     order = page_private(page);
  380.     count = 1 << order;
  381.     for (i = 0; i < count; i++)
  382.         ClearPageReserved(page + i);
  383.     __free_pages(page, order);
  384. }
  385.  
  386. static void kimage_free_page_list(struct list_head *list)
  387. {
  388.     struct list_head *pos, *next;
  389.  
  390.     list_for_each_safe(pos, next, list) {
  391.         struct page *page;
  392.  
  393.         page = list_entry(pos, struct page, lru);
  394.         list_del(&page->lru);
  395.         kimage_free_pages(page);
  396.     }
  397. }
  398.  
  399. static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
  400.                             unsigned int order)
  401. {
  402.     /* Control pages are special, they are the intermediaries
  403.      * that are needed while we copy the rest of the pages
  404.      * to their final resting place.  As such they must
  405.      * not conflict with either the destination addresses
  406.      * or memory the kernel is already using.
  407.      *
  408.      * The only case where we really need more than one of
  409.      * these is for architectures where we cannot disable
  410.      * the MMU and must instead generate an identity mapped
  411.      * page table for all of the memory.
  412.      *
  413.      * At worst this runs in O(N) of the image size.
  414.      */
  415.     struct list_head extra_pages;
  416.     struct page *pages;
  417.     unsigned int count;
  418.  
  419.     count = 1 << order;
  420.     INIT_LIST_HEAD(&extra_pages);
  421.  
  422.     /* Loop while I can allocate a page and the page allocated
  423.      * is a destination page.
  424.      */
  425.     do {
  426.         unsigned long pfn, epfn, addr, eaddr;
  427.  
  428.         pages = kimage_alloc_pages(GFP_KERNEL, order);
  429.         if (!pages)
  430.             break;
  431.         pfn   = page_to_pfn(pages);
  432.         epfn  = pfn + count;
  433.         addr  = pfn << PAGE_SHIFT;
  434.         eaddr = epfn << PAGE_SHIFT;
  435.         if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
  436.                   kimage_is_destination_range(image, addr, eaddr)) {
  437.             list_add(&pages->lru, &extra_pages);
  438.             pages = NULL;
  439.         }
  440.     } while (!pages);
  441.  
  442.     if (pages) {
  443.         /* Remember the allocated page... */
  444.         list_add(&pages->lru, &image->control_pages);
  445.  
  446.         /* Because the page is already in its destination
  447.          * location we will never allocate another page at
  448.          * that address.  Therefore kimage_alloc_pages
  449.          * will not return it (again) and we don't need
  450.          * to give it an entry in image->segment[].
  451.          */
  452.     }
  453.     /* Deal with the destination pages I have inadvertently allocated.
  454.      *
  455.      * Ideally I would convert multi-page allocations into single
  456.      * page allocations, and add everything to image->dest_pages.
  457.      *
  458.      * For now it is simpler to just free the pages.
  459.      */
  460.     kimage_free_page_list(&extra_pages);
  461.  
  462.     return pages;
  463. }
  464.  
  465. static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
  466.                               unsigned int order)
  467. {
  468.     /* Control pages are special, they are the intermediaries
  469.      * that are needed while we copy the rest of the pages
  470.      * to their final resting place.  As such they must
  471.      * not conflict with either the destination addresses
  472.      * or memory the kernel is already using.
  473.      *
  474.      * Control pages are also the only pages we must allocate
  475.      * when loading a crash kernel.  All of the other pages
  476.      * are specified by the segments and we just memcpy
  477.      * into them directly.
  478.      *
  479.      * The only case where we really need more than one of
  480.      * these is for architectures where we cannot disable
  481.      * the MMU and must instead generate an identity mapped
  482.      * page table for all of the memory.
  483.      *
  484.      * Given the low demand this implements a very simple
  485.      * allocator that finds the first hole of the appropriate
  486.      * size in the reserved memory region, and allocates all
  487.      * of the memory up to and including the hole.
  488.      */
  489.     unsigned long hole_start, hole_end, size;
  490.     struct page *pages;
  491.  
  492.     pages = NULL;
  493.     size = (1 << order) << PAGE_SHIFT;
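    /*
     * Round the search start up to the allocation size so the hole is
     * naturally aligned; e.g. with size = 0x4000, a control_page of
     * 0x1235000 yields hole_start = 0x1238000.  This relies on size
     * being a power of two, which (1 << order) << PAGE_SHIFT always is.
     */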
  494.     hole_start = (image->control_page + (size - 1)) & ~(size - 1);
  495.     hole_end   = hole_start + size - 1;
  496.     while (hole_end <= crashk_res.end) {
  497.         unsigned long i;
  498.  
  499.         if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
  500.             break;
  501.         if (hole_end > crashk_res.end)
  502.             break;
  503.         /* See if I overlap any of the segments */
  504.         for (i = 0; i < image->nr_segments; i++) {
  505.             unsigned long mstart, mend;
  506.  
  507.             mstart = image->segment[i].mem;
  508.             mend   = mstart + image->segment[i].memsz - 1;
  509.             if ((hole_end >= mstart) && (hole_start <= mend)) {
  510.                 /* Advance the hole to the end of the segment */
  511.                 hole_start = (mend + (size - 1)) & ~(size - 1);
  512.                 hole_end   = hole_start + size - 1;
  513.                 break;
  514.             }
  515.         }
  516.         /* If I don't overlap any segments I have found my hole! */
  517.         if (i == image->nr_segments) {
  518.             pages = pfn_to_page(hole_start >> PAGE_SHIFT);
  519.             break;
  520.         }
  521.     }
  522.     if (pages)
  523.         image->control_page = hole_end;
  524.  
  525.     return pages;
  526. }
  527.  
  528.  
  529. struct page *kimage_alloc_control_pages(struct kimage *image,
  530.                      unsigned int order)
  531. {
  532.     struct page *pages = NULL;
  533.  
  534.     switch (image->type) {
  535.     case KEXEC_TYPE_DEFAULT:
  536.         pages = kimage_alloc_normal_control_pages(image, order);
  537.         break;
  538.     case KEXEC_TYPE_CRASH:
  539.         pages = kimage_alloc_crash_control_pages(image, order);
  540.         break;
  541.     }
  542.  
  543.     return pages;
  544. }
  545.  
  546. static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
  547. {
  548.     if (*image->entry != 0)
  549.         image->entry++;
  550.  
  551.     if (image->entry == image->last_entry) {
  552.         kimage_entry_t *ind_page;
  553.         struct page *page;
  554.  
  555.         page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
  556.         if (!page)
  557.             return -ENOMEM;
  558.  
  559.         ind_page = page_address(page);
  560.         *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
  561.         image->entry = ind_page;
  562.         image->last_entry = ind_page +
  563.                       ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
  564.     }
  565.     *image->entry = entry;
  566.     image->entry++;
  567.     *image->entry = 0;
  568.  
  569.     return 0;
  570. }
  571.  
  572. static int kimage_set_destination(struct kimage *image,
  573.                    unsigned long destination)
  574. {
  575.     int result;
  576.  
  577.     destination &= PAGE_MASK;
  578.     result = kimage_add_entry(image, destination | IND_DESTINATION);
  579.     if (result == 0)
  580.         image->destination = destination;
  581.  
  582.     return result;
  583. }
  584.  
  585.  
  586. static int kimage_add_page(struct kimage *image, unsigned long page)
  587. {
  588.     int result;
  589.  
  590.     page &= PAGE_MASK;
  591.     result = kimage_add_entry(image, page | IND_SOURCE);
  592.     if (result == 0)
  593.         image->destination += PAGE_SIZE;
  594.  
  595.     return result;
  596. }
  597.  
  598.  
  599. static void kimage_free_extra_pages(struct kimage *image)
  600. {
  601.     /* Walk through and free any extra destination pages I may have */
  602.     kimage_free_page_list(&image->dest_pages);
  603.  
  604.     /* Walk through and free any unuseable pages I have cached */
  605.     kimage_free_page_list(&image->unuseable_pages);
  606.  
  607. }
  608. static void kimage_terminate(struct kimage *image)
  609. {
  610.     if (*image->entry != 0)
  611.         image->entry++;
  612.  
  613.     *image->entry = IND_DONE;
  614. }
  615.  
  616. #define for_each_kimage_entry(image, ptr, entry) \
  617.     for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
  618.         ptr = (entry & IND_INDIRECTION)? \
  619.             phys_to_virt((entry & PAGE_MASK)): ptr +1)
  620.  
  621. static void kimage_free_entry(kimage_entry_t entry)
  622. {
  623.     struct page *page;
  624.  
  625.     page = pfn_to_page(entry >> PAGE_SHIFT);
  626.     kimage_free_pages(page);
  627. }
  628.  
  629. static void kimage_free(struct kimage *image)
  630. {
  631.     kimage_entry_t *ptr, entry;
  632.     kimage_entry_t ind = 0;
  633.  
  634.     if (!image)
  635.         return;
  636.  
  637.     kimage_free_extra_pages(image);
  638.     for_each_kimage_entry(image, ptr, entry) {
  639.         if (entry & IND_INDIRECTION) {
  640.             /* Free the previous indirection page */
  641.             if (ind & IND_INDIRECTION)
  642.                 kimage_free_entry(ind);
  643.             /* Save this indirection page until we are
  644.              * done with it.
  645.              */
  646.             ind = entry;
  647.         }
  648.         else if (entry & IND_SOURCE)
  649.             kimage_free_entry(entry);
  650.     }
  651.     /* Free the final indirection page */
  652.     if (ind & IND_INDIRECTION)
  653.         kimage_free_entry(ind);
  654.  
  655.     /* Handle any machine specific cleanup */
  656.     machine_kexec_cleanup(image);
  657.  
  658.     /* Free the kexec control pages... */
  659.     kimage_free_page_list(&image->control_pages);
  660.     kfree(image);
  661. }
  662.  
  663. static kimage_entry_t *kimage_dst_used(struct kimage *image,
  664.                     unsigned long page)
  665. {
  666.     kimage_entry_t *ptr, entry;
  667.     unsigned long destination = 0;
  668.  
  669.     for_each_kimage_entry(image, ptr, entry) {
  670.         if (entry & IND_DESTINATION)
  671.             destination = entry & PAGE_MASK;
  672.         else if (entry & IND_SOURCE) {
  673.             if (page == destination)
  674.                 return ptr;
  675.             destination += PAGE_SIZE;
  676.         }
  677.     }
  678.  
  679.     return NULL;
  680. }
  681.  
  682. static struct page *kimage_alloc_page(struct kimage *image,
  683.                     gfp_t gfp_mask,
  684.                     unsigned long destination)
  685. {
  686.     /*
  687.      * Here we implement safeguards to ensure that a source page
  688.      * is not copied to its destination page before the data on
  689.      * the destination page is no longer useful.
  690.      *
  691.      * To do this we maintain the invariant that a source page is
  692.      * either its own destination page, or it is not a
  693.      * destination page at all.
  694.      *
  695.      * That is slightly stronger than required, but the proof
  696.      * that no problems will occur is trivial, and the
  697.      * implementation is simple to verify.
  698.      *
  699.      * When allocating all pages normally this algorithm will run
  700.      * in O(N) time, but in the worst case it will run in O(N^2)
  701.      * time.   If the runtime is a problem the data structures can
  702.      * be fixed.
  703.      */
  704.     struct page *page;
  705.     unsigned long addr;
  706.  
  707.     /*
  708.      * Walk through the list of destination pages, and see if I
  709.      * have a match.
  710.      */
  711.     list_for_each_entry(page, &image->dest_pages, lru) {
  712.         addr = page_to_pfn(page) << PAGE_SHIFT;
  713.         if (addr == destination) {
  714.             list_del(&page->lru);
  715.             return page;
  716.         }
  717.     }
  718.     page = NULL;
  719.     while (1) {
  720.         kimage_entry_t *old;
  721.  
  722.         /* Allocate a page, if we run out of memory give up */
  723.         page = kimage_alloc_pages(gfp_mask, 0);
  724.         if (!page)
  725.             return NULL;
  726.         /* If the page cannot be used, file it away */
  727.         if (page_to_pfn(page) >
  728.                 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
  729.             list_add(&page->lru, &image->unuseable_pages);
  730.             continue;
  731.         }
  732.         addr = page_to_pfn(page) << PAGE_SHIFT;
  733.  
  734.         /* If it is the destination page we want, use it */
  735.         if (addr == destination)
  736.             break;
  737.  
  738.         /* If the page is not a destination page use it */
  739.         if (!kimage_is_destination_range(image, addr,
  740.                           addr + PAGE_SIZE))
  741.             break;
  742.  
  743.         /*
  744.          * I know that the page is someone's destination page.
  745.          * See if there is already a source page for this
  746.          * destination page.  And if so swap the source pages.
  747.          */
  748.         old = kimage_dst_used(image, addr);
  749.         if (old) {
  750.             /* If so move it */
  751.             unsigned long old_addr;
  752.             struct page *old_page;
  753.  
  754.             old_addr = *old & PAGE_MASK;
  755.             old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
  756.             copy_highpage(page, old_page);
  757.             *old = addr | (*old & ~PAGE_MASK);
  758.  
  759.             /* The old page I have found cannot be a
  760.              * destination page, so return it if its
  761.              * gfp_flags honor the ones passed in.
  762.              */
  763.             if (!(gfp_mask & __GFP_HIGHMEM) &&
  764.                 PageHighMem(old_page)) {
  765.                 kimage_free_pages(old_page);
  766.                 continue;
  767.             }
  768.             addr = old_addr;
  769.             page = old_page;
  770.             break;
  771.         }
  772.         else {
  773.             /* Place the page on the destination list; I
  774.              * will use it later.
  775.              */
  776.             list_add(&page->lru, &image->dest_pages);
  777.         }
  778.     }
  779.  
  780.     return page;
  781. }
  782.  
  783. static int kimage_load_normal_segment(struct kimage *image,
  784.                      struct kexec_segment *segment)
  785. {
  786.     unsigned long maddr;
  787.     unsigned long ubytes, mbytes;
  788.     int result;
  789.     unsigned char __user *buf;
  790.  
  791.     result = 0;
  792.     buf = segment->buf;
  793.     ubytes = segment->bufsz;
  794.     mbytes = segment->memsz;
  795.     maddr = segment->mem;
  796.  
  797.     result = kimage_set_destination(image, maddr);
  798.     if (result < 0)
  799.         goto out;
  800.  
  801.     while (mbytes) {
  802.         struct page *page;
  803.         char *ptr;
  804.         size_t uchunk, mchunk;
  805.  
  806.         page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
  807.         if (!page) {
  808.             result  = -ENOMEM;
  809.             goto out;
  810.         }
  811.         result = kimage_add_page(image, page_to_pfn(page)
  812.                                 << PAGE_SHIFT);
  813.         if (result < 0)
  814.             goto out;
  815.  
  816.         ptr = kmap(page);
  817.         /* Start with a clear page */
  818.         memset(ptr, 0, PAGE_SIZE);
  819.         ptr += maddr & ~PAGE_MASK;
  820.         mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
  821.         if (mchunk > mbytes)
  822.             mchunk = mbytes;
  823.  
  824.         uchunk = mchunk;
  825.         if (uchunk > ubytes)
  826.             uchunk = ubytes;
  827.  
  828.         result = copy_from_user(ptr, buf, uchunk);
  829.         kunmap(page);
  830.         if (result) {
  831.             result = (result < 0) ? result : -EIO;
  832.             goto out;
  833.         }
  834.         ubytes -= uchunk;
  835.         maddr  += mchunk;
  836.         buf    += mchunk;
  837.         mbytes -= mchunk;
  838.     }
  839. out:
  840.     return result;
  841. }
  842.  
  843. static int kimage_load_crash_segment(struct kimage *image,
  844.                     struct kexec_segment *segment)
  845. {
  846.     /* For crash dump kernels we simply copy the data from
  847.      * user space to its destination.
  848.      * We do things a page at a time for the sake of kmap.
  849.      */
  850.     unsigned long maddr;
  851.     unsigned long ubytes, mbytes;
  852.     int result;
  853.     unsigned char __user *buf;
  854.  
  855.     result = 0;
  856.     buf = segment->buf;
  857.     ubytes = segment->bufsz;
  858.     mbytes = segment->memsz;
  859.     maddr = segment->mem;
  860.     while (mbytes) {
  861.         struct page *page;
  862.         char *ptr;
  863.         size_t uchunk, mchunk;
  864.  
  865.         page = pfn_to_page(maddr >> PAGE_SHIFT);
  866.         if (!page) {
  867.             result  = -ENOMEM;
  868.             goto out;
  869.         }
  870.         ptr = kmap(page);
  871.         ptr += maddr & ~PAGE_MASK;
  872.         mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
  873.         if (mchunk > mbytes)
  874.             mchunk = mbytes;
  875.  
  876.         uchunk = mchunk;
  877.         if (uchunk > ubytes) {
  878.             uchunk = ubytes;
  879.             /* Zero the trailing part of the page */
  880.             memset(ptr + uchunk, 0, mchunk - uchunk);
  881.         }
  882.         result = copy_from_user(ptr, buf, uchunk);
  883.         kexec_flush_icache_page(page);
  884.         kunmap(page);
  885.         if (result) {
  886.             result = (result < 0) ? result : -EIO;
  887.             goto out;
  888.         }
  889.         ubytes -= uchunk;
  890.         maddr  += mchunk;
  891.         buf    += mchunk;
  892.         mbytes -= mchunk;
  893.     }
  894. out:
  895.     return result;
  896. }
  897.  
  898. static int kimage_load_segment(struct kimage *image,
  899.                 struct kexec_segment *segment)
  900. {
  901.     int result = -ENOMEM;
  902.  
  903.     switch (image->type) {
  904.     case KEXEC_TYPE_DEFAULT:
  905.         result = kimage_load_normal_segment(image, segment);
  906.         break;
  907.     case KEXEC_TYPE_CRASH:
  908.         result = kimage_load_crash_segment(image, segment);
  909.         break;
  910.     }
  911.  
  912.     return result;
  913. }
  914.  
  915. /*
  916.  * Exec Kernel system call: for obvious reasons only root may call it.
  917.  *
  918.  * This call breaks up into three pieces.
  919.  * - A generic part which loads the new kernel from the current
  920.  *   address space, and very carefully places the data in the
  921.  *   allocated pages.
  922.  *
  923.  * - A generic part that interacts with the kernel and tells all of
  924.  *   the devices to shut down, preventing on-going DMAs and placing
  925.  *   the devices in a consistent state so a later kernel can
  926.  *   reinitialize them.
  927.  *
  928.  * - A machine specific part that includes the syscall number,
  929.  *   copies the image to its final destination and
  930.  *   jumps into the image at entry.
  931.  *
  932.  * kexec does not sync or unmount filesystems, so if you need
  933.  * that to happen you need to do that yourself.
  934.  */
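/*
 * A minimal user space sketch of driving this call (illustrative only;
 * the entry address, buffer names and single flat segment are
 * placeholders -- real loaders such as kexec-tools build several
 * segments from the kernel image, initrd and boot parameters, and both
 * mem and memsz must be page aligned as checked in do_kimage_alloc()):
 *
 *  struct kexec_segment seg = {
 *      .buf = image_buf, .bufsz = image_len,
 *      .mem = entry_addr, .memsz = image_len_page_aligned,
 *  };
 *  if (syscall(__NR_kexec_load, entry_addr, 1UL, &seg, KEXEC_ARCH_DEFAULT))
 *      perror("kexec_load");
 *  syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
 *          LINUX_REBOOT_CMD_KEXEC, NULL);
 */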
  935. struct kimage *kexec_image;
  936. struct kimage *kexec_crash_image;
  937.  
  938. static DEFINE_MUTEX(kexec_mutex);
  939.  
  940. asmlinkage long kexec_load(unsigned long entry, unsigned long nr_segments, struct kexec_segment __user *segments, unsigned long flags)
  941. {
  942.     struct kimage **dest_image, *image;
  943.     int result;
  944.  
  945.     /* We only trust the superuser with rebooting the system. */
  946.     if (!capable(CAP_SYS_BOOT))
  947.         return -EPERM;
  948.  
  949.     /*
  950.      * Verify we have a legal set of flags
  951.      * This leaves us room for future extensions.
  952.      */
  953.     if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
  954.         return -EINVAL;
  955.  
  956.     /* Verify we are on the appropriate architecture */
  957.     if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
  958.         ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
  959.         return -EINVAL;
  960.  
  961.     /* Put an artificial cap on the number
  962.      * of segments passed to kexec_load.
  963.      */
  964.     if (nr_segments > KEXEC_SEGMENT_MAX)
  965.         return -EINVAL;
  966.  
  967.     image = NULL;
  968.     result = 0;
  969.  
  970.     /* Because we write directly to the reserved memory
  971.      * region when loading crash kernels we need a mutex here to
  972.      * prevent multiple crash kernels from attempting to load
  973.      * simultaneously, and to prevent a crash kernel from loading
  974.      * over the top of an in-use crash kernel.
  975.      *
  976.      * KISS: always take the mutex.
  977.      */
  978.     if (!mutex_trylock(&kexec_mutex))
  979.         return -EBUSY;
  980.  
  981.     dest_image = &kexec_image;
  982.     if (flags & KEXEC_ON_CRASH)
  983.         dest_image = &kexec_crash_image;
  984.     if (nr_segments > 0) {
  985.         unsigned long i;
  986.  
  987.         /* Loading another kernel to reboot into */
  988.         if ((flags & KEXEC_ON_CRASH) == 0)
  989.             result = kimage_normal_alloc(&image, entry,
  990.                             nr_segments, segments);
  991.         /* Loading another kernel to switch to if this one crashes */
  992.         else if (flags & KEXEC_ON_CRASH) {
  993.             /* Free any current crash dump kernel before
  994.              * we corrupt it.
  995.              */
  996.             kimage_free(xchg(&kexec_crash_image, NULL));
  997.             result = kimage_crash_alloc(&image, entry,
  998.                              nr_segments, segments);
  999.         }
  1000.         if (result)
  1001.             goto out;
  1002.  
  1003.         if (flags & KEXEC_PRESERVE_CONTEXT)
  1004.             image->preserve_context = 1;
  1005.         result = machine_kexec_prepare(image);
  1006.         if (result)
  1007.             goto out;
  1008.  
  1009.         for (i = 0; i < nr_segments; i++) {
  1010.             result = kimage_load_segment(image, &image->segment[i]);
  1011.             if (result)
  1012.                 goto out;
  1013.         }
  1014.         kimage_terminate(image);
  1015.     }
  1016.     /* Install the new kernel and uninstall the old */
  1017.     image = xchg(dest_image, image);
  1018.  
  1019. out:
  1020.     mutex_unlock(&kexec_mutex);
  1021.     kimage_free(image);
  1022.  
  1023.     return result;
  1024. }
  1025.  
  1026. #ifdef CONFIG_COMPAT
  1027. asmlinkage long compat_sys_kexec_load(unsigned long entry,
  1028.                 unsigned long nr_segments,
  1029.                 struct compat_kexec_segment __user *segments,
  1030.                 unsigned long flags)
  1031. {
  1032.     struct compat_kexec_segment in;
  1033.     struct kexec_segment out, __user *ksegments;
  1034.     unsigned long i, result;
  1035.  
  1036.     /* Don't allow clients that don't understand the native
  1037.      * architecture to do anything.
  1038.      */
  1039.     if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
  1040.         return -EINVAL;
  1041.  
  1042.     if (nr_segments > KEXEC_SEGMENT_MAX)
  1043.         return -EINVAL;
  1044.  
  1045.     ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
  1046.     for (i=0; i < nr_segments; i++) {
  1047.         result = copy_from_user(&in, &segments[i], sizeof(in));
  1048.         if (result)
  1049.             return -EFAULT;
  1050.  
  1051.         out.buf   = compat_ptr(in.buf);
  1052.         out.bufsz = in.bufsz;
  1053.         out.mem   = in.mem;
  1054.         out.memsz = in.memsz;
  1055.  
  1056.         result = copy_to_user(&ksegments[i], &out, sizeof(out));
  1057.         if (result)
  1058.             return -EFAULT;
  1059.     }
  1060.  
  1061.     return kexec_load(entry, nr_segments, ksegments, flags);
  1062. }
  1063. #endif
  1064.  
  1065. void crash_kexec(struct pt_regs *regs)
  1066. {
  1067.     /* Take the kexec_mutex here to prevent sys_kexec_load
  1068.      * running on one cpu from replacing the crash kernel
  1069.      * we are using after a panic on a different cpu.
  1070.      *
  1071.      * If the crash kernel was not located in a fixed area
  1072.      * of memory the xchg(&kexec_crash_image) would be
  1073.      * sufficient.  But since I reuse the memory...
  1074.      */
  1075.     if (mutex_trylock(&kexec_mutex)) {
  1076.         if (kexec_crash_image) {
  1077.             struct pt_regs fixed_regs;
  1078.             crash_setup_regs(&fixed_regs, regs);
  1079.             crash_save_vmcoreinfo();
  1080.             machine_crash_shutdown(&fixed_regs);
  1081.             machine_kexec(kexec_crash_image);
  1082.         }
  1083.         mutex_unlock(&kexec_mutex);
  1084.     }
  1085. }
  1086.  
  1087. static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
  1088.                 size_t data_len)
  1089. {
  1090.     struct elf_note note;
  1091.  
  1092.     note.n_namesz = strlen(name) + 1;
  1093.     note.n_descsz = data_len;
  1094.     note.n_type   = type;
  1095.     memcpy(buf, &note, sizeof(note));
  1096.     buf += (sizeof(note) + 3)/4;
  1097.     memcpy(buf, name, note.n_namesz);
  1098.     buf += (note.n_namesz + 3)/4;
  1099.     memcpy(buf, data, note.n_descsz);
  1100.     buf += (note.n_descsz + 3)/4;
  1101.  
  1102.     return buf;
  1103. }
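/*
 * For reference, the note written above has the standard ELF note
 * layout, with the name and descriptor each padded out to a 4-byte
 * boundary:
 *
 *  | n_namesz | n_descsz | n_type | name (padded) | desc (padded) |
 *
 * final_note() below terminates the buffer with an all-zero header.
 */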
  1104.  
  1105. static void final_note(u32 *buf)
  1106. {
  1107.     struct elf_note note;
  1108.  
  1109.     note.n_namesz = 0;
  1110.     note.n_descsz = 0;
  1111.     note.n_type   = 0;
  1112.     memcpy(buf, &note, sizeof(note));
  1113. }
  1114.  
  1115. void crash_save_cpu(struct pt_regs *regs, int cpu)
  1116. {
  1117.     struct elf_prstatus prstatus;
  1118.     u32 *buf;
  1119.  
  1120.     if ((cpu < 0) || (cpu >= nr_cpu_ids))
  1121.         return;
  1122.  
  1123.     /* Using ELF notes here is opportunistic.
  1124.      * I need a well defined structure format
  1125.      * for the data I pass, and I need tags
  1126.      * on the data to indicate what information I have
  1127.      * squirrelled away.  ELF notes happen to provide
  1128.      * all of that, so there is no need to invent something new.
  1129.      */
  1130.     buf = (u32*)per_cpu_ptr(crash_notes, cpu);
  1131.     if (!buf)
  1132.         return;
  1133.     memset(&prstatus, 0, sizeof(prstatus));
  1134.     prstatus.pr_pid = current->pid;
  1135.     elf_core_copy_regs(&prstatus.pr_reg, regs);
  1136.     buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
  1137.                       &prstatus, sizeof(prstatus));
  1138.     final_note(buf);
  1139. }
  1140.  
  1141. /*
  1142.  * parsing the "crashkernel" commandline
  1143.  *
  1144.  * this code is intended to be called from architecture specific code
  1145.  */
  1146.  
  1147.  
  1148. /*
  1149.  * This function parses command lines in the format
  1150.  *
  1151.  *   crashkernel=ramsize-range:size[,...][@offset]
  1152.  *
  1153.  * The function returns 0 on success and -EINVAL on failure.
  1154.  */
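/*
 * Example with made-up numbers: given
 *
 *  crashkernel=512M-2G:64M,2G-:128M@16M
 *
 * a machine with 1G of RAM reserves 64M, a machine with 4G of RAM
 * reserves 128M, and in either case the reservation is placed at
 * physical offset 16M.
 */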
  1155. static int __init parse_crashkernel_mem(char            *cmdline,
  1156.                     unsigned long long  system_ram,
  1157.                     unsigned long long  *crash_size,
  1158.                     unsigned long long  *crash_base)
  1159. {
  1160.     char *cur = cmdline, *tmp;
  1161.  
  1162.     /* for each entry of the comma-separated list */
  1163.     do {
  1164.         unsigned long long start, end = ULLONG_MAX, size;
  1165.  
  1166.         /* get the start of the range */
  1167.         start = memparse(cur, &tmp);
  1168.         if (cur == tmp) {
  1169.             pr_warning("crashkernel: Memory value expected\n");
  1170.             return -EINVAL;
  1171.         }
  1172.         cur = tmp;
  1173.         if (*cur != '-') {
  1174.             pr_warning("crashkernel: '-' expected\n");
  1175.             return -EINVAL;
  1176.         }
  1177.         cur++;
  1178.  
  1179.         /* if no ':' is here, then we read the end */
  1180.         if (*cur != ':') {
  1181.             end = memparse(cur, &tmp);
  1182.             if (cur == tmp) {
  1183.                 pr_warning("crashkernel: Memory "
  1184.                         "value expected\n");
  1185.                 return -EINVAL;
  1186.             }
  1187.             cur = tmp;
  1188.             if (end <= start) {
  1189.                 pr_warning("crashkernel: end <= start\n");
  1190.                 return -EINVAL;
  1191.             }
  1192.         }
  1193.  
  1194.         if (*cur != ':') {
  1195.             pr_warning("crashkernel: ':' expected\n");
  1196.             return -EINVAL;
  1197.         }
  1198.         cur++;
  1199.  
  1200.         size = memparse(cur, &tmp);
  1201.         if (cur == tmp) {
  1202.             pr_warning("Memory value expected\n");
  1203.             return -EINVAL;
  1204.         }
  1205.         cur = tmp;
  1206.         if (size >= system_ram) {
  1207.             pr_warning("crashkernel: invalid size\n");
  1208.             return -EINVAL;
  1209.         }
  1210.  
  1211.         /* match ? */
  1212.         if (system_ram >= start && system_ram < end) {
  1213.             *crash_size = size;
  1214.             break;
  1215.         }
  1216.     } while (*cur++ == ',');
  1217.  
  1218.     if (*crash_size > 0) {
  1219.         while (*cur != ' ' && *cur != '@')
  1220.             cur++;
  1221.         if (*cur == '@') {
  1222.             cur++;
  1223.             *crash_base = memparse(cur, &tmp);
  1224.             if (cur == tmp) {
  1225.                 pr_warning("Memory value expected "
  1226.                         "after '@'\n");
  1227.                 return -EINVAL;
  1228.             }
  1229.         }
  1230.     }
  1231.  
  1232.     return 0;
  1233. }
  1234.  
  1235. /*
  1236.  * This function parses "simple" (old) crashkernel command lines like
  1237.  *
  1238.  *  crashkernel=size[@offset]
  1239.  *
  1240.  * It returns 0 on success and -EINVAL on failure.
  1241.  */
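/* For example, "crashkernel=64M@16M" reserves 64M starting at offset 16M. */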
  1242. static int __init parse_crashkernel_simple(char         *cmdline,
  1243.                        unsigned long long   *crash_size,
  1244.                        unsigned long long   *crash_base)
  1245. {
  1246.     char *cur = cmdline;
  1247.  
  1248.     *crash_size = memparse(cmdline, &cur);
  1249.     if (cmdline == cur) {
  1250.         pr_warning("crashkernel: memory value expected\n");
  1251.         return -EINVAL;
  1252.     }
  1253.  
  1254.     if (*cur == '@')
  1255.         *crash_base = memparse(cur+1, &cur);
  1256.  
  1257.     return 0;
  1258. }
  1259.  
  1260. /*
  1261.  * This function is the entry point for command line parsing and should be
  1262.  * called from the arch-specific code.
  1263.  */
  1264. int __init parse_crashkernel(char        *cmdline,
  1265.                  unsigned long long system_ram,
  1266.                  unsigned long long *crash_size,
  1267.                  unsigned long long *crash_base)
  1268. {
  1269.     char    *p = cmdline, *ck_cmdline = NULL;
  1270.     char    *first_colon, *first_space;
  1271.  
  1272.     BUG_ON(!crash_size || !crash_base);
  1273.     *crash_size = 0;
  1274.     *crash_base = 0;
  1275.  
  1276.     /* find crashkernel and use the last one if there are more */
  1277.     p = strstr(p, "crashkernel=");
  1278.     while (p) {
  1279.         ck_cmdline = p;
  1280.         p = strstr(p+1, "crashkernel=");
  1281.     }
  1282.  
  1283.     if (!ck_cmdline)
  1284.         return -EINVAL;
  1285.  
  1286.     ck_cmdline += 12; /* strlen("crashkernel=") */
  1287.  
  1288.     /*
  1289.      * if the commandline contains a ':', then that's the extended
  1290.      * syntax -- if not, it must be the classic syntax
  1291.      */
  1292.     first_colon = strchr(ck_cmdline, ':');
  1293.     first_space = strchr(ck_cmdline, ' ');
  1294.     if (first_colon && (!first_space || first_colon < first_space))
  1295.         return parse_crashkernel_mem(ck_cmdline, system_ram,
  1296.                 crash_size, crash_base);
  1297.     else
  1298.         return parse_crashkernel_simple(ck_cmdline, crash_size,
  1299.                 crash_base);
  1300.  
  1301.     return 0;
  1302. }
  1303.  
  1304.  
  1305.  
  1306. void crash_save_vmcoreinfo(void)
  1307. {
  1308.     u32 *buf;
  1309.  
  1310.     if (!vmcoreinfo_size)
  1311.         return;
  1312.  
  1313.     vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
  1314.  
  1315.     buf = (u32 *)vmcoreinfo_note;
  1316.  
  1317.     buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
  1318.                   vmcoreinfo_size);
  1319.  
  1320.     final_note(buf);
  1321. }
  1322.  
  1323. void vmcoreinfo_append_str(const char *fmt, ...)
  1324. {
  1325.     va_list args;
  1326.     char buf[0x50];
  1327.     int r;
  1328.  
  1329.     va_start(args, fmt);
  1330.     r = vsnprintf(buf, sizeof(buf), fmt, args);
  1331.     va_end(args);
  1332.  
  1333.     if (r + vmcoreinfo_size > vmcoreinfo_max_size)
  1334.         r = vmcoreinfo_max_size - vmcoreinfo_size;
  1335.  
  1336.     memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
  1337.  
  1338.     vmcoreinfo_size += r;
  1339. }
  1340.  
  1341. /*
  1342.  * provide an empty default implementation here -- architecture
  1343.  * code may override this
  1344.  */
  1345. void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void)
  1346. {}
  1347.  
  1348. unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
  1349. {
  1350.     return __pa((unsigned long)(char *)&vmcoreinfo_note);
  1351. }
  1352.  
  1353. /*
  1354.  * Move into place and start executing a preloaded standalone
  1355.  * executable.  If nothing was preloaded return an error.
  1356.  */
  1357. int kernel_kexec(void)
  1358. {
  1359.     int error = 0;
  1360.  
  1361.     if (!mutex_trylock(&kexec_mutex))
  1362.         return -EBUSY;
  1363.     if (!kexec_image) {
  1364.         error = -EINVAL;
  1365.         goto Unlock;
  1366.     }
  1367.  
  1368. #ifdef CONFIG_KEXEC_JUMP
  1369.     if (kexec_image->preserve_context) {
  1370.         mutex_lock(&pm_mutex);
  1371.         pm_prepare_console();
  1372.         error = freeze_processes();
  1373.         if (error) {
  1374.             error = -EBUSY;
  1375.             goto Restore_console;
  1376.         }
  1377.         suspend_console();
  1378.         error = device_suspend(PMSG_FREEZE);
  1379.         if (error)
  1380.             goto Resume_console;
  1381.         error = disable_nonboot_cpus();
  1382.         if (error)
  1383.             goto Resume_devices;
  1384.         device_pm_lock();
  1385.         local_irq_disable();
  1386.         /* At this point, device_suspend() has been called,
  1387.          * but *not* device_power_down(). We *must*
  1388.          * device_power_down() now.  Otherwise, drivers for
  1389.          * some devices (e.g. interrupt controllers) become
  1390.          * desynchronized with the actual state of the
  1391.          * hardware at resume time, and evil weirdness ensues.
  1392.          */
  1393.         error = device_power_down(PMSG_FREEZE);
  1394.         if (error)
  1395.             goto Enable_irqs;
  1396.  
  1397.         /* Suspend system devices */
  1398.         error = sysdev_suspend(PMSG_FREEZE);
  1399.         if (error)
  1400.             goto Power_up_devices;
  1401.     } else
  1402. #endif
  1403.     {
  1404.         kernel_restart_prepare(NULL);
  1405.         printk(KERN_EMERG "Starting new kernel\n");
  1406.         machine_shutdown();
  1407.     }
  1408.  
  1409.     machine_kexec(kexec_image);
  1410.  
  1411. #ifdef CONFIG_KEXEC_JUMP
  1412.     if (kexec_image->preserve_context) {
  1413.         sysdev_resume();
  1414.  Power_up_devices:
  1415.         device_power_up(PMSG_RESTORE);
  1416.  Enable_irqs:
  1417.         local_irq_enable();
  1418.         device_pm_unlock();
  1419.         enable_nonboot_cpus();
  1420.  Resume_devices:
  1421.         device_resume(PMSG_RESTORE);
  1422.  Resume_console:
  1423.         resume_console();
  1424.         thaw_processes();
  1425.  Restore_console:
  1426.         pm_restore_console();
  1427.         mutex_unlock(&pm_mutex);
  1428.     }
  1429. #endif
  1430.  
  1431.  Unlock:
  1432.     mutex_unlock(&kexec_mutex);
  1433.     return error;
  1434. }
  1435.  
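/*
 * Scan kernel memory between two exported symbols for a table whose
 * __NR_close slot points at sys_close, and treat that as the syscall
 * table.  This is a heuristic: it assumes the table actually lies
 * between unlock_kernel and loops_per_jiffy on the target kernel.
 */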
  1436. unsigned long **find_sys_call_table(void) {
  1437.     unsigned long **sctable;
  1438.     unsigned long ptr;
  1439.     extern unsigned long loops_per_jiffy;
  1440.     sctable = NULL;
  1441.     for (ptr = (unsigned long)&unlock_kernel; ptr < (unsigned long)&loops_per_jiffy; ptr += sizeof(void *)) {
  1442.         unsigned long *p;
  1443.         p = (unsigned long *)ptr;
  1444.         if (p[__NR_close] == (unsigned long)sys_close) {
  1445.             sctable = (unsigned long **)p;
  1446.             return &sctable[0];
  1447.         }
  1448.     }
  1449.     return NULL;
  1450. }
  1451.  
  1452. static int __init kexec_module_init(void)
  1453. {
  1454. //  sys_call_table=(void **)find_sys_call_table();
  1455. //  if(sys_call_table==NULL) {
  1456. //      printk(KERN_ERR "Cannot find the system call address\n");
  1457. //      return -1;  // do not load
  1458. //  }
  1459.  
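    /*
     * Hard-coded sys_call_table address for one particular kernel
     * build (presumably taken from that kernel's System.map).  On any
     * other kernel this must be changed, or the find_sys_call_table()
     * scan above used instead.
     */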
  1460.     sys_call_table = (void **)0xc003d004;
  1461.  
  1462.     /* Set kexec_load() syscall. */
  1463.     sys_call_table[__NR_kexec_load] = kexec_load;
  1464.  
  1465.     /* Swap reboot() syscall and store original */
  1466.     original_reboot = sys_call_table[__NR_reboot];
  1467.     sys_call_table[__NR_reboot] = reboot;
  1468.  
  1469.     /* crash_notes_memory_init */
  1470.     /* Allocate memory for saving cpu registers. */
  1471.     crash_notes = alloc_percpu(note_buf_t);
  1472.     if (!crash_notes) {
  1473.         printk("Kexec: Memory allocation for saving cpu register"
  1474.         " states failed\n");
  1475.         return -ENOMEM;
  1476.     }
  1477.  
  1478.     /* crash_vmcoreinfo_init */
  1479.     VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
  1480.     VMCOREINFO_PAGESIZE(PAGE_SIZE);
  1481.  
  1482.     VMCOREINFO_SYMBOL(init_uts_ns);
  1483.     VMCOREINFO_SYMBOL(node_online_map);
  1484.  
  1485. #ifndef CONFIG_NEED_MULTIPLE_NODES
  1486.     VMCOREINFO_SYMBOL(mem_map);
  1487.     VMCOREINFO_SYMBOL(contig_page_data);
  1488. #endif
  1489. #ifdef CONFIG_SPARSEMEM
  1490.     VMCOREINFO_SYMBOL(mem_section);
  1491.     VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
  1492.     VMCOREINFO_STRUCT_SIZE(mem_section);
  1493.     VMCOREINFO_OFFSET(mem_section, section_mem_map);
  1494. #endif
  1495.     VMCOREINFO_STRUCT_SIZE(page);
  1496.     VMCOREINFO_STRUCT_SIZE(pglist_data);
  1497.     VMCOREINFO_STRUCT_SIZE(zone);
  1498.     VMCOREINFO_STRUCT_SIZE(free_area);
  1499.     VMCOREINFO_STRUCT_SIZE(list_head);
  1500.     VMCOREINFO_SIZE(nodemask_t);
  1501.     VMCOREINFO_OFFSET(page, flags);
  1502.     VMCOREINFO_OFFSET(page, _count);
  1503.     VMCOREINFO_OFFSET(page, mapping);
  1504.     VMCOREINFO_OFFSET(page, lru);
  1505.     VMCOREINFO_OFFSET(pglist_data, node_zones);
  1506.     VMCOREINFO_OFFSET(pglist_data, nr_zones);
  1507. #ifdef CONFIG_FLAT_NODE_MEM_MAP
  1508.     VMCOREINFO_OFFSET(pglist_data, node_mem_map);
  1509. #endif
  1510.     VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
  1511.     VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
  1512.     VMCOREINFO_OFFSET(pglist_data, node_id);
  1513.     VMCOREINFO_OFFSET(zone, free_area);
  1514.     VMCOREINFO_OFFSET(zone, vm_stat);
  1515.     VMCOREINFO_OFFSET(zone, spanned_pages);
  1516.     VMCOREINFO_OFFSET(free_area, free_list);
  1517.     VMCOREINFO_OFFSET(list_head, next);
  1518.     VMCOREINFO_OFFSET(list_head, prev);
  1519.     VMCOREINFO_OFFSET(vm_struct, addr);
  1520.     VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
  1521.     VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
  1522.     VMCOREINFO_NUMBER(NR_FREE_PAGES);
  1523.     VMCOREINFO_NUMBER(PG_lru);
  1524.     VMCOREINFO_NUMBER(PG_private);
  1525.     VMCOREINFO_NUMBER(PG_swapcache);
  1526.  
  1527.     arch_crash_save_vmcoreinfo();
  1528.  
  1529.     return 0;
  1530. }
  1531.  
  1532. module_init(kexec_module_init);
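/*
 * Expected usage, as an assumption based on the hooks installed above
 * rather than anything spelled out in this file: insmod the module on a
 * kernel built without CONFIG_KEXEC, load an image through the patched
 * kexec_load() syscall (e.g. with kexec-tools), then trigger the switch
 * with reboot(LINUX_REBOOT_CMD_KEXEC), which the replacement reboot()
 * handler -- defined outside this file -- presumably routes to
 * kernel_kexec().
 */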