  1. /*
  2.  *  linux/fs/proc/base.c
  3.  *
  4.  *  Copyright (C) 1991, 1992 Linus Torvalds
  5.  *
  6.  *  proc base directory handling functions
  7.  *
  8.  *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
  9.  *  Instead of using magical inumbers to determine the kind of object,
  10.  *  we allocate and fill in-core inodes upon lookup. They don't even
  11.  *  go into icache. We cache the reference to task_struct upon lookup too.
  12.  *  Eventually it should become a filesystem in its own right. We don't use
  13.  *  the rest of procfs anymore.
  14.  *
  15.  *
  16.  *  Changelog:
  17.  *  17-Jan-2005
  18.  *  Allan Bezerra
  19.  *  Bruna Moreira <bruna.moreira@indt.org.br>
  20.  *  Edjard Mota <edjard.mota@indt.org.br>
  21.  *  Ilias Biris <ilias.biris@indt.org.br>
  22.  *  Mauricio Lin <mauricio.lin@indt.org.br>
  23.  *
  24.  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
  25.  *
  26.  *  A new process-specific entry (smaps) was added to /proc. It shows the
  27.  *  size of rss for each memory area. The maps entry lacks information
  28.  *  about physical memory size (rss) for each mapped file, i.e.,
  29.  *  rss information for executables and library files.
  30.  *  This additional information is useful for any tools that need to know
  31.  *  about physical memory consumption for a process-specific library.
  32.  *
  33.  *  Changelog:
  34.  *  21-Feb-2005
  35.  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
  36.  *  Pud inclusion in the page table walking.
  37.  *
  38.  *  ChangeLog:
  39.  *  10-Mar-2005
  40.  *  10LE Instituto Nokia de Tecnologia - INdT:
  41.  *  A better way to walk through the page table, as suggested by Hugh Dickins.
  42.  *
  43.  *  Simo Piiroinen <simo.piiroinen@nokia.com>:
  44.  *  Smaps information related to shared, private, clean and dirty pages.
  45.  *
  46.  *  Paul Mundt <paul.mundt@nokia.com>:
  47.  *  Overall revision about smaps.
  48.  */
  49.  
  50. #include <asm/uaccess.h>
  51.  
  52. #include <linux/errno.h>
  53. #include <linux/time.h>
  54. #include <linux/proc_fs.h>
  55. #include <linux/stat.h>
  56. #include <linux/task_io_accounting_ops.h>
  57. #include <linux/init.h>
  58. #include <linux/capability.h>
  59. #include <linux/file.h>
  60. #include <linux/fdtable.h>
  61. #include <linux/string.h>
  62. #include <linux/seq_file.h>
  63. #include <linux/namei.h>
  64. #include <linux/mnt_namespace.h>
  65. #include <linux/mm.h>
  66. #include <linux/swap.h>
  67. #include <linux/rcupdate.h>
  68. #include <linux/kallsyms.h>
  69. #include <linux/stacktrace.h>
  70. #include <linux/resource.h>
  71. #include <linux/module.h>
  72. #include <linux/mount.h>
  73. #include <linux/security.h>
  74. #include <linux/ptrace.h>
  75. #include <linux/tracehook.h>
  76. #include <linux/cgroup.h>
  77. #include <linux/cpuset.h>
  78. #include <linux/audit.h>
  79. #include <linux/poll.h>
  80. #include <linux/nsproxy.h>
  81. #include <linux/oom.h>
  82. #include <linux/elf.h>
  83. #include <linux/pid_namespace.h>
  84. #include <linux/fs_struct.h>
  85. #include <linux/slab.h>
  86. #ifdef CONFIG_HARDWALL
  87. #include <asm/hardwall.h>
  88. #endif
  89. #include "internal.h"
  90.  
  91. /* NOTE:
  92.  *  Implementing inode permission operations in /proc is almost
  93.  *  certainly an error.  Permission checks need to happen during
  94.  *  each system call, not at open time.  The reason is that most of
  95.  *  what we wish to check for permissions in /proc varies at runtime.
  96.  *
  97.  *  The classic example of a problem is opening file descriptors
  98.  *  in /proc for a task before it execs a suid executable.
  99.  */
  100.  
  101. struct pid_entry {
  102.     char *name;
  103.     int len;
  104.     mode_t mode;
  105.     const struct inode_operations *iop;
  106.     const struct file_operations *fop;
  107.     union proc_op op;
  108. };
  109.  
  110. #define NOD(NAME, MODE, IOP, FOP, OP) {         \
  111.     .name = (NAME),                 \
  112.     .len  = sizeof(NAME) - 1,           \
  113.     .mode = MODE,                   \
  114.     .iop  = IOP,                    \
  115.     .fop  = FOP,                    \
  116.     .op   = OP,                 \
  117. }
  118.  
  119. #define DIR(NAME, MODE, iops, fops) \
  120.     NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
  121. #define LNK(NAME, get_link)                 \
  122.     NOD(NAME, (S_IFLNK|S_IRWXUGO),              \
  123.         &proc_pid_link_inode_operations, NULL,      \
  124.         { .proc_get_link = get_link } )
  125. #define REG(NAME, MODE, fops)               \
  126.     NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
  127. #define INF(NAME, MODE, read)               \
  128.     NOD(NAME, (S_IFREG|(MODE)),             \
  129.         NULL, &proc_info_file_operations,   \
  130.         { .proc_read = read } )
  131. #define ONE(NAME, MODE, show)               \
  132.     NOD(NAME, (S_IFREG|(MODE)),             \
  133.         NULL, &proc_single_file_operations, \
  134.         { .proc_show = show } )
  135.  
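
/*
 * Editor's illustration (not in the original source): a sketch of how the
 * helper macros above expand.  The entry name "cmdline" and the handler
 * proc_pid_cmdline are plausible examples only, not taken from this excerpt.
 * Under those assumptions,
 *
 *     INF("cmdline", S_IRUGO, proc_pid_cmdline)
 *
 * expands (via NOD) to roughly
 *
 *     { .name = "cmdline", .len = 7, .mode = S_IFREG|S_IRUGO,
 *       .iop = NULL, .fop = &proc_info_file_operations,
 *       .op = { .proc_read = proc_pid_cmdline } }
 *
 * i.e. a regular read-only /proc/<pid> file whose contents are produced by
 * the proc_read callback through proc_info_read() further down.
 */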
  136. /*
  137.  * Count the number of hardlinks for the pid_entry table, excluding the .
  138.  * and .. links.
  139.  */
  140. static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
  141.     unsigned int n)
  142. {
  143.     unsigned int i;
  144.     unsigned int count;
  145.  
  146.     count = 0;
  147.     for (i = 0; i < n; ++i) {
  148.         if (S_ISDIR(entries[i].mode))
  149.             ++count;
  150.     }
  151.  
  152.     return count;
  153. }
  154.  
  155. static int get_task_root(struct task_struct *task, struct path *root)
  156. {
  157.     int result = -ENOENT;
  158.  
  159.     task_lock(task);
  160.     if (task->fs) {
  161.         get_fs_root(task->fs, root);
  162.         result = 0;
  163.     }
  164.     task_unlock(task);
  165.     return result;
  166. }
  167.  
  168. static int proc_cwd_link(struct inode *inode, struct path *path)
  169. {
  170.     struct task_struct *task = get_proc_task(inode);
  171.     int result = -ENOENT;
  172.  
  173.     if (task) {
  174.         task_lock(task);
  175.         if (task->fs) {
  176.             get_fs_pwd(task->fs, path);
  177.             result = 0;
  178.         }
  179.         task_unlock(task);
  180.         put_task_struct(task);
  181.     }
  182.     return result;
  183. }
  184.  
  185. static int proc_root_link(struct inode *inode, struct path *path)
  186. {
  187.     struct task_struct *task = get_proc_task(inode);
  188.     int result = -ENOENT;
  189.  
  190.     if (task) {
  191.         result = get_task_root(task, path);
  192.         put_task_struct(task);
  193.     }
  194.     return result;
  195. }
  196.  
  197. static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
  198. {
  199.     struct mm_struct *mm;
  200.     int err;
  201.  
  202.     err =  mutex_lock_killable(&task->signal->cred_guard_mutex);
  203.     if (err)
  204.         return ERR_PTR(err);
  205.  
  206.     mm = get_task_mm(task);
  207.     if (mm && mm != current->mm &&
  208.             !ptrace_may_access(task, mode)) {
  209.         mmput(mm);
  210.         mm = ERR_PTR(-EACCES);
  211.     }
  212.     mutex_unlock(&task->signal->cred_guard_mutex);
  213.  
  214.     return mm;
  215. }
  216.  
  217. struct mm_struct *mm_for_maps(struct task_struct *task)
  218. {
  219.     return mm_access(task, PTRACE_MODE_READ);
  220. }
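
/*
 * Editor's note (not in the original source): callers of mm_access() and
 * mm_for_maps() see three kinds of result.  NULL means the target task has
 * no mm (e.g. a kernel thread); an ERR_PTR() value means the wait on
 * cred_guard_mutex was killed or the ptrace access check failed; anything
 * else is a referenced mm_struct that must be dropped with mmput().  A
 * typical caller therefore looks roughly like:
 *
 *     struct mm_struct *mm = mm_for_maps(task);
 *     if (!mm || IS_ERR(mm))
 *         return mm ? PTR_ERR(mm) : 0;   // hypothetical error policy
 *     ...use mm...
 *     mmput(mm);
 */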
  221.  
  222. static int proc_pid_cmdline(struct task_struct *task, char * buffer)
  223. {
  224.     int res = 0;
  225.     unsigned int len;
  226.     struct mm_struct *mm = get_task_mm(task);
  227.     if (!mm)
  228.         goto out;
  229.     if (!mm->arg_end)
  230.         goto out_mm;    /* Shh! No looking before we're done */
  231.  
  232.     len = mm->arg_end - mm->arg_start;
  233.  
  234.     if (len > PAGE_SIZE)
  235.         len = PAGE_SIZE;
  236.  
  237.     res = access_process_vm(task, mm->arg_start, buffer, len, 0);
  238.  
  239.     /* If the NUL at the end of args has been overwritten, then
  240.      * assume the application is using setproctitle(3). */
  241.     if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
  242.         len = strnlen(buffer, res);
  243.         if (len < res) {
  244.             res = len;
  245.         } else {
  246.             len = mm->env_end - mm->env_start;
  247.             if (len > PAGE_SIZE - res)
  248.                 len = PAGE_SIZE - res;
  249.             res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
  250.             res = strnlen(buffer, res);
  251.         }
  252.     }
  253. out_mm:
  254.     mmput(mm);
  255. out:
  256.     return res;
  257. }
  258.  
  259. static int proc_pid_auxv(struct task_struct *task, char *buffer)
  260. {
  261.     struct mm_struct *mm = mm_for_maps(task);
  262.     int res = PTR_ERR(mm);
  263.     if (mm && !IS_ERR(mm)) {
  264.         unsigned int nwords = 0;
  265.         do {
  266.             nwords += 2;
  267.         } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
  268.         res = nwords * sizeof(mm->saved_auxv[0]);
  269.         if (res > PAGE_SIZE)
  270.             res = PAGE_SIZE;
  271.         memcpy(buffer, mm->saved_auxv, res);
  272.         mmput(mm);
  273.     }
  274.     return res;
  275. }
  276.  
  277.  
  278. #ifdef CONFIG_KALLSYMS
  279. /*
  280.  * Provides a wchan file via kallsyms in a proper one-value-per-file format.
  281.  * Returns the resolved symbol.  If that fails, simply return the address.
  282.  */
  283. static int proc_pid_wchan(struct task_struct *task, char *buffer)
  284. {
  285.     unsigned long wchan;
  286.     char symname[KSYM_NAME_LEN];
  287.  
  288.     wchan = get_wchan(task);
  289.  
  290.     if (lookup_symbol_name(wchan, symname) < 0) {
  291.         if (!ptrace_may_access(task, PTRACE_MODE_READ))
  292.             return 0;
  293.         else
  294.             return sprintf(buffer, "%lu", wchan);
  295.     } else
  296.         return sprintf(buffer, "%s", symname);
  297. }
  298. #endif /* CONFIG_KALLSYMS */
  299.  
  300. static int lock_trace(struct task_struct *task)
  301. {
  302.     int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
  303.     if (err)
  304.         return err;
  305.     if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
  306.         mutex_unlock(&task->signal->cred_guard_mutex);
  307.         return -EPERM;
  308.     }
  309.     return 0;
  310. }
  311.  
  312. static void unlock_trace(struct task_struct *task)
  313. {
  314.     mutex_unlock(&task->signal->cred_guard_mutex);
  315. }
  316.  
  317. #ifdef CONFIG_STACKTRACE
  318.  
  319. #define MAX_STACK_TRACE_DEPTH   64
  320.  
  321. static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
  322.               struct pid *pid, struct task_struct *task)
  323. {
  324.     struct stack_trace trace;
  325.     unsigned long *entries;
  326.     int err;
  327.     int i;
  328.  
  329.     entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
  330.     if (!entries)
  331.         return -ENOMEM;
  332.  
  333.     trace.nr_entries    = 0;
  334.     trace.max_entries   = MAX_STACK_TRACE_DEPTH;
  335.     trace.entries       = entries;
  336.     trace.skip      = 0;
  337.  
  338.     err = lock_trace(task);
  339.     if (!err) {
  340.         save_stack_trace_tsk(task, &trace);
  341.  
  342.         for (i = 0; i < trace.nr_entries; i++) {
  343.             seq_printf(m, "[<%pK>] %pS\n",
  344.                    (void *)entries[i], (void *)entries[i]);
  345.         }
  346.         unlock_trace(task);
  347.     }
  348.     kfree(entries);
  349.  
  350.     return err;
  351. }
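
/*
 * Editor's note (not in the original source): the "%pK" format above is
 * what keeps /proc/<pid>/stack from leaking raw kernel addresses; depending
 * on the kptr_restrict sysctl it prints either the real pointer or zeroes,
 * while "%pS" still resolves each saved entry to a symbol name.
 */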
  352. #endif
  353.  
  354. #ifdef CONFIG_SCHEDSTATS
  355. /*
  356.  * Provides /proc/PID/schedstat
  357.  */
  358. static int proc_pid_schedstat(struct task_struct *task, char *buffer)
  359. {
  360.     return sprintf(buffer, "%llu %llu %lu\n",
  361.             (unsigned long long)task->se.sum_exec_runtime,
  362.             (unsigned long long)task->sched_info.run_delay,
  363.             task->sched_info.pcount);
  364. }
  365. #endif
  366.  
  367. #ifdef CONFIG_LATENCYTOP
  368. static int lstats_show_proc(struct seq_file *m, void *v)
  369. {
  370.     int i;
  371.     struct inode *inode = m->private;
  372.     struct task_struct *task = get_proc_task(inode);
  373.  
  374.     if (!task)
  375.         return -ESRCH;
  376.     seq_puts(m, "Latency Top version : v0.1\n");
  377.     for (i = 0; i < 32; i++) {
  378.         struct latency_record *lr = &task->latency_record[i];
  379.         if (lr->backtrace[0]) {
  380.             int q;
  381.             seq_printf(m, "%i %li %li",
  382.                    lr->count, lr->time, lr->max);
  383.             for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
  384.                 unsigned long bt = lr->backtrace[q];
  385.                 if (!bt)
  386.                     break;
  387.                 if (bt == ULONG_MAX)
  388.                     break;
  389.                 seq_printf(m, " %ps", (void *)bt);
  390.             }
  391.             seq_putc(m, '\n');
  392.         }
  393.  
  394.     }
  395.     put_task_struct(task);
  396.     return 0;
  397. }
  398.  
  399. static int lstats_open(struct inode *inode, struct file *file)
  400. {
  401.     return single_open(file, lstats_show_proc, inode);
  402. }
  403.  
  404. static ssize_t lstats_write(struct file *file, const char __user *buf,
  405.                 size_t count, loff_t *offs)
  406. {
  407.     struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
  408.  
  409.     if (!task)
  410.         return -ESRCH;
  411.     clear_all_latency_tracing(task);
  412.     put_task_struct(task);
  413.  
  414.     return count;
  415. }
  416.  
  417. static const struct file_operations proc_lstats_operations = {
  418.     .open       = lstats_open,
  419.     .read       = seq_read,
  420.     .write      = lstats_write,
  421.     .llseek     = seq_lseek,
  422.     .release    = single_release,
  423. };
  424.  
  425. #endif
  426.  
  427. static int proc_oom_score(struct task_struct *task, char *buffer)
  428. {
  429.     unsigned long points = 0;
  430.  
  431.     read_lock(&tasklist_lock);
  432.     if (pid_alive(task))
  433.         points = oom_badness(task, NULL, NULL,
  434.                     totalram_pages + total_swap_pages);
  435.     read_unlock(&tasklist_lock);
  436.     return sprintf(buffer, "%lu\n", points);
  437. }
  438.  
  439. struct limit_names {
  440.     char *name;
  441.     char *unit;
  442. };
  443.  
  444. static const struct limit_names lnames[RLIM_NLIMITS] = {
  445.     [RLIMIT_CPU] = {"Max cpu time", "seconds"},
  446.     [RLIMIT_FSIZE] = {"Max file size", "bytes"},
  447.     [RLIMIT_DATA] = {"Max data size", "bytes"},
  448.     [RLIMIT_STACK] = {"Max stack size", "bytes"},
  449.     [RLIMIT_CORE] = {"Max core file size", "bytes"},
  450.     [RLIMIT_RSS] = {"Max resident set", "bytes"},
  451.     [RLIMIT_NPROC] = {"Max processes", "processes"},
  452.     [RLIMIT_NOFILE] = {"Max open files", "files"},
  453.     [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
  454.     [RLIMIT_AS] = {"Max address space", "bytes"},
  455.     [RLIMIT_LOCKS] = {"Max file locks", "locks"},
  456.     [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
  457.     [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
  458.     [RLIMIT_NICE] = {"Max nice priority", NULL},
  459.     [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
  460.     [RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
  461. };
  462.  
  463. /* Display limits for a process */
  464. static int proc_pid_limits(struct task_struct *task, char *buffer)
  465. {
  466.     unsigned int i;
  467.     int count = 0;
  468.     unsigned long flags;
  469.     char *bufptr = buffer;
  470.  
  471.     struct rlimit rlim[RLIM_NLIMITS];
  472.  
  473.     if (!lock_task_sighand(task, &flags))
  474.         return 0;
  475.     memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
  476.     unlock_task_sighand(task, &flags);
  477.  
  478.     /*
  479.      * print the file header
  480.      */
  481.     count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
  482.             "Limit", "Soft Limit", "Hard Limit", "Units");
  483.  
  484.     for (i = 0; i < RLIM_NLIMITS; i++) {
  485.         if (rlim[i].rlim_cur == RLIM_INFINITY)
  486.             count += sprintf(&bufptr[count], "%-25s %-20s ",
  487.                      lnames[i].name, "unlimited");
  488.         else
  489.             count += sprintf(&bufptr[count], "%-25s %-20lu ",
  490.                      lnames[i].name, rlim[i].rlim_cur);
  491.  
  492.         if (rlim[i].rlim_max == RLIM_INFINITY)
  493.             count += sprintf(&bufptr[count], "%-20s ", "unlimited");
  494.         else
  495.             count += sprintf(&bufptr[count], "%-20lu ",
  496.                      rlim[i].rlim_max);
  497.  
  498.         if (lnames[i].unit)
  499.             count += sprintf(&bufptr[count], "%-10s\n",
  500.                      lnames[i].unit);
  501.         else
  502.             count += sprintf(&bufptr[count], "\n");
  503.     }
  504.  
  505.     return count;
  506. }
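
/*
 * Editor's illustration (not in the original source): with the format
 * strings above, the generated limits file looks roughly like
 *
 *     Limit                     Soft Limit           Hard Limit           Units
 *     Max cpu time              unlimited            unlimited            seconds
 *     Max open files            1024                 4096                 files
 *     ...
 *
 * (the numeric values are made up), with one row per RLIMIT_* slot and an
 * empty unit column for limits that have none (nice and realtime priority).
 */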
  507.  
  508. #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
  509. static int proc_pid_syscall(struct task_struct *task, char *buffer)
  510. {
  511.     long nr;
  512.     unsigned long args[6], sp, pc;
  513.     int res = lock_trace(task);
  514.     if (res)
  515.         return res;
  516.  
  517.     if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
  518.         res = sprintf(buffer, "running\n");
  519.     else if (nr < 0)
  520.         res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
  521.     else
  522.         res = sprintf(buffer,
  523.                "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
  524.                nr,
  525.                args[0], args[1], args[2], args[3], args[4], args[5],
  526.                sp, pc);
  527.     unlock_trace(task);
  528.     return res;
  529. }
  530. #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
  531.  
  532. /************************************************************************/
  533. /*                       Here the fs part begins                        */
  534. /************************************************************************/
  535.  
  536. /* permission checks */
  537. static int proc_fd_access_allowed(struct inode *inode)
  538. {
  539.     struct task_struct *task;
  540.     int allowed = 0;
  541.     /* Allow access to a task's file descriptors if it is us, or if we
  542.      * may ptrace-attach to the process and find out that
  543.      * information.
  544.      */
  545.     task = get_proc_task(inode);
  546.     if (task) {
  547.         allowed = ptrace_may_access(task, PTRACE_MODE_READ);
  548.         put_task_struct(task);
  549.     }
  550.     return allowed;
  551. }
  552.  
  553. int proc_setattr(struct dentry *dentry, struct iattr *attr)
  554. {
  555.     int error;
  556.     struct inode *inode = dentry->d_inode;
  557.  
  558.     if (attr->ia_valid & ATTR_MODE)
  559.         return -EPERM;
  560.  
  561.     error = inode_change_ok(inode, attr);
  562.     if (error)
  563.         return error;
  564.  
  565.     if ((attr->ia_valid & ATTR_SIZE) &&
  566.         attr->ia_size != i_size_read(inode)) {
  567.         error = vmtruncate(inode, attr->ia_size);
  568.         if (error)
  569.             return error;
  570.     }
  571.  
  572.     setattr_copy(inode, attr);
  573.     mark_inode_dirty(inode);
  574.     return 0;
  575. }
  576.  
  577. static const struct inode_operations proc_def_inode_operations = {
  578.     .setattr    = proc_setattr,
  579. };
  580.  
  581. static int mounts_open_common(struct inode *inode, struct file *file,
  582.                   const struct seq_operations *op)
  583. {
  584.     struct task_struct *task = get_proc_task(inode);
  585.     struct nsproxy *nsp;
  586.     struct mnt_namespace *ns = NULL;
  587.     struct path root;
  588.     struct proc_mounts *p;
  589.     int ret = -EINVAL;
  590.  
  591.     if (task) {
  592.         rcu_read_lock();
  593.         nsp = task_nsproxy(task);
  594.         if (nsp) {
  595.             ns = nsp->mnt_ns;
  596.             if (ns)
  597.                 get_mnt_ns(ns);
  598.         }
  599.         rcu_read_unlock();
  600.         if (ns && get_task_root(task, &root) == 0)
  601.             ret = 0;
  602.         put_task_struct(task);
  603.     }
  604.  
  605.     if (!ns)
  606.         goto err;
  607.     if (ret)
  608.         goto err_put_ns;
  609.  
  610.     ret = -ENOMEM;
  611.     p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
  612.     if (!p)
  613.         goto err_put_path;
  614.  
  615.     file->private_data = &p->m;
  616.     ret = seq_open(file, op);
  617.     if (ret)
  618.         goto err_free;
  619.  
  620.     p->m.private = p;
  621.     p->ns = ns;
  622.     p->root = root;
  623.     p->m.poll_event = ns->event;
  624.  
  625.     return 0;
  626.  
  627.  err_free:
  628.     kfree(p);
  629.  err_put_path:
  630.     path_put(&root);
  631.  err_put_ns:
  632.     put_mnt_ns(ns);
  633.  err:
  634.     return ret;
  635. }
  636.  
  637. static int mounts_release(struct inode *inode, struct file *file)
  638. {
  639.     struct proc_mounts *p = file->private_data;
  640.     path_put(&p->root);
  641.     put_mnt_ns(p->ns);
  642.     return seq_release(inode, file);
  643. }
  644.  
  645. static unsigned mounts_poll(struct file *file, poll_table *wait)
  646. {
  647.     struct proc_mounts *p = file->private_data;
  648.     unsigned res = POLLIN | POLLRDNORM;
  649.  
  650.     poll_wait(file, &p->ns->poll, wait);
  651.     if (mnt_had_events(p))
  652.         res |= POLLERR | POLLPRI;
  653.  
  654.     return res;
  655. }
  656.  
  657. static int mounts_open(struct inode *inode, struct file *file)
  658. {
  659.     return mounts_open_common(inode, file, &mounts_op);
  660. }
  661.  
  662. static const struct file_operations proc_mounts_operations = {
  663.     .open       = mounts_open,
  664.     .read       = seq_read,
  665.     .llseek     = seq_lseek,
  666.     .release    = mounts_release,
  667.     .poll       = mounts_poll,
  668. };
  669.  
  670. static int mountinfo_open(struct inode *inode, struct file *file)
  671. {
  672.     return mounts_open_common(inode, file, &mountinfo_op);
  673. }
  674.  
  675. static const struct file_operations proc_mountinfo_operations = {
  676.     .open       = mountinfo_open,
  677.     .read       = seq_read,
  678.     .llseek     = seq_lseek,
  679.     .release    = mounts_release,
  680.     .poll       = mounts_poll,
  681. };
  682.  
  683. static int mountstats_open(struct inode *inode, struct file *file)
  684. {
  685.     return mounts_open_common(inode, file, &mountstats_op);
  686. }
  687.  
  688. static const struct file_operations proc_mountstats_operations = {
  689.     .open       = mountstats_open,
  690.     .read       = seq_read,
  691.     .llseek     = seq_lseek,
  692.     .release    = mounts_release,
  693. };
  694.  
  695. #define PROC_BLOCK_SIZE (3*1024)        /* 4K page size but our output routines use some slack for overruns */
  696.  
  697. static ssize_t proc_info_read(struct file * file, char __user * buf,
  698.               size_t count, loff_t *ppos)
  699. {
  700.     struct inode * inode = file->f_path.dentry->d_inode;
  701.     unsigned long page;
  702.     ssize_t length;
  703.     struct task_struct *task = get_proc_task(inode);
  704.  
  705.     length = -ESRCH;
  706.     if (!task)
  707.         goto out_no_task;
  708.  
  709.     if (count > PROC_BLOCK_SIZE)
  710.         count = PROC_BLOCK_SIZE;
  711.  
  712.     length = -ENOMEM;
  713.     if (!(page = __get_free_page(GFP_TEMPORARY)))
  714.         goto out;
  715.  
  716.     length = PROC_I(inode)->op.proc_read(task, (char*)page);
  717.  
  718.     if (length >= 0)
  719.         length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
  720.     free_page(page);
  721. out:
  722.     put_task_struct(task);
  723. out_no_task:
  724.     return length;
  725. }
  726.  
  727. static const struct file_operations proc_info_file_operations = {
  728.     .read       = proc_info_read,
  729.     .llseek     = generic_file_llseek,
  730. };
  731.  
  732. static int proc_single_show(struct seq_file *m, void *v)
  733. {
  734.     struct inode *inode = m->private;
  735.     struct pid_namespace *ns;
  736.     struct pid *pid;
  737.     struct task_struct *task;
  738.     int ret;
  739.  
  740.     ns = inode->i_sb->s_fs_info;
  741.     pid = proc_pid(inode);
  742.     task = get_pid_task(pid, PIDTYPE_PID);
  743.     if (!task)
  744.         return -ESRCH;
  745.  
  746.     ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
  747.  
  748.     put_task_struct(task);
  749.     return ret;
  750. }
  751.  
  752. static int proc_single_open(struct inode *inode, struct file *filp)
  753. {
  754.     return single_open(filp, proc_single_show, inode);
  755. }
  756.  
  757. static const struct file_operations proc_single_file_operations = {
  758.     .open       = proc_single_open,
  759.     .read       = seq_read,
  760.     .llseek     = seq_lseek,
  761.     .release    = single_release,
  762. };
  763.  
  764. static int mem_open(struct inode* inode, struct file* file)
  765. {
  766.     struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
  767.     struct mm_struct *mm;
  768.  
  769.     if (!task)
  770.         return -ESRCH;
  771.  
  772.     mm = mm_access(task, PTRACE_MODE_ATTACH);
  773.     put_task_struct(task);
  774.  
  775.     if (IS_ERR(mm))
  776.         return PTR_ERR(mm);
  777.  
  778.     /* OK to pass negative loff_t, we can catch out-of-range */
  779.     file->f_mode |= FMODE_UNSIGNED_OFFSET;
  780.     file->private_data = mm;
  781.  
  782.     return 0;
  783. }
  784.  
  785. static ssize_t mem_read(struct file * file, char __user * buf,
  786.             size_t count, loff_t *ppos)
  787. {
  788.     int ret;
  789.     char *page;
  790.     unsigned long src = *ppos;
  791.     struct mm_struct *mm = file->private_data;
  792.  
  793.     if (!mm)
  794.         return 0;
  795.  
  796.     page = (char *)__get_free_page(GFP_TEMPORARY);
  797.     if (!page)
  798.         return -ENOMEM;
  799.  
  800.     ret = 0;
  801.  
  802.     while (count > 0) {
  803.         int this_len, retval;
  804.  
  805.         this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
  806.         retval = access_remote_vm(mm, src, page, this_len, 0);
  807.         if (!retval) {
  808.             if (!ret)
  809.                 ret = -EIO;
  810.             break;
  811.         }
  812.  
  813.         if (copy_to_user(buf, page, retval)) {
  814.             ret = -EFAULT;
  815.             break;
  816.         }
  817.  
  818.         ret += retval;
  819.         src += retval;
  820.         buf += retval;
  821.         count -= retval;
  822.     }
  823.     *ppos = src;
  824.  
  825.     free_page((unsigned long) page);
  826.     return ret;
  827. }
  828.  
  829. static ssize_t mem_write(struct file * file, const char __user *buf,
  830.              size_t count, loff_t *ppos)
  831. {
  832.     int copied;
  833.     char *page;
  834.     unsigned long dst = *ppos;
  835.     struct mm_struct *mm = file->private_data;
  836.  
  837.     if (!mm)
  838.         return 0;
  839.  
  840.     page = (char *)__get_free_page(GFP_TEMPORARY);
  841.     if (!page)
  842.         return -ENOMEM;
  843.  
  844.     copied = 0;
  845.     while (count > 0) {
  846.         int this_len, retval;
  847.  
  848.         this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
  849.         if (copy_from_user(page, buf, this_len)) {
  850.             copied = -EFAULT;
  851.             break;
  852.         }
  853.         retval = access_remote_vm(mm, dst, page, this_len, 1);
  854.         if (!retval) {
  855.             if (!copied)
  856.                 copied = -EIO;
  857.             break;
  858.         }
  859.         copied += retval;
  860.         buf += retval;
  861.         dst += retval;
  862.         count -= retval;
  863.     }
  864.     *ppos = dst;
  865.  
  866.     free_page((unsigned long) page);
  867.     return copied;
  868. }
  869.  
  870. loff_t mem_lseek(struct file *file, loff_t offset, int orig)
  871. {
  872.     switch (orig) {
  873.     case 0:
  874.         file->f_pos = offset;
  875.         break;
  876.     case 1:
  877.         file->f_pos += offset;
  878.         break;
  879.     default:
  880.         return -EINVAL;
  881.     }
  882.     force_successful_syscall_return();
  883.     return file->f_pos;
  884. }
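
/*
 * Editor's note (not in the original source): the bare 0 and 1 cases above
 * are SEEK_SET and SEEK_CUR; any other whence value, including SEEK_END,
 * gets -EINVAL since an address space has no meaningful end offset.
 * Combined with FMODE_UNSIGNED_OFFSET set in mem_open(), this lets a
 * caller seek to an arbitrary virtual address, e.g.
 *
 *     lseek(fd, 0xb7700000, SEEK_SET);   // hypothetical address
 *
 * and then read() or write() that range of the target's memory.
 */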
  885.  
  886. static int mem_release(struct inode *inode, struct file *file)
  887. {
  888.     struct mm_struct *mm = file->private_data;
  889.  
  890.     mmput(mm);
  891.     return 0;
  892. }
  893.  
  894. static const struct file_operations proc_mem_operations = {
  895.     .llseek     = mem_lseek,
  896.     .read       = mem_read,
  897.     .write      = mem_write,
  898.     .open       = mem_open,
  899.     .release    = mem_release,
  900. };
  901.  
  902. static ssize_t environ_read(struct file *file, char __user *buf,
  903.             size_t count, loff_t *ppos)
  904. {
  905.     struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
  906.     char *page;
  907.     unsigned long src = *ppos;
  908.     int ret = -ESRCH;
  909.     struct mm_struct *mm;
  910.  
  911.     if (!task)
  912.         goto out_no_task;
  913.  
  914.     ret = -ENOMEM;
  915.     page = (char *)__get_free_page(GFP_TEMPORARY);
  916.     if (!page)
  917.         goto out;
  918.  
  919.  
  920.     mm = mm_for_maps(task);
  921.     ret = PTR_ERR(mm);
  922.     if (!mm || IS_ERR(mm))
  923.         goto out_free;
  924.  
  925.     ret = 0;
  926.     while (count > 0) {
  927.         int this_len, retval, max_len;
  928.  
  929.         this_len = mm->env_end - (mm->env_start + src);
  930.  
  931.         if (this_len <= 0)
  932.             break;
  933.  
  934.         max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
  935.         this_len = (this_len > max_len) ? max_len : this_len;
  936.  
  937.         retval = access_process_vm(task, (mm->env_start + src),
  938.             page, this_len, 0);
  939.  
  940.         if (retval <= 0) {
  941.             ret = retval;
  942.             break;
  943.         }
  944.  
  945.         if (copy_to_user(buf, page, retval)) {
  946.             ret = -EFAULT;
  947.             break;
  948.         }
  949.  
  950.         ret += retval;
  951.         src += retval;
  952.         buf += retval;
  953.         count -= retval;
  954.     }
  955.     *ppos = src;
  956.  
  957.     mmput(mm);
  958. out_free:
  959.     free_page((unsigned long) page);
  960. out:
  961.     put_task_struct(task);
  962. out_no_task:
  963.     return ret;
  964. }
  965.  
  966. static const struct file_operations proc_environ_operations = {
  967.     .read       = environ_read,
  968.     .llseek     = generic_file_llseek,
  969. };
  970.  
  971. static ssize_t oom_adjust_read(struct file *file, char __user *buf,
  972.                 size_t count, loff_t *ppos)
  973. {
  974.     struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
  975.     char buffer[PROC_NUMBUF];
  976.     size_t len;
  977.     int oom_adjust = OOM_DISABLE;
  978.     unsigned long flags;
  979.  
  980.     if (!task)
  981.         return -ESRCH;
  982.  
  983.     if (lock_task_sighand(task, &flags)) {
  984.         oom_adjust = task->signal->oom_adj;
  985.         unlock_task_sighand(task, &flags);
  986.     }
  987.  
  988.     put_task_struct(task);
  989.  
  990.     len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
  991.  
  992.     return simple_read_from_buffer(buf, count, ppos, buffer, len);
  993. }
  994.  
  995. static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
  996.                 size_t count, loff_t *ppos)
  997. {
  998.     struct task_struct *task;
  999.     char buffer[PROC_NUMBUF];
  1000.     int oom_adjust;
  1001.     unsigned long flags;
  1002.     int err;
  1003.  
  1004.     memset(buffer, 0, sizeof(buffer));
  1005.     if (count > sizeof(buffer) - 1)
  1006.         count = sizeof(buffer) - 1;
  1007.     if (copy_from_user(buffer, buf, count)) {
  1008.         err = -EFAULT;
  1009.         goto out;
  1010.     }
  1011.  
  1012.     err = kstrtoint(strstrip(buffer), 0, &oom_adjust);
  1013.     if (err)
  1014.         goto out;
  1015.     if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
  1016.          oom_adjust != OOM_DISABLE) {
  1017.         err = -EINVAL;
  1018.         goto out;
  1019.     }
  1020.  
  1021.     task = get_proc_task(file->f_path.dentry->d_inode);
  1022.     if (!task) {
  1023.         err = -ESRCH;
  1024.         goto out;
  1025.     }
  1026.  
  1027.     task_lock(task);
  1028.     if (!task->mm) {
  1029.         err = -EINVAL;
  1030.         goto err_task_lock;
  1031.     }
  1032.  
  1033.     if (!lock_task_sighand(task, &flags)) {
  1034.         err = -ESRCH;
  1035.         goto err_task_lock;
  1036.     }
  1037.  
  1038.     if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
  1039.         err = -EACCES;
  1040.         goto err_sighand;
  1041.     }
  1042.  
  1043.     /*
  1044.      * Warn that /proc/pid/oom_adj is deprecated, see
  1045.      * Documentation/feature-removal-schedule.txt.
  1046.      */
  1047.     printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
  1048.           current->comm, task_pid_nr(current), task_pid_nr(task),
  1049.           task_pid_nr(task));
  1050.     task->signal->oom_adj = oom_adjust;
  1051.     /*
  1052.      * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
  1053.      * value is always attainable.
  1054.      */
  1055.     if (task->signal->oom_adj == OOM_ADJUST_MAX)
  1056.         task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX;
  1057.     else
  1058.         task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
  1059.                                 -OOM_DISABLE;
  1060. err_sighand:
  1061.     unlock_task_sighand(task, &flags);
  1062. err_task_lock:
  1063.     task_unlock(task);
  1064.     put_task_struct(task);
  1065. out:
  1066.     return err < 0 ? err : count;
  1067. }
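
/*
 * Editor's illustration (not in the original source), assuming the usual
 * constants from <linux/oom.h> (OOM_DISABLE == -17, OOM_ADJUST_MAX == 15,
 * OOM_SCORE_ADJ_MAX == 1000): writing "8" to the legacy oom_adj file maps
 * to oom_score_adj = 8 * 1000 / 17 = 470, writing "15" maps straight to
 * 1000, and writing "-17" (OOM_DISABLE) comes out as -1000 via the same
 * formula.
 */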
  1068.  
  1069. static const struct file_operations proc_oom_adjust_operations = {
  1070.     .read       = oom_adjust_read,
  1071.     .write      = oom_adjust_write,
  1072.     .llseek     = generic_file_llseek,
  1073. };
  1074.  
  1075. static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
  1076.                     size_t count, loff_t *ppos)
  1077. {
  1078.     struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
  1079.     char buffer[PROC_NUMBUF];
  1080.     int oom_score_adj = OOM_SCORE_ADJ_MIN;
  1081.     unsigned long flags;
  1082.     size_t len;
  1083.  
  1084.     if (!task)
  1085.         return -ESRCH;
  1086.     if (lock_task_sighand(task, &flags)) {
  1087.         oom_score_adj = task->signal->oom_score_adj;
  1088.         unlock_task_sighand(task, &flags);
  1089.     }
  1090.     put_task_struct(task);
  1091.     len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj);
  1092.     return simple_read_from_buffer(buf, count, ppos, buffer, len);
  1093. }
  1094.  
  1095. static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
  1096.                     size_t count, loff_t *ppos)
  1097. {
  1098.     struct task_struct *task;
  1099.     char buffer[PROC_NUMBUF];
  1100.     unsigned long flags;
  1101.     int oom_score_adj;
  1102.     int err;
  1103.  
  1104.     memset(buffer, 0, sizeof(buffer));
  1105.     if (count > sizeof(buffer) - 1)
  1106.         count = sizeof(buffer) - 1;
  1107.     if (copy_from_user(buffer, buf, count)) {
  1108.         err = -EFAULT;
  1109.         goto out;
  1110.     }
  1111.  
  1112.     err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
  1113.     if (err)
  1114.         goto out;
  1115.     if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
  1116.             oom_score_adj > OOM_SCORE_ADJ_MAX) {
  1117.         err = -EINVAL;
  1118.         goto out;
  1119.     }
  1120.  
  1121.     task = get_proc_task(file->f_path.dentry->d_inode);
  1122.     if (!task) {
  1123.         err = -ESRCH;
  1124.         goto out;
  1125.     }
  1126.  
  1127.     task_lock(task);
  1128.     if (!task->mm) {
  1129.         err = -EINVAL;
  1130.         goto err_task_lock;
  1131.     }
  1132.  
  1133.     if (!lock_task_sighand(task, &flags)) {
  1134.         err = -ESRCH;
  1135.         goto err_task_lock;
  1136.     }
  1137.  
  1138.     if (oom_score_adj < task->signal->oom_score_adj_min &&
  1139.             !capable(CAP_SYS_RESOURCE)) {
  1140.         err = -EACCES;
  1141.         goto err_sighand;
  1142.     }
  1143.  
  1144.     task->signal->oom_score_adj = oom_score_adj;
  1145.     if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
  1146.         task->signal->oom_score_adj_min = oom_score_adj;
  1147.     /*
  1148.      * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
  1149.      * always attainable.
  1150.      */
  1151.     if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
  1152.         task->signal->oom_adj = OOM_DISABLE;
  1153.     else
  1154.         task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
  1155.                             OOM_SCORE_ADJ_MAX;
  1156. err_sighand:
  1157.     unlock_task_sighand(task, &flags);
  1158. err_task_lock:
  1159.     task_unlock(task);
  1160.     put_task_struct(task);
  1161. out:
  1162.     return err < 0 ? err : count;
  1163. }
  1164.  
  1165. static const struct file_operations proc_oom_score_adj_operations = {
  1166.     .read       = oom_score_adj_read,
  1167.     .write      = oom_score_adj_write,
  1168.     .llseek     = default_llseek,
  1169. };
  1170.  
  1171. #ifdef CONFIG_AUDITSYSCALL
  1172. #define TMPBUFLEN 21
  1173. static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
  1174.                   size_t count, loff_t *ppos)
  1175. {
  1176.     struct inode * inode = file->f_path.dentry->d_inode;
  1177.     struct task_struct *task = get_proc_task(inode);
  1178.     ssize_t length;
  1179.     char tmpbuf[TMPBUFLEN];
  1180.  
  1181.     if (!task)
  1182.         return -ESRCH;
  1183.     length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
  1184.                 audit_get_loginuid(task));
  1185.     put_task_struct(task);
  1186.     return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
  1187. }
  1188.  
  1189. static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
  1190.                    size_t count, loff_t *ppos)
  1191. {
  1192.     struct inode * inode = file->f_path.dentry->d_inode;
  1193.     char *page, *tmp;
  1194.     ssize_t length;
  1195.     uid_t loginuid;
  1196.  
  1197.     if (!capable(CAP_AUDIT_CONTROL))
  1198.         return -EPERM;
  1199.  
  1200.     rcu_read_lock();
  1201.     if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
  1202.         rcu_read_unlock();
  1203.         return -EPERM;
  1204.     }
  1205.     rcu_read_unlock();
  1206.  
  1207.     if (count >= PAGE_SIZE)
  1208.         count = PAGE_SIZE - 1;
  1209.  
  1210.     if (*ppos != 0) {
  1211.         /* No partial writes. */
  1212.         return -EINVAL;
  1213.     }
  1214.     page = (char*)__get_free_page(GFP_TEMPORARY);
  1215.     if (!page)
  1216.         return -ENOMEM;
  1217.     length = -EFAULT;
  1218.     if (copy_from_user(page, buf, count))
  1219.         goto out_free_page;
  1220.  
  1221.     page[count] = '\0';
  1222.     loginuid = simple_strtoul(page, &tmp, 10);
  1223.     if (tmp == page) {
  1224.         length = -EINVAL;
  1225.         goto out_free_page;
  1226.  
  1227.     }
  1228.     length = audit_set_loginuid(current, loginuid);
  1229.     if (likely(length == 0))
  1230.         length = count;
  1231.  
  1232. out_free_page:
  1233.     free_page((unsigned long) page);
  1234.     return length;
  1235. }
  1236.  
  1237. static const struct file_operations proc_loginuid_operations = {
  1238.     .read       = proc_loginuid_read,
  1239.     .write      = proc_loginuid_write,
  1240.     .llseek     = generic_file_llseek,
  1241. };
  1242.  
  1243. static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
  1244.                   size_t count, loff_t *ppos)
  1245. {
  1246.     struct inode * inode = file->f_path.dentry->d_inode;
  1247.     struct task_struct *task = get_proc_task(inode);
  1248.     ssize_t length;
  1249.     char tmpbuf[TMPBUFLEN];
  1250.  
  1251.     if (!task)
  1252.         return -ESRCH;
  1253.     length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
  1254.                 audit_get_sessionid(task));
  1255.     put_task_struct(task);
  1256.     return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
  1257. }
  1258.  
  1259. static const struct file_operations proc_sessionid_operations = {
  1260.     .read       = proc_sessionid_read,
  1261.     .llseek     = generic_file_llseek,
  1262. };
  1263. #endif
  1264.  
  1265. #ifdef CONFIG_FAULT_INJECTION
  1266. static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
  1267.                       size_t count, loff_t *ppos)
  1268. {
  1269.     struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
  1270.     char buffer[PROC_NUMBUF];
  1271.     size_t len;
  1272.     int make_it_fail;
  1273.  
  1274.     if (!task)
  1275.         return -ESRCH;
  1276.     make_it_fail = task->make_it_fail;
  1277.     put_task_struct(task);
  1278.  
  1279.     len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
  1280.  
  1281.     return simple_read_from_buffer(buf, count, ppos, buffer, len);
  1282. }
  1283.  
  1284. static ssize_t proc_fault_inject_write(struct file * file,
  1285.             const char __user * buf, size_t count, loff_t *ppos)
  1286. {
  1287.     struct task_struct *task;
  1288.     char buffer[PROC_NUMBUF], *end;
  1289.     int make_it_fail;
  1290.  
  1291.     if (!capable(CAP_SYS_RESOURCE))
  1292.         return -EPERM;
  1293.     memset(buffer, 0, sizeof(buffer));
  1294.     if (count > sizeof(buffer) - 1)
  1295.         count = sizeof(buffer) - 1;
  1296.     if (copy_from_user(buffer, buf, count))
  1297.         return -EFAULT;
  1298.     make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
  1299.     if (*end)
  1300.         return -EINVAL;
  1301.     task = get_proc_task(file->f_dentry->d_inode);
  1302.     if (!task)
  1303.         return -ESRCH;
  1304.     task->make_it_fail = make_it_fail;
  1305.     put_task_struct(task);
  1306.  
  1307.     return count;
  1308. }
  1309.  
  1310. static const struct file_operations proc_fault_inject_operations = {
  1311.     .read       = proc_fault_inject_read,
  1312.     .write      = proc_fault_inject_write,
  1313.     .llseek     = generic_file_llseek,
  1314. };
  1315. #endif
  1316.  
  1317.  
  1318. #ifdef CONFIG_SCHED_DEBUG
  1319. /*
  1320.  * Print out various scheduling related per-task fields:
  1321.  */
  1322. static int sched_show(struct seq_file *m, void *v)
  1323. {
  1324.     struct inode *inode = m->private;
  1325.     struct task_struct *p;
  1326.  
  1327.     p = get_proc_task(inode);
  1328.     if (!p)
  1329.         return -ESRCH;
  1330.     proc_sched_show_task(p, m);
  1331.  
  1332.     put_task_struct(p);
  1333.  
  1334.     return 0;
  1335. }
  1336.  
  1337. static ssize_t
  1338. sched_write(struct file *file, const char __user *buf,
  1339.         size_t count, loff_t *offset)
  1340. {
  1341.     struct inode *inode = file->f_path.dentry->d_inode;
  1342.     struct task_struct *p;
  1343.  
  1344.     p = get_proc_task(inode);
  1345.     if (!p)
  1346.         return -ESRCH;
  1347.     proc_sched_set_task(p);
  1348.  
  1349.     put_task_struct(p);
  1350.  
  1351.     return count;
  1352. }
  1353.  
  1354. static int sched_open(struct inode *inode, struct file *filp)
  1355. {
  1356.     return single_open(filp, sched_show, inode);
  1357. }
  1358.  
  1359. static const struct file_operations proc_pid_sched_operations = {
  1360.     .open       = sched_open,
  1361.     .read       = seq_read,
  1362.     .write      = sched_write,
  1363.     .llseek     = seq_lseek,
  1364.     .release    = single_release,
  1365. };
  1366.  
  1367. #endif
  1368.  
  1369. #ifdef CONFIG_SCHED_AUTOGROUP
  1370. /*
  1371.  * Print out autogroup related information:
  1372.  */
  1373. static int sched_autogroup_show(struct seq_file *m, void *v)
  1374. {
  1375.     struct inode *inode = m->private;
  1376.     struct task_struct *p;
  1377.  
  1378.     p = get_proc_task(inode);
  1379.     if (!p)
  1380.         return -ESRCH;
  1381.     proc_sched_autogroup_show_task(p, m);
  1382.  
  1383.     put_task_struct(p);
  1384.  
  1385.     return 0;
  1386. }
  1387.  
  1388. static ssize_t
  1389. sched_autogroup_write(struct file *file, const char __user *buf,
  1390.         size_t count, loff_t *offset)
  1391. {
  1392.     struct inode *inode = file->f_path.dentry->d_inode;
  1393.     struct task_struct *p;
  1394.     char buffer[PROC_NUMBUF];
  1395.     int nice;
  1396.     int err;
  1397.  
  1398.     memset(buffer, 0, sizeof(buffer));
  1399.     if (count > sizeof(buffer) - 1)
  1400.         count = sizeof(buffer) - 1;
  1401.     if (copy_from_user(buffer, buf, count))
  1402.         return -EFAULT;
  1403.  
  1404.     err = kstrtoint(strstrip(buffer), 0, &nice);
  1405.     if (err < 0)
  1406.         return err;
  1407.  
  1408.     p = get_proc_task(inode);
  1409.     if (!p)
  1410.         return -ESRCH;
  1411.  
  1412.     err = nice;
  1413.     err = proc_sched_autogroup_set_nice(p, &err);
  1414.     if (err)
  1415.         count = err;
  1416.  
  1417.     put_task_struct(p);
  1418.  
  1419.     return count;
  1420. }
  1421.  
  1422. static int sched_autogroup_open(struct inode *inode, struct file *filp)
  1423. {
  1424.     int ret;
  1425.  
  1426.     ret = single_open(filp, sched_autogroup_show, NULL);
  1427.     if (!ret) {
  1428.         struct seq_file *m = filp->private_data;
  1429.  
  1430.         m->private = inode;
  1431.     }
  1432.     return ret;
  1433. }
  1434.  
  1435. static const struct file_operations proc_pid_sched_autogroup_operations = {
  1436.     .open       = sched_autogroup_open,
  1437.     .read       = seq_read,
  1438.     .write      = sched_autogroup_write,
  1439.     .llseek     = seq_lseek,
  1440.     .release    = single_release,
  1441. };
  1442.  
  1443. #endif /* CONFIG_SCHED_AUTOGROUP */
  1444.  
  1445. static ssize_t comm_write(struct file *file, const char __user *buf,
  1446.                 size_t count, loff_t *offset)
  1447. {
  1448.     struct inode *inode = file->f_path.dentry->d_inode;
  1449.     struct task_struct *p;
  1450.     char buffer[TASK_COMM_LEN];
  1451.  
  1452.     memset(buffer, 0, sizeof(buffer));
  1453.     if (count > sizeof(buffer) - 1)
  1454.         count = sizeof(buffer) - 1;
  1455.     if (copy_from_user(buffer, buf, count))
  1456.         return -EFAULT;
  1457.  
  1458.     p = get_proc_task(inode);
  1459.     if (!p)
  1460.         return -ESRCH;
  1461.  
  1462.     if (same_thread_group(current, p))
  1463.         set_task_comm(p, buffer);
  1464.     else
  1465.         count = -EINVAL;
  1466.  
  1467.     put_task_struct(p);
  1468.  
  1469.     return count;
  1470. }
  1471.  
  1472. static int comm_show(struct seq_file *m, void *v)
  1473. {
  1474.     struct inode *inode = m->private;
  1475.     struct task_struct *p;
  1476.  
  1477.     p = get_proc_task(inode);
  1478.     if (!p)
  1479.         return -ESRCH;
  1480.  
  1481.     task_lock(p);
  1482.     seq_printf(m, "%s\n", p->comm);
  1483.     task_unlock(p);
  1484.  
  1485.     put_task_struct(p);
  1486.  
  1487.     return 0;
  1488. }
  1489.  
  1490. static int comm_open(struct inode *inode, struct file *filp)
  1491. {
  1492.     return single_open(filp, comm_show, inode);
  1493. }
  1494.  
  1495. static const struct file_operations proc_pid_set_comm_operations = {
  1496.     .open       = comm_open,
  1497.     .read       = seq_read,
  1498.     .write      = comm_write,
  1499.     .llseek     = seq_lseek,
  1500.     .release    = single_release,
  1501. };
  1502.  
  1503. static int proc_exe_link(struct inode *inode, struct path *exe_path)
  1504. {
  1505.     struct task_struct *task;
  1506.     struct mm_struct *mm;
  1507.     struct file *exe_file;
  1508.  
  1509.     task = get_proc_task(inode);
  1510.     if (!task)
  1511.         return -ENOENT;
  1512.     mm = get_task_mm(task);
  1513.     put_task_struct(task);
  1514.     if (!mm)
  1515.         return -ENOENT;
  1516.     exe_file = get_mm_exe_file(mm);
  1517.     mmput(mm);
  1518.     if (exe_file) {
  1519.         *exe_path = exe_file->f_path;
  1520.         path_get(&exe_file->f_path);
  1521.         fput(exe_file);
  1522.         return 0;
  1523.     } else
  1524.         return -ENOENT;
  1525. }
  1526.  
  1527. static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
  1528. {
  1529.     struct inode *inode = dentry->d_inode;
  1530.     int error = -EACCES;
  1531.  
  1532.     /* We don't need a base pointer in the /proc filesystem */
  1533.     path_put(&nd->path);
  1534.  
  1535.     /* Are we allowed to snoop on the tasks file descriptors? */
  1536.     if (!proc_fd_access_allowed(inode))
  1537.         goto out;
  1538.  
  1539.     error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
  1540. out:
  1541.     return ERR_PTR(error);
  1542. }
  1543.  
  1544. static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
  1545. {
  1546.     char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
  1547.     char *pathname;
  1548.     int len;
  1549.  
  1550.     if (!tmp)
  1551.         return -ENOMEM;
  1552.  
  1553.     pathname = d_path(path, tmp, PAGE_SIZE);
  1554.     len = PTR_ERR(pathname);
  1555.     if (IS_ERR(pathname))
  1556.         goto out;
  1557.     len = tmp + PAGE_SIZE - 1 - pathname;
  1558.  
  1559.     if (len > buflen)
  1560.         len = buflen;
  1561.     if (copy_to_user(buffer, pathname, len))
  1562.         len = -EFAULT;
  1563.  out:
  1564.     free_page((unsigned long)tmp);
  1565.     return len;
  1566. }
  1567.  
  1568. static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
  1569. {
  1570.     int error = -EACCES;
  1571.     struct inode *inode = dentry->d_inode;
  1572.     struct path path;
  1573.  
  1574.     /* Are we allowed to snoop on the tasks file descriptors? */
  1575.     if (!proc_fd_access_allowed(inode))
  1576.         goto out;
  1577.  
  1578.     error = PROC_I(inode)->op.proc_get_link(inode, &path);
  1579.     if (error)
  1580.         goto out;
  1581.  
  1582.     error = do_proc_readlink(&path, buffer, buflen);
  1583.     path_put(&path);
  1584. out:
  1585.     return error;
  1586. }
  1587.  
  1588. static const struct inode_operations proc_pid_link_inode_operations = {
  1589.     .readlink   = proc_pid_readlink,
  1590.     .follow_link    = proc_pid_follow_link,
  1591.     .setattr    = proc_setattr,
  1592. };
  1593.  
  1594.  
  1595. /* building an inode */
  1596.  
  1597. static int task_dumpable(struct task_struct *task)
  1598. {
  1599.     int dumpable = 0;
  1600.     struct mm_struct *mm;
  1601.  
  1602.     task_lock(task);
  1603.     mm = task->mm;
  1604.     if (mm)
  1605.         dumpable = get_dumpable(mm);
  1606.     task_unlock(task);
  1607.     if (dumpable == 1)
  1608.         return 1;
  1609.     return 0;
  1610. }
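
/*
 * Editor's note (not in the original source): only a dumpable value of
 * exactly 1 counts here, so a task marked "dumpable as root" (value 2,
 * e.g. after exec of a setuid binary with the suid_dumpable sysctl set
 * to 2) is still treated as non-dumpable for the ownership decisions below.
 */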
  1611.  
  1612. struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
  1613. {
  1614.     struct inode * inode;
  1615.     struct proc_inode *ei;
  1616.     const struct cred *cred;
  1617.  
  1618.     /* We need a new inode */
  1619.  
  1620.     inode = new_inode(sb);
  1621.     if (!inode)
  1622.         goto out;
  1623.  
  1624.     /* Common stuff */
  1625.     ei = PROC_I(inode);
  1626.     inode->i_ino = get_next_ino();
  1627.     inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
  1628.     inode->i_op = &proc_def_inode_operations;
  1629.  
  1630.     /*
  1631.      * grab the reference to task.
  1632.      */
  1633.     ei->pid = get_task_pid(task, PIDTYPE_PID);
  1634.     if (!ei->pid)
  1635.         goto out_unlock;
  1636.  
  1637.     if (task_dumpable(task)) {
  1638.         rcu_read_lock();
  1639.         cred = __task_cred(task);
  1640.         inode->i_uid = cred->euid;
  1641.         inode->i_gid = cred->egid;
  1642.         rcu_read_unlock();
  1643.     }
  1644.     security_task_to_inode(task, inode);
  1645.  
  1646. out:
  1647.     return inode;
  1648.  
  1649. out_unlock:
  1650.     iput(inode);
  1651.     return NULL;
  1652. }
  1653.  
  1654. int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  1655. {
  1656.     struct inode *inode = dentry->d_inode;
  1657.     struct task_struct *task;
  1658.     const struct cred *cred;
  1659.  
  1660.     generic_fillattr(inode, stat);
  1661.  
  1662.     rcu_read_lock();
  1663.     stat->uid = 0;
  1664.     stat->gid = 0;
  1665.     task = pid_task(proc_pid(inode), PIDTYPE_PID);
  1666.     if (task) {
  1667.         if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
  1668.             task_dumpable(task)) {
  1669.             cred = __task_cred(task);
  1670.             stat->uid = cred->euid;
  1671.             stat->gid = cred->egid;
  1672.         }
  1673.     }
  1674.     rcu_read_unlock();
  1675.     return 0;
  1676. }
  1677.  
  1678. /* dentry stuff */
  1679.  
  1680. /*
  1681.  *  Exceptional case: normally we are not allowed to unhash a busy
  1682.  * directory. In this case, however, we can do it - no aliasing problems
  1683.  * due to the way we treat inodes.
  1684.  *
  1685.  * Rewrite the inode's ownerships here because the owning task may have
  1686.  * performed a setuid(), etc.
  1687.  *
  1688.  * Before the /proc/pid/status file was created the only way to read
  1689.  * the effective uid of a /process was to stat /proc/pid.  Reading
  1690.  * /proc/pid/status is slow enough that procps and other packages
  1691.  * kept stating /proc/pid.  To keep the rules in /proc simple I have
  1692.  * made this apply to all per process world readable and executable
  1693.  * directories.
  1694.  */
  1695. int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
  1696. {
  1697.     struct inode *inode;
  1698.     struct task_struct *task;
  1699.     const struct cred *cred;
  1700.  
  1701.     if (nd && nd->flags & LOOKUP_RCU)
  1702.         return -ECHILD;
  1703.  
  1704.     inode = dentry->d_inode;
  1705.     task = get_proc_task(inode);
  1706.  
  1707.     if (task) {
  1708.         if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
  1709.             task_dumpable(task)) {
  1710.             rcu_read_lock();
  1711.             cred = __task_cred(task);
  1712.             inode->i_uid = cred->euid;
  1713.             inode->i_gid = cred->egid;
  1714.             rcu_read_unlock();
  1715.         } else {
  1716.             inode->i_uid = 0;
  1717.             inode->i_gid = 0;
  1718.         }
  1719.         inode->i_mode &= ~(S_ISUID | S_ISGID);
  1720.         security_task_to_inode(task, inode);
  1721.         put_task_struct(task);
  1722.         return 1;
  1723.     }
  1724.     d_drop(dentry);
  1725.     return 0;
  1726. }
  1727.  
  1728. static int pid_delete_dentry(const struct dentry * dentry)
  1729. {
  1730.     /* Is the task we represent dead?
  1731.      * If so, then don't put the dentry on the lru list,
  1732.      * kill it immediately.
  1733.      */
  1734.     return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
  1735. }
  1736.  
  1737. const struct dentry_operations pid_dentry_operations =
  1738. {
  1739.     .d_revalidate   = pid_revalidate,
  1740.     .d_delete   = pid_delete_dentry,
  1741. };
  1742.  
  1743. /* Lookups */
  1744.  
  1745. /*
  1746.  * Fill a directory entry.
  1747.  *
  1748.  * If possible create the dcache entry and derive our inode number and
  1749.  * file type from dcache entry.
  1750.  *
  1751.  * Since all of the proc inode numbers are dynamically generated, the inode
  1752.  * numbers do not exist until the inode is cached.  This means creating
  1753.  * the dcache entry in readdir is necessary to keep the inode numbers
  1754.  * reported by readdir in sync with the inode numbers reported
  1755.  * by stat.
  1756.  */
  1757. int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
  1758.     const char *name, int len,
  1759.     instantiate_t instantiate, struct task_struct *task, const void *ptr)
  1760. {
  1761.     struct dentry *child, *dir = filp->f_path.dentry;
  1762.     struct inode *inode;
  1763.     struct qstr qname;
  1764.     ino_t ino = 0;
  1765.     unsigned type = DT_UNKNOWN;
  1766.  
  1767.     qname.name = name;
  1768.     qname.len  = len;
  1769.     qname.hash = full_name_hash(name, len);
  1770.  
  1771.     child = d_lookup(dir, &qname);
  1772.     if (!child) {
  1773.         struct dentry *new;
  1774.         new = d_alloc(dir, &qname);
  1775.         if (new) {
  1776.             child = instantiate(dir->d_inode, new, task, ptr);
  1777.             if (child)
  1778.                 dput(new);
  1779.             else
  1780.                 child = new;
  1781.         }
  1782.     }
  1783.     if (!child || IS_ERR(child) || !child->d_inode)
  1784.         goto end_instantiate;
  1785.     inode = child->d_inode;
  1786.     if (inode) {
  1787.         ino = inode->i_ino;
  1788.         type = inode->i_mode >> 12;
  1789.     }
  1790.     dput(child);
  1791. end_instantiate:
  1792.     if (!ino)
  1793.         ino = find_inode_number(dir, &qname);
  1794.     if (!ino)
  1795.         ino = 1;
  1796.     return filldir(dirent, name, len, filp->f_pos, ino, type);
  1797. }
  1798.  
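        /*
         * Parse a purely numeric dentry name (e.g. "42" for /proc/42 or
         * fd/42) into an unsigned int.  Leading zeroes ("042"), non-digit
         * characters and values that would overflow all return ~0U, which
         * callers treat as "not a number".
         */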
  1799. static unsigned name_to_int(struct dentry *dentry)
  1800. {
  1801.     const char *name = dentry->d_name.name;
  1802.     int len = dentry->d_name.len;
  1803.     unsigned n = 0;
  1804.  
  1805.     if (len > 1 && *name == '0')
  1806.         goto out;
  1807.     while (len-- > 0) {
  1808.         unsigned c = *name++ - '0';
  1809.         if (c > 9)
  1810.             goto out;
  1811.         if (n >= (~0U-9)/10)
  1812.             goto out;
  1813.         n *= 10;
  1814.         n += c;
  1815.     }
  1816.     return n;
  1817. out:
  1818.     return ~0U;
  1819. }
  1820.  
  1821. #define PROC_FDINFO_MAX 64
  1822.  
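        /*
         * Resolve the fd stored in the proc inode against the task's file
         * table.  If @path is given, take a reference on the file's path;
         * if @info is given, fill it with the text shown by
         * /proc/<pid>/fdinfo/<fd>, for example (illustrative values):
         *
         *      pos:    0
         *      flags:  0100002
         *
         * The struct file is only examined under ->file_lock; no reference
         * to it is taken.
         */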
  1823. static int proc_fd_info(struct inode *inode, struct path *path, char *info)
  1824. {
  1825.     struct task_struct *task = get_proc_task(inode);
  1826.     struct files_struct *files = NULL;
  1827.     struct file *file;
  1828.     int fd = proc_fd(inode);
  1829.  
  1830.     if (task) {
  1831.         files = get_files_struct(task);
  1832.         put_task_struct(task);
  1833.     }
  1834.     if (files) {
  1835.         /*
  1836.          * We are not taking a ref to the file structure, so we must
  1837.          * hold ->file_lock.
  1838.          */
  1839.         spin_lock(&files->file_lock);
  1840.         file = fcheck_files(files, fd);
  1841.         if (file) {
  1842.             unsigned int f_flags;
  1843.             struct fdtable *fdt;
  1844.  
  1845.             fdt = files_fdtable(files);
  1846.             f_flags = file->f_flags & ~O_CLOEXEC;
  1847.             if (FD_ISSET(fd, fdt->close_on_exec))
  1848.                 f_flags |= O_CLOEXEC;
  1849.  
  1850.             if (path) {
  1851.                 *path = file->f_path;
  1852.                 path_get(&file->f_path);
  1853.             }
  1854.             if (info)
  1855.                 snprintf(info, PROC_FDINFO_MAX,
  1856.                      "pos:\t%lli\n"
  1857.                      "flags:\t0%o\n",
  1858.                      (long long) file->f_pos,
  1859.                      f_flags);
  1860.             spin_unlock(&files->file_lock);
  1861.             put_files_struct(files);
  1862.             return 0;
  1863.         }
  1864.         spin_unlock(&files->file_lock);
  1865.         put_files_struct(files);
  1866.     }
  1867.     return -ENOENT;
  1868. }
  1869.  
  1870. static int proc_fd_link(struct inode *inode, struct path *path)
  1871. {
  1872.     return proc_fd_info(inode, path, NULL);
  1873. }
  1874.  
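        /*
         * d_revalidate for /proc/<pid>/fd/<n> and fdinfo/<n>: the dentry is
         * kept only while both the task and the file descriptor still
         * exist, and the inode ownership is refreshed the same way
         * pid_revalidate() does it.
         */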
  1875. static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
  1876. {
  1877.     struct inode *inode;
  1878.     struct task_struct *task;
  1879.     int fd;
  1880.     struct files_struct *files;
  1881.     const struct cred *cred;
  1882.  
  1883.     if (nd && nd->flags & LOOKUP_RCU)
  1884.         return -ECHILD;
  1885.  
  1886.     inode = dentry->d_inode;
  1887.     task = get_proc_task(inode);
  1888.     fd = proc_fd(inode);
  1889.  
  1890.     if (task) {
  1891.         files = get_files_struct(task);
  1892.         if (files) {
  1893.             rcu_read_lock();
  1894.             if (fcheck_files(files, fd)) {
  1895.                 rcu_read_unlock();
  1896.                 put_files_struct(files);
  1897.                 if (task_dumpable(task)) {
  1898.                     rcu_read_lock();
  1899.                     cred = __task_cred(task);
  1900.                     inode->i_uid = cred->euid;
  1901.                     inode->i_gid = cred->egid;
  1902.                     rcu_read_unlock();
  1903.                 } else {
  1904.                     inode->i_uid = 0;
  1905.                     inode->i_gid = 0;
  1906.                 }
  1907.                 inode->i_mode &= ~(S_ISUID | S_ISGID);
  1908.                 security_task_to_inode(task, inode);
  1909.                 put_task_struct(task);
  1910.                 return 1;
  1911.             }
  1912.             rcu_read_unlock();
  1913.             put_files_struct(files);
  1914.         }
  1915.         put_task_struct(task);
  1916.     }
  1917.     d_drop(dentry);
  1918.     return 0;
  1919. }
  1920.  
  1921. static const struct dentry_operations tid_fd_dentry_operations =
  1922. {
  1923.     .d_revalidate   = tid_fd_revalidate,
  1924.     .d_delete   = pid_delete_dentry,
  1925. };
  1926.  
  1927. static struct dentry *proc_fd_instantiate(struct inode *dir,
  1928.     struct dentry *dentry, struct task_struct *task, const void *ptr)
  1929. {
  1930.     unsigned fd = *(const unsigned *)ptr;
  1931.     struct file *file;
  1932.     struct files_struct *files;
  1933.     struct inode *inode;
  1934.     struct proc_inode *ei;
  1935.     struct dentry *error = ERR_PTR(-ENOENT);
  1936.  
  1937.     inode = proc_pid_make_inode(dir->i_sb, task);
  1938.     if (!inode)
  1939.         goto out;
  1940.     ei = PROC_I(inode);
  1941.     ei->fd = fd;
  1942.     files = get_files_struct(task);
  1943.     if (!files)
  1944.         goto out_iput;
  1945.     inode->i_mode = S_IFLNK;
  1946.  
  1947.     /*
  1948.      * We are not taking a ref to the file structure, so we must
  1949.      * hold ->file_lock.
  1950.      */
  1951.     spin_lock(&files->file_lock);
  1952.     file = fcheck_files(files, fd);
  1953.     if (!file)
  1954.         goto out_unlock;
  1955.     if (file->f_mode & FMODE_READ)
  1956.         inode->i_mode |= S_IRUSR | S_IXUSR;
  1957.     if (file->f_mode & FMODE_WRITE)
  1958.         inode->i_mode |= S_IWUSR | S_IXUSR;
  1959.     spin_unlock(&files->file_lock);
  1960.     put_files_struct(files);
  1961.  
  1962.     inode->i_op = &proc_pid_link_inode_operations;
  1963.     inode->i_size = 64;
  1964.     ei->op.proc_get_link = proc_fd_link;
  1965.     d_set_d_op(dentry, &tid_fd_dentry_operations);
  1966.     d_add(dentry, inode);
  1967.     /* Close the race of the process dying before we return the dentry */
  1968.     if (tid_fd_revalidate(dentry, NULL))
  1969.         error = NULL;
  1970.  
  1971.  out:
  1972.     return error;
  1973. out_unlock:
  1974.     spin_unlock(&files->file_lock);
  1975.     put_files_struct(files);
  1976. out_iput:
  1977.     iput(inode);
  1978.     goto out;
  1979. }
  1980.  
  1981. static struct dentry *proc_lookupfd_common(struct inode *dir,
  1982.                        struct dentry *dentry,
  1983.                        instantiate_t instantiate)
  1984. {
  1985.     struct task_struct *task = get_proc_task(dir);
  1986.     unsigned fd = name_to_int(dentry);
  1987.     struct dentry *result = ERR_PTR(-ENOENT);
  1988.  
  1989.     if (!task)
  1990.         goto out_no_task;
  1991.     if (fd == ~0U)
  1992.         goto out;
  1993.  
  1994.     result = instantiate(dir, dentry, task, &fd);
  1995. out:
  1996.     put_task_struct(task);
  1997. out_no_task:
  1998.     return result;
  1999. }
  2000.  
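        /*
         * Shared readdir for the fd and fdinfo directories: emit "." and
         * "..", then walk the task's fdtable under rcu_read_lock(),
         * dropping the RCU lock around proc_fill_cache() since
         * instantiating dentries may sleep.
         */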
  2001. static int proc_readfd_common(struct file * filp, void * dirent,
  2002.                   filldir_t filldir, instantiate_t instantiate)
  2003. {
  2004.     struct dentry *dentry = filp->f_path.dentry;
  2005.     struct inode *inode = dentry->d_inode;
  2006.     struct task_struct *p = get_proc_task(inode);
  2007.     unsigned int fd, ino;
  2008.     int retval;
  2009.     struct files_struct * files;
  2010.  
  2011.     retval = -ENOENT;
  2012.     if (!p)
  2013.         goto out_no_task;
  2014.     retval = 0;
  2015.  
  2016.     fd = filp->f_pos;
  2017.     switch (fd) {
  2018.         case 0:
  2019.             if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
  2020.                 goto out;
  2021.             filp->f_pos++;
  2022.         case 1:
  2023.             ino = parent_ino(dentry);
  2024.             if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
  2025.                 goto out;
  2026.             filp->f_pos++;
  2027.         default:
  2028.             files = get_files_struct(p);
  2029.             if (!files)
  2030.                 goto out;
  2031.             rcu_read_lock();
  2032.             for (fd = filp->f_pos-2;
  2033.                  fd < files_fdtable(files)->max_fds;
  2034.                  fd++, filp->f_pos++) {
  2035.                 char name[PROC_NUMBUF];
  2036.                 int len;
  2037.  
  2038.                 if (!fcheck_files(files, fd))
  2039.                     continue;
  2040.                 rcu_read_unlock();
  2041.  
  2042.                 len = snprintf(name, sizeof(name), "%d", fd);
  2043.                 if (proc_fill_cache(filp, dirent, filldir,
  2044.                             name, len, instantiate,
  2045.                             p, &fd) < 0) {
  2046.                     rcu_read_lock();
  2047.                     break;
  2048.                 }
  2049.                 rcu_read_lock();
  2050.             }
  2051.             rcu_read_unlock();
  2052.             put_files_struct(files);
  2053.     }
  2054. out:
  2055.     put_task_struct(p);
  2056. out_no_task:
  2057.     return retval;
  2058. }
  2059.  
  2060. static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
  2061.                     struct nameidata *nd)
  2062. {
  2063.     return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
  2064. }
  2065.  
  2066. static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
  2067. {
  2068.     return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
  2069. }
  2070.  
  2071. static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
  2072.                       size_t len, loff_t *ppos)
  2073. {
  2074.     char tmp[PROC_FDINFO_MAX];
  2075.     int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
  2076.     if (!err)
  2077.         err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
  2078.     return err;
  2079. }
  2080.  
  2081. static const struct file_operations proc_fdinfo_file_operations = {
  2082.     .open           = nonseekable_open,
  2083.     .read       = proc_fdinfo_read,
  2084.     .llseek     = no_llseek,
  2085. };
  2086.  
  2087. static const struct file_operations proc_fd_operations = {
  2088.     .read       = generic_read_dir,
  2089.     .readdir    = proc_readfd,
  2090.     .llseek     = default_llseek,
  2091. };
  2092.  
  2093. /*
  2094.  * /proc/pid/fd needs a special permission handler so that a process can still
  2095.  * access /proc/self/fd after it has executed a setuid().
  2096.  */
  2097. static int proc_fd_permission(struct inode *inode, int mask)
  2098. {
  2099.     int rv = generic_permission(inode, mask);
  2100.     if (rv == 0)
  2101.         return 0;
  2102.     if (task_pid(current) == proc_pid(inode))
  2103.         rv = 0;
  2104.     return rv;
  2105. }
  2106.  
  2107. /*
  2108.  * proc directories can do almost nothing..
  2109.  */
  2110. static const struct inode_operations proc_fd_inode_operations = {
  2111.     .lookup     = proc_lookupfd,
  2112.     .permission = proc_fd_permission,
  2113.     .setattr    = proc_setattr,
  2114. };
  2115.  
  2116. static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
  2117.     struct dentry *dentry, struct task_struct *task, const void *ptr)
  2118. {
  2119.     unsigned fd = *(unsigned *)ptr;
  2120.     struct inode *inode;
  2121.     struct proc_inode *ei;
  2122.     struct dentry *error = ERR_PTR(-ENOENT);
  2123.  
  2124.     inode = proc_pid_make_inode(dir->i_sb, task);
  2125.     if (!inode)
  2126.         goto out;
  2127.     ei = PROC_I(inode);
  2128.     ei->fd = fd;
  2129.     inode->i_mode = S_IFREG | S_IRUSR;
  2130.     inode->i_fop = &proc_fdinfo_file_operations;
  2131.     d_set_d_op(dentry, &tid_fd_dentry_operations);
  2132.     d_add(dentry, inode);
  2133.     /* Close the race of the process dying before we return the dentry */
  2134.     if (tid_fd_revalidate(dentry, NULL))
  2135.         error = NULL;
  2136.  
  2137.  out:
  2138.     return error;
  2139. }
  2140.  
  2141. static struct dentry *proc_lookupfdinfo(struct inode *dir,
  2142.                     struct dentry *dentry,
  2143.                     struct nameidata *nd)
  2144. {
  2145.     return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
  2146. }
  2147.  
  2148. static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
  2149. {
  2150.     return proc_readfd_common(filp, dirent, filldir,
  2151.                   proc_fdinfo_instantiate);
  2152. }
  2153.  
  2154. static const struct file_operations proc_fdinfo_operations = {
  2155.     .read       = generic_read_dir,
  2156.     .readdir    = proc_readfdinfo,
  2157.     .llseek     = default_llseek,
  2158. };
  2159.  
  2160. /*
  2161.  * proc directories can do almost nothing..
  2162.  */
  2163. static const struct inode_operations proc_fdinfo_inode_operations = {
  2164.     .lookup     = proc_lookupfdinfo,
  2165.     .setattr    = proc_setattr,
  2166. };
  2167.  
  2168.  
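        /*
         * Instantiate a fixed entry described by a pid_entry table
         * (tgid_base_stuff, tid_base_stuff, attr_dir_stuff, ...): build a
         * proc inode for the task, copy the mode and i_op/i_fop from the
         * table entry and hash the dentry with pid_dentry_operations.
         */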
  2169. static struct dentry *proc_pident_instantiate(struct inode *dir,
  2170.     struct dentry *dentry, struct task_struct *task, const void *ptr)
  2171. {
  2172.     const struct pid_entry *p = ptr;
  2173.     struct inode *inode;
  2174.     struct proc_inode *ei;
  2175.     struct dentry *error = ERR_PTR(-ENOENT);
  2176.  
  2177.     inode = proc_pid_make_inode(dir->i_sb, task);
  2178.     if (!inode)
  2179.         goto out;
  2180.  
  2181.     ei = PROC_I(inode);
  2182.     inode->i_mode = p->mode;
  2183.     if (S_ISDIR(inode->i_mode))
  2184.         set_nlink(inode, 2);    /* Use getattr to fix if necessary */
  2185.     if (p->iop)
  2186.         inode->i_op = p->iop;
  2187.     if (p->fop)
  2188.         inode->i_fop = p->fop;
  2189.     ei->op = p->op;
  2190.     d_set_d_op(dentry, &pid_dentry_operations);
  2191.     d_add(dentry, inode);
  2192.     /* Close the race of the process dying before we return the dentry */
  2193.     if (pid_revalidate(dentry, NULL))
  2194.         error = NULL;
  2195. out:
  2196.     return error;
  2197. }
  2198.  
  2199. static struct dentry *proc_pident_lookup(struct inode *dir,
  2200.                      struct dentry *dentry,
  2201.                      const struct pid_entry *ents,
  2202.                      unsigned int nents)
  2203. {
  2204.     struct dentry *error;
  2205.     struct task_struct *task = get_proc_task(dir);
  2206.     const struct pid_entry *p, *last;
  2207.  
  2208.     error = ERR_PTR(-ENOENT);
  2209.  
  2210.     if (!task)
  2211.         goto out_no_task;
  2212.  
  2213.     /*
  2214.      * Yes, it does not scale. And it should not. Don't add
  2215.      * new entries into /proc/<tgid>/ without very good reasons.
  2216.      */
  2217.     last = &ents[nents - 1];
  2218.     for (p = ents; p <= last; p++) {
  2219.         if (p->len != dentry->d_name.len)
  2220.             continue;
  2221.         if (!memcmp(dentry->d_name.name, p->name, p->len))
  2222.             break;
  2223.     }
  2224.     if (p > last)
  2225.         goto out;
  2226.  
  2227.     error = proc_pident_instantiate(dir, dentry, task, p);
  2228. out:
  2229.     put_task_struct(task);
  2230. out_no_task:
  2231.     return error;
  2232. }
  2233.  
  2234. static int proc_pident_fill_cache(struct file *filp, void *dirent,
  2235.     filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
  2236. {
  2237.     return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
  2238.                 proc_pident_instantiate, task, p);
  2239. }
  2240.  
  2241. static int proc_pident_readdir(struct file *filp,
  2242.         void *dirent, filldir_t filldir,
  2243.         const struct pid_entry *ents, unsigned int nents)
  2244. {
  2245.     int i;
  2246.     struct dentry *dentry = filp->f_path.dentry;
  2247.     struct inode *inode = dentry->d_inode;
  2248.     struct task_struct *task = get_proc_task(inode);
  2249.     const struct pid_entry *p, *last;
  2250.     ino_t ino;
  2251.     int ret;
  2252.  
  2253.     ret = -ENOENT;
  2254.     if (!task)
  2255.         goto out_no_task;
  2256.  
  2257.     ret = 0;
  2258.     i = filp->f_pos;
  2259.     switch (i) {
  2260.     case 0:
  2261.         ino = inode->i_ino;
  2262.         if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
  2263.             goto out;
  2264.         i++;
  2265.         filp->f_pos++;
  2266.         /* fall through */
  2267.     case 1:
  2268.         ino = parent_ino(dentry);
  2269.         if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
  2270.             goto out;
  2271.         i++;
  2272.         filp->f_pos++;
  2273.         /* fall through */
  2274.     default:
  2275.         i -= 2;
  2276.         if (i >= nents) {
  2277.             ret = 1;
  2278.             goto out;
  2279.         }
  2280.         p = ents + i;
  2281.         last = &ents[nents - 1];
  2282.         while (p <= last) {
  2283.             if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
  2284.                 goto out;
  2285.             filp->f_pos++;
  2286.             p++;
  2287.         }
  2288.     }
  2289.  
  2290.     ret = 1;
  2291. out:
  2292.     put_task_struct(task);
  2293. out_no_task:
  2294.     return ret;
  2295. }
  2296.  
  2297. #ifdef CONFIG_SECURITY
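        /*
         * /proc/<pid>/attr/<name>: read and write the task's security
         * attributes through the LSM hooks security_getprocattr() and
         * security_setprocattr().  The attribute name handed to the LSM is
         * simply the dentry name ("current", "exec", "fscreate", ...).
         */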
  2298. static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
  2299.                   size_t count, loff_t *ppos)
  2300. {
  2301.     struct inode * inode = file->f_path.dentry->d_inode;
  2302.     char *p = NULL;
  2303.     ssize_t length;
  2304.     struct task_struct *task = get_proc_task(inode);
  2305.  
  2306.     if (!task)
  2307.         return -ESRCH;
  2308.  
  2309.     length = security_getprocattr(task,
  2310.                       (char*)file->f_path.dentry->d_name.name,
  2311.                       &p);
  2312.     put_task_struct(task);
  2313.     if (length > 0)
  2314.         length = simple_read_from_buffer(buf, count, ppos, p, length);
  2315.     kfree(p);
  2316.     return length;
  2317. }
  2318.  
  2319. static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
  2320.                    size_t count, loff_t *ppos)
  2321. {
  2322.     struct inode * inode = file->f_path.dentry->d_inode;
  2323.     char *page;
  2324.     ssize_t length;
  2325.     struct task_struct *task = get_proc_task(inode);
  2326.  
  2327.     length = -ESRCH;
  2328.     if (!task)
  2329.         goto out_no_task;
  2330.     if (count > PAGE_SIZE)
  2331.         count = PAGE_SIZE;
  2332.  
  2333.     /* No partial writes. */
  2334.     length = -EINVAL;
  2335.     if (*ppos != 0)
  2336.         goto out;
  2337.  
  2338.     length = -ENOMEM;
  2339.     page = (char*)__get_free_page(GFP_TEMPORARY);
  2340.     if (!page)
  2341.         goto out;
  2342.  
  2343.     length = -EFAULT;
  2344.     if (copy_from_user(page, buf, count))
  2345.         goto out_free;
  2346.  
  2347.     /* Guard against adverse ptrace interaction */
  2348.     length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
  2349.     if (length < 0)
  2350.         goto out_free;
  2351.  
  2352.     length = security_setprocattr(task,
  2353.                       (char*)file->f_path.dentry->d_name.name,
  2354.                       (void*)page, count);
  2355.     mutex_unlock(&task->signal->cred_guard_mutex);
  2356. out_free:
  2357.     free_page((unsigned long) page);
  2358. out:
  2359.     put_task_struct(task);
  2360. out_no_task:
  2361.     return length;
  2362. }
  2363.  
  2364. static const struct file_operations proc_pid_attr_operations = {
  2365.     .read       = proc_pid_attr_read,
  2366.     .write      = proc_pid_attr_write,
  2367.     .llseek     = generic_file_llseek,
  2368. };
  2369.  
  2370. static const struct pid_entry attr_dir_stuff[] = {
  2371.     REG("current",    S_IRUGO|S_IWUGO, proc_pid_attr_operations),
  2372.     REG("prev",       S_IRUGO,     proc_pid_attr_operations),
  2373.     REG("exec",       S_IRUGO|S_IWUGO, proc_pid_attr_operations),
  2374.     REG("fscreate",   S_IRUGO|S_IWUGO, proc_pid_attr_operations),
  2375.     REG("keycreate",  S_IRUGO|S_IWUGO, proc_pid_attr_operations),
  2376.     REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
  2377. };
  2378.  
  2379. static int proc_attr_dir_readdir(struct file * filp,
  2380.                  void * dirent, filldir_t filldir)
  2381. {
  2382.     return proc_pident_readdir(filp,dirent,filldir,
  2383.                    attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
  2384. }
  2385.  
  2386. static const struct file_operations proc_attr_dir_operations = {
  2387.     .read       = generic_read_dir,
  2388.     .readdir    = proc_attr_dir_readdir,
  2389.     .llseek     = default_llseek,
  2390. };
  2391.  
  2392. static struct dentry *proc_attr_dir_lookup(struct inode *dir,
  2393.                 struct dentry *dentry, struct nameidata *nd)
  2394. {
  2395.     return proc_pident_lookup(dir, dentry,
  2396.                   attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
  2397. }
  2398.  
  2399. static const struct inode_operations proc_attr_dir_inode_operations = {
  2400.     .lookup     = proc_attr_dir_lookup,
  2401.     .getattr    = pid_getattr,
  2402.     .setattr    = proc_setattr,
  2403. };
  2404.  
  2405. #endif
  2406.  
  2407. #ifdef CONFIG_ELF_CORE
  2408. static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
  2409.                      size_t count, loff_t *ppos)
  2410. {
  2411.     struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
  2412.     struct mm_struct *mm;
  2413.     char buffer[PROC_NUMBUF];
  2414.     size_t len;
  2415.     int ret;
  2416.  
  2417.     if (!task)
  2418.         return -ESRCH;
  2419.  
  2420.     ret = 0;
  2421.     mm = get_task_mm(task);
  2422.     if (mm) {
  2423.         len = snprintf(buffer, sizeof(buffer), "%08lx\n",
  2424.                    ((mm->flags & MMF_DUMP_FILTER_MASK) >>
  2425.                 MMF_DUMP_FILTER_SHIFT));
  2426.         mmput(mm);
  2427.         ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
  2428.     }
  2429.  
  2430.     put_task_struct(task);
  2431.  
  2432.     return ret;
  2433. }
  2434.  
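        /*
         * Parse a bitmask (simple_strtoul() with base 0, so "0x23", "043"
         * and "35" are all accepted) and copy it bit by bit into the
         * MMF_DUMP_FILTER_* region of mm->flags, e.g.
         * "echo 0x33 > /proc/<pid>/coredump_filter".
         */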
  2435. static ssize_t proc_coredump_filter_write(struct file *file,
  2436.                       const char __user *buf,
  2437.                       size_t count,
  2438.                       loff_t *ppos)
  2439. {
  2440.     struct task_struct *task;
  2441.     struct mm_struct *mm;
  2442.     char buffer[PROC_NUMBUF], *end;
  2443.     unsigned int val;
  2444.     int ret;
  2445.     int i;
  2446.     unsigned long mask;
  2447.  
  2448.     ret = -EFAULT;
  2449.     memset(buffer, 0, sizeof(buffer));
  2450.     if (count > sizeof(buffer) - 1)
  2451.         count = sizeof(buffer) - 1;
  2452.     if (copy_from_user(buffer, buf, count))
  2453.         goto out_no_task;
  2454.  
  2455.     ret = -EINVAL;
  2456.     val = (unsigned int)simple_strtoul(buffer, &end, 0);
  2457.     if (*end == '\n')
  2458.         end++;
  2459.     if (end - buffer == 0)
  2460.         goto out_no_task;
  2461.  
  2462.     ret = -ESRCH;
  2463.     task = get_proc_task(file->f_dentry->d_inode);
  2464.     if (!task)
  2465.         goto out_no_task;
  2466.  
  2467.     ret = end - buffer;
  2468.     mm = get_task_mm(task);
  2469.     if (!mm)
  2470.         goto out_no_mm;
  2471.  
  2472.     for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
  2473.         if (val & mask)
  2474.             set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
  2475.         else
  2476.             clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
  2477.     }
  2478.  
  2479.     mmput(mm);
  2480.  out_no_mm:
  2481.     put_task_struct(task);
  2482.  out_no_task:
  2483.     return ret;
  2484. }
  2485.  
  2486. static const struct file_operations proc_coredump_filter_operations = {
  2487.     .read       = proc_coredump_filter_read,
  2488.     .write      = proc_coredump_filter_write,
  2489.     .llseek     = generic_file_llseek,
  2490. };
  2491. #endif
  2492.  
  2493. /*
  2494.  * /proc/self:
  2495.  */
  2496. static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
  2497.                   int buflen)
  2498. {
  2499.     struct pid_namespace *ns = dentry->d_sb->s_fs_info;
  2500.     pid_t tgid = task_tgid_nr_ns(current, ns);
  2501.     char tmp[PROC_NUMBUF];
  2502.     if (!tgid)
  2503.         return -ENOENT;
  2504.     sprintf(tmp, "%d", tgid);
  2505.     return vfs_readlink(dentry,buffer,buflen,tmp);
  2506. }
  2507.  
  2508. static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
  2509. {
  2510.     struct pid_namespace *ns = dentry->d_sb->s_fs_info;
  2511.     pid_t tgid = task_tgid_nr_ns(current, ns);
  2512.     char *name = ERR_PTR(-ENOENT);
  2513.     if (tgid) {
  2514.         name = __getname();
  2515.         if (!name)
  2516.             name = ERR_PTR(-ENOMEM);
  2517.         else
  2518.             sprintf(name, "%d", tgid);
  2519.     }
  2520.     nd_set_link(nd, name);
  2521.     return NULL;
  2522. }
  2523.  
  2524. static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
  2525.                 void *cookie)
  2526. {
  2527.     char *s = nd_get_link(nd);
  2528.     if (!IS_ERR(s))
  2529.         __putname(s);
  2530. }
  2531.  
  2532. static const struct inode_operations proc_self_inode_operations = {
  2533.     .readlink   = proc_self_readlink,
  2534.     .follow_link    = proc_self_follow_link,
  2535.     .put_link   = proc_self_put_link,
  2536. };
  2537.  
  2538. /*
  2539.  * proc base
  2540.  *
  2541.  * These are the directory entries in the root directory of /proc
  2542.  * that properly belong to the /proc filesystem, as they
  2543.  * describe something that is process related.
  2544.  */
  2545. static const struct pid_entry proc_base_stuff[] = {
  2546.     NOD("self", S_IFLNK|S_IRWXUGO,
  2547.         &proc_self_inode_operations, NULL, {}),
  2548. };
  2549.  
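        /*
         * Instantiate an entry from proc_base_stuff above ("self" lives
         * directly in the /proc root).  Unlike proc_pid_make_inode() this
         * neither copies ownership from the task's credentials nor installs
         * dentry operations, so these dentries are never revalidated
         * against the task.
         */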
  2550. static struct dentry *proc_base_instantiate(struct inode *dir,
  2551.     struct dentry *dentry, struct task_struct *task, const void *ptr)
  2552. {
  2553.     const struct pid_entry *p = ptr;
  2554.     struct inode *inode;
  2555.     struct proc_inode *ei;
  2556.     struct dentry *error;
  2557.  
  2558.     /* Allocate the inode */
  2559.     error = ERR_PTR(-ENOMEM);
  2560.     inode = new_inode(dir->i_sb);
  2561.     if (!inode)
  2562.         goto out;
  2563.  
  2564.     /* Initialize the inode */
  2565.     ei = PROC_I(inode);
  2566.     inode->i_ino = get_next_ino();
  2567.     inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
  2568.  
  2569.     /*
  2570.      * grab a reference to the task's struct pid.
  2571.      */
  2572.     ei->pid = get_task_pid(task, PIDTYPE_PID);
  2573.     if (!ei->pid)
  2574.         goto out_iput;
  2575.  
  2576.     inode->i_mode = p->mode;
  2577.     if (S_ISDIR(inode->i_mode))
  2578.         set_nlink(inode, 2);
  2579.     if (S_ISLNK(inode->i_mode))
  2580.         inode->i_size = 64;
  2581.     if (p->iop)
  2582.         inode->i_op = p->iop;
  2583.     if (p->fop)
  2584.         inode->i_fop = p->fop;
  2585.     ei->op = p->op;
  2586.     d_add(dentry, inode);
  2587.     error = NULL;
  2588. out:
  2589.     return error;
  2590. out_iput:
  2591.     iput(inode);
  2592.     goto out;
  2593. }
  2594.  
  2595. static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
  2596. {
  2597.     struct dentry *error;
  2598.     struct task_struct *task = get_proc_task(dir);
  2599.     const struct pid_entry *p, *last;
  2600.  
  2601.     error = ERR_PTR(-ENOENT);
  2602.  
  2603.     if (!task)
  2604.         goto out_no_task;
  2605.  
  2606.     /* Lookup the directory entry */
  2607.     last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
  2608.     for (p = proc_base_stuff; p <= last; p++) {
  2609.         if (p->len != dentry->d_name.len)
  2610.             continue;
  2611.         if (!memcmp(dentry->d_name.name, p->name, p->len))
  2612.             break;
  2613.     }
  2614.     if (p > last)
  2615.         goto out;
  2616.  
  2617.     error = proc_base_instantiate(dir, dentry, task, p);
  2618.  
  2619. out:
  2620.     put_task_struct(task);
  2621. out_no_task:
  2622.     return error;
  2623. }
  2624.  
  2625. static int proc_base_fill_cache(struct file *filp, void *dirent,
  2626.     filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
  2627. {
  2628.     return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
  2629.                 proc_base_instantiate, task, p);
  2630. }
  2631.  
  2632. #ifdef CONFIG_TASK_IO_ACCOUNTING
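        /*
         * Format the I/O accounting counters for /proc/<pid>/io.  With
         * @whole set (the tgid-level file) the counters of every live
         * thread plus the totals accumulated in signal->ioac are added in.
         * Readers must pass ptrace_may_access(), checked under
         * cred_guard_mutex to avoid racing with credential changes during
         * exec.
         */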
  2633. static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
  2634. {
  2635.     struct task_io_accounting acct = task->ioac;
  2636.     unsigned long flags;
  2637.     int result;
  2638.  
  2639.     result = mutex_lock_killable(&task->signal->cred_guard_mutex);
  2640.     if (result)
  2641.         return result;
  2642.  
  2643.     if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
  2644.         result = -EACCES;
  2645.         goto out_unlock;
  2646.     }
  2647.  
  2648.     if (whole && lock_task_sighand(task, &flags)) {
  2649.         struct task_struct *t = task;
  2650.  
  2651.         task_io_accounting_add(&acct, &task->signal->ioac);
  2652.         while_each_thread(task, t)
  2653.             task_io_accounting_add(&acct, &t->ioac);
  2654.  
  2655.         unlock_task_sighand(task, &flags);
  2656.     }
  2657.     result = sprintf(buffer,
  2658.             "rchar: %llu\n"
  2659.             "wchar: %llu\n"
  2660.             "syscr: %llu\n"
  2661.             "syscw: %llu\n"
  2662.             "read_bytes: %llu\n"
  2663.             "write_bytes: %llu\n"
  2664.             "cancelled_write_bytes: %llu\n",
  2665.             (unsigned long long)acct.rchar,
  2666.             (unsigned long long)acct.wchar,
  2667.             (unsigned long long)acct.syscr,
  2668.             (unsigned long long)acct.syscw,
  2669.             (unsigned long long)acct.read_bytes,
  2670.             (unsigned long long)acct.write_bytes,
  2671.             (unsigned long long)acct.cancelled_write_bytes);
  2672. out_unlock:
  2673.     mutex_unlock(&task->signal->cred_guard_mutex);
  2674.     return result;
  2675. }
  2676.  
  2677. static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
  2678. {
  2679.     return do_io_accounting(task, buffer, 0);
  2680. }
  2681.  
  2682. static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
  2683. {
  2684.     return do_io_accounting(task, buffer, 1);
  2685. }
  2686. #endif /* CONFIG_TASK_IO_ACCOUNTING */
  2687.  
  2688. static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
  2689.                 struct pid *pid, struct task_struct *task)
  2690. {
  2691.     int err = lock_trace(task);
  2692.     if (!err) {
  2693.         seq_printf(m, "%08x\n", task->personality);
  2694.         unlock_trace(task);
  2695.     }
  2696.     return err;
  2697. }
  2698.  
  2699. /*
  2700.  * Thread groups
  2701.  */
  2702. static const struct file_operations proc_task_operations;
  2703. static const struct inode_operations proc_task_inode_operations;
  2704.  
  2705. static const struct pid_entry tgid_base_stuff[] = {
  2706.     DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
  2707.     DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
  2708.     DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
  2709.     DIR("ns",     S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
  2710. #ifdef CONFIG_NET
  2711.     DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
  2712. #endif
  2713.     REG("environ",    S_IRUSR, proc_environ_operations),
  2714.     INF("auxv",       S_IRUSR, proc_pid_auxv),
  2715.     ONE("status",     S_IRUGO, proc_pid_status),
  2716.     ONE("personality", S_IRUGO, proc_pid_personality),
  2717.     INF("limits",     S_IRUGO, proc_pid_limits),
  2718. #ifdef CONFIG_SCHED_DEBUG
  2719.     REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
  2720. #endif
  2721. #ifdef CONFIG_SCHED_AUTOGROUP
  2722.     REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
  2723. #endif
  2724.     REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
  2725. #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
  2726.     INF("syscall",    S_IRUGO, proc_pid_syscall),
  2727. #endif
  2728.     INF("cmdline",    S_IRUGO, proc_pid_cmdline),
  2729.     ONE("stat",       S_IRUGO, proc_tgid_stat),
  2730.     ONE("statm",      S_IRUGO, proc_pid_statm),
  2731.     REG("maps",       S_IRUGO, proc_maps_operations),
  2732. #ifdef CONFIG_NUMA
  2733.     REG("numa_maps",  S_IRUGO, proc_numa_maps_operations),
  2734. #endif
  2735.     REG("mem",        S_IRUSR|S_IWUSR, proc_mem_operations),
  2736.     LNK("cwd",        proc_cwd_link),
  2737.     LNK("root",       proc_root_link),
  2738.     LNK("exe",        proc_exe_link),
  2739.     REG("mounts",     S_IRUGO, proc_mounts_operations),
  2740.     REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
  2741.     REG("mountstats", S_IRUSR, proc_mountstats_operations),
  2742. #ifdef CONFIG_PROC_PAGE_MONITOR
  2743.     REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
  2744.     REG("smaps",      S_IRUGO, proc_smaps_operations),
  2745.     REG("pagemap",    S_IRUGO, proc_pagemap_operations),
  2746. #endif
  2747. #ifdef CONFIG_SECURITY
  2748.     DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
  2749. #endif
  2750. #ifdef CONFIG_KALLSYMS
  2751.     INF("wchan",      S_IRUGO, proc_pid_wchan),
  2752. #endif
  2753. #ifdef CONFIG_STACKTRACE
  2754.     ONE("stack",      S_IRUGO, proc_pid_stack),
  2755. #endif
  2756. #ifdef CONFIG_SCHEDSTATS
  2757.     INF("schedstat",  S_IRUGO, proc_pid_schedstat),
  2758. #endif
  2759. #ifdef CONFIG_LATENCYTOP
  2760.     REG("latency",  S_IRUGO, proc_lstats_operations),
  2761. #endif
  2762. #ifdef CONFIG_PROC_PID_CPUSET
  2763.     REG("cpuset",     S_IRUGO, proc_cpuset_operations),
  2764. #endif
  2765. #ifdef CONFIG_CGROUPS
  2766.     REG("cgroup",  S_IRUGO, proc_cgroup_operations),
  2767. #endif
  2768.     INF("oom_score",  S_IRUGO, proc_oom_score),
  2769.     REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
  2770.     REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
  2771. #ifdef CONFIG_AUDITSYSCALL
  2772.     REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
  2773.     REG("sessionid",  S_IRUGO, proc_sessionid_operations),
  2774. #endif
  2775. #ifdef CONFIG_FAULT_INJECTION
  2776.     REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
  2777. #endif
  2778. #ifdef CONFIG_ELF_CORE
  2779.     REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
  2780. #endif
  2781. #ifdef CONFIG_TASK_IO_ACCOUNTING
  2782.     INF("io",   S_IRUSR, proc_tgid_io_accounting),
  2783. #endif
  2784. #ifdef CONFIG_HARDWALL
  2785.     INF("hardwall",   S_IRUGO, proc_pid_hardwall),
  2786. #endif
  2787. };
  2788.  
  2789. static int proc_tgid_base_readdir(struct file * filp,
  2790.                  void * dirent, filldir_t filldir)
  2791. {
  2792.     return proc_pident_readdir(filp,dirent,filldir,
  2793.                    tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
  2794. }
  2795.  
  2796. static const struct file_operations proc_tgid_base_operations = {
  2797.     .read       = generic_read_dir,
  2798.     .readdir    = proc_tgid_base_readdir,
  2799.     .llseek     = default_llseek,
  2800. };
  2801.  
  2802. static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
  2803.     return proc_pident_lookup(dir, dentry,
  2804.                   tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
  2805. }
  2806.  
  2807. static const struct inode_operations proc_tgid_base_inode_operations = {
  2808.     .lookup     = proc_tgid_base_lookup,
  2809.     .getattr    = pid_getattr,
  2810.     .setattr    = proc_setattr,
  2811. };
  2812.  
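        /*
         * Drop the cached dentries for one pid from a single proc mount:
         * /proc/<pid> itself and, if present, /proc/<tgid>/task/<pid>.
         * Used by proc_flush_task() below for every pid namespace the task
         * was visible in.
         */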
  2813. static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
  2814. {
  2815.     struct dentry *dentry, *leader, *dir;
  2816.     char buf[PROC_NUMBUF];
  2817.     struct qstr name;
  2818.  
  2819.     name.name = buf;
  2820.     name.len = snprintf(buf, sizeof(buf), "%d", pid);
  2821.     dentry = d_hash_and_lookup(mnt->mnt_root, &name);
  2822.     if (dentry) {
  2823.         shrink_dcache_parent(dentry);
  2824.         d_drop(dentry);
  2825.         dput(dentry);
  2826.     }
  2827.  
  2828.     name.name = buf;
  2829.     name.len = snprintf(buf, sizeof(buf), "%d", tgid);
  2830.     leader = d_hash_and_lookup(mnt->mnt_root, &name);
  2831.     if (!leader)
  2832.         goto out;
  2833.  
  2834.     name.name = "task";
  2835.     name.len = strlen(name.name);
  2836.     dir = d_hash_and_lookup(leader, &name);
  2837.     if (!dir)
  2838.         goto out_put_leader;
  2839.  
  2840.     name.name = buf;
  2841.     name.len = snprintf(buf, sizeof(buf), "%d", pid);
  2842.     dentry = d_hash_and_lookup(dir, &name);
  2843.     if (dentry) {
  2844.         shrink_dcache_parent(dentry);
  2845.         d_drop(dentry);
  2846.         dput(dentry);
  2847.     }
  2848.  
  2849.     dput(dir);
  2850. out_put_leader:
  2851.     dput(leader);
  2852. out:
  2853.     return;
  2854. }
  2855.  
  2856. /**
  2857.  * proc_flush_task -  Remove dcache entries for @task from the /proc dcache.
  2858.  * @task: task that should be flushed.
  2859.  *
  2860.  * When flushing dentries from proc, one needs to flush them from global
  2861.  * proc (proc_mnt) and from all the namespaces' procs this task was seen
  2862.  * in. This call is supposed to do the whole job.
  2863.  *
  2864.  * Looks in the dcache for
  2865.  * /proc/@pid
  2866.  * /proc/@tgid/task/@pid
  2867.  * if either directory is present, flushes it and all of its children
  2868.  * from the dcache.
  2869.  *
  2870.  * It is safe and reasonable to cache /proc entries for a task until
  2871.  * that task exits.  After that they just clog up the dcache with
  2872.  * useless entries, possibly causing useful dcache entries to be
  2873.  * flushed instead.  This routine is provided to flush those useless
  2874.  * dcache entries at process exit time.
  2875.  *
  2876.  * NOTE: This routine is just an optimization so it does not guarantee
  2877.  *       that no dcache entries will exist at process exit time; it
  2878.  *       just makes it very unlikely that any will persist.
  2879.  */
  2880.  
  2881. void proc_flush_task(struct task_struct *task)
  2882. {
  2883.     int i;
  2884.     struct pid *pid, *tgid;
  2885.     struct upid *upid;
  2886.  
  2887.     pid = task_pid(task);
  2888.     tgid = task_tgid(task);
  2889.  
  2890.     for (i = 0; i <= pid->level; i++) {
  2891.         upid = &pid->numbers[i];
  2892.         proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
  2893.                     tgid->numbers[i].nr);
  2894.     }
  2895.  
  2896.     upid = &pid->numbers[pid->level];
  2897.     if (upid->nr == 1)
  2898.         pid_ns_release_proc(upid->ns);
  2899. }
  2900.  
  2901. static struct dentry *proc_pid_instantiate(struct inode *dir,
  2902.                        struct dentry * dentry,
  2903.                        struct task_struct *task, const void *ptr)
  2904. {
  2905.     struct dentry *error = ERR_PTR(-ENOENT);
  2906.     struct inode *inode;
  2907.  
  2908.     inode = proc_pid_make_inode(dir->i_sb, task);
  2909.     if (!inode)
  2910.         goto out;
  2911.  
  2912.     inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
  2913.     inode->i_op = &proc_tgid_base_inode_operations;
  2914.     inode->i_fop = &proc_tgid_base_operations;
  2915.     inode->i_flags|=S_IMMUTABLE;
  2916.  
  2917.     set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
  2918.                           ARRAY_SIZE(tgid_base_stuff)));
  2919.  
  2920.     d_set_d_op(dentry, &pid_dentry_operations);
  2921.  
  2922.     d_add(dentry, inode);
  2923.     /* Close the race of the process dying before we return the dentry */
  2924.     if (pid_revalidate(dentry, NULL))
  2925.         error = NULL;
  2926. out:
  2927.     return error;
  2928. }
  2929.  
  2930. struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
  2931. {
  2932.     struct dentry *result;
  2933.     struct task_struct *task;
  2934.     unsigned tgid;
  2935.     struct pid_namespace *ns;
  2936.  
  2937.     result = proc_base_lookup(dir, dentry);
  2938.     if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
  2939.         goto out;
  2940.  
  2941.     tgid = name_to_int(dentry);
  2942.     if (tgid == ~0U)
  2943.         goto out;
  2944.  
  2945.     ns = dentry->d_sb->s_fs_info;
  2946.     rcu_read_lock();
  2947.     task = find_task_by_pid_ns(tgid, ns);
  2948.     if (task)
  2949.         get_task_struct(task);
  2950.     rcu_read_unlock();
  2951.     if (!task)
  2952.         goto out;
  2953.  
  2954.     result = proc_pid_instantiate(dir, dentry, task, NULL);
  2955.     put_task_struct(task);
  2956. out:
  2957.     return result;
  2958. }
  2959.  
  2960. /*
  2961.  * Find the first thread group leader with a tgid greater than or
  2962.  * equal to the requested tgid.
  2963.  */
  2964. struct tgid_iter {
  2965.     unsigned int tgid;
  2966.     struct task_struct *task;
  2967. };
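        /*
         * Advance the iterator: drop the reference on the previous task
         * (if any) and return, with a reference held, the next thread
         * group leader whose tgid in @ns is >= iter.tgid.
         */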
  2968. static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
  2969. {
  2970.     struct pid *pid;
  2971.  
  2972.     if (iter.task)
  2973.         put_task_struct(iter.task);
  2974.     rcu_read_lock();
  2975. retry:
  2976.     iter.task = NULL;
  2977.     pid = find_ge_pid(iter.tgid, ns);
  2978.     if (pid) {
  2979.         iter.tgid = pid_nr_ns(pid, ns);
  2980.         iter.task = pid_task(pid, PIDTYPE_PID);
  2981.         /* What we want to know is whether the pid we have found
  2982.          * is the pid of a thread group leader.  Testing whether the
  2983.          * task is a thread_group_leader is the obvious thing to do,
  2984.          * but there is a window where that fails, due to the pid
  2985.          * transfer logic in de_thread().
  2986.          *
  2987.          * So we perform the straightforward test of seeing if the
  2988.          * pid we have found is the pid of a thread group leader,
  2989.          * and don't worry if the task we have found doesn't happen
  2990.          * to be a thread group leader.  We don't care either way in
  2991.          * the readdir case.
  2992.          */
  2993.         if (!iter.task || !has_group_leader_pid(iter.task)) {
  2994.             iter.tgid += 1;
  2995.             goto retry;
  2996.         }
  2997.         get_task_struct(iter.task);
  2998.     }
  2999.     rcu_read_unlock();
  3000.     return iter;
  3001. }
  3002.  
  3003. #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
  3004.  
  3005. static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
  3006.     struct tgid_iter iter)
  3007. {
  3008.     char name[PROC_NUMBUF];
  3009.     int len = snprintf(name, sizeof(name), "%d", iter.tgid);
  3010.     return proc_fill_cache(filp, dirent, filldir, name, len,
  3011.                 proc_pid_instantiate, iter.task, NULL);
  3012. }
  3013.  
  3014. /* for the /proc/ directory itself, after non-process stuff has been done */
  3015. int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
  3016. {
  3017.     unsigned int nr;
  3018.     struct task_struct *reaper;
  3019.     struct tgid_iter iter;
  3020.     struct pid_namespace *ns;
  3021.  
  3022.     if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
  3023.         goto out_no_task;
  3024.     nr = filp->f_pos - FIRST_PROCESS_ENTRY;
  3025.  
  3026.     reaper = get_proc_task(filp->f_path.dentry->d_inode);
  3027.     if (!reaper)
  3028.         goto out_no_task;
  3029.  
  3030.     for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
  3031.         const struct pid_entry *p = &proc_base_stuff[nr];
  3032.         if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
  3033.             goto out;
  3034.     }
  3035.  
  3036.     ns = filp->f_dentry->d_sb->s_fs_info;
  3037.     iter.task = NULL;
  3038.     iter.tgid = filp->f_pos - TGID_OFFSET;
  3039.     for (iter = next_tgid(ns, iter);
  3040.          iter.task;
  3041.          iter.tgid += 1, iter = next_tgid(ns, iter)) {
  3042.         filp->f_pos = iter.tgid + TGID_OFFSET;
  3043.         if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
  3044.             put_task_struct(iter.task);
  3045.             goto out;
  3046.         }
  3047.     }
  3048.     filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
  3049. out:
  3050.     put_task_struct(reaper);
  3051. out_no_task:
  3052.     return 0;
  3053. }
  3054.  
  3055. /*
  3056.  * Tasks
  3057.  */
  3058. static const struct pid_entry tid_base_stuff[] = {
  3059.     DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
  3060.     DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
  3061.     DIR("ns",    S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
  3062.     REG("environ",   S_IRUSR, proc_environ_operations),
  3063.     INF("auxv",      S_IRUSR, proc_pid_auxv),
  3064.     ONE("status",    S_IRUGO, proc_pid_status),
  3065.     ONE("personality", S_IRUGO, proc_pid_personality),
  3066.     INF("limits",    S_IRUGO, proc_pid_limits),
  3067. #ifdef CONFIG_SCHED_DEBUG
  3068.     REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
  3069. #endif
  3070.     REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
  3071. #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
  3072.     INF("syscall",   S_IRUGO, proc_pid_syscall),
  3073. #endif
  3074.     INF("cmdline",   S_IRUGO, proc_pid_cmdline),
  3075.     ONE("stat",      S_IRUGO, proc_tid_stat),
  3076.     ONE("statm",     S_IRUGO, proc_pid_statm),
  3077.     REG("maps",      S_IRUGO, proc_maps_operations),
  3078. #ifdef CONFIG_NUMA
  3079.     REG("numa_maps", S_IRUGO, proc_numa_maps_operations),
  3080. #endif
  3081.     REG("mem",       S_IRUSR|S_IWUSR, proc_mem_operations),
  3082.     LNK("cwd",       proc_cwd_link),
  3083.     LNK("root",      proc_root_link),
  3084.     LNK("exe",       proc_exe_link),
  3085.     REG("mounts",    S_IRUGO, proc_mounts_operations),
  3086.     REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
  3087. #ifdef CONFIG_PROC_PAGE_MONITOR
  3088.     REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
  3089.     REG("smaps",     S_IRUGO, proc_smaps_operations),
  3090.     REG("pagemap",    S_IRUGO, proc_pagemap_operations),
  3091. #endif
  3092. #ifdef CONFIG_SECURITY
  3093.     DIR("attr",      S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
  3094. #endif
  3095. #ifdef CONFIG_KALLSYMS
  3096.     INF("wchan",     S_IRUGO, proc_pid_wchan),
  3097. #endif
  3098. #ifdef CONFIG_STACKTRACE
  3099.     ONE("stack",      S_IRUGO, proc_pid_stack),
  3100. #endif
  3101. #ifdef CONFIG_SCHEDSTATS
  3102.     INF("schedstat", S_IRUGO, proc_pid_schedstat),
  3103. #endif
  3104. #ifdef CONFIG_LATENCYTOP
  3105.     REG("latency",  S_IRUGO, proc_lstats_operations),
  3106. #endif
  3107. #ifdef CONFIG_PROC_PID_CPUSET
  3108.     REG("cpuset",    S_IRUGO, proc_cpuset_operations),
  3109. #endif
  3110. #ifdef CONFIG_CGROUPS
  3111.     REG("cgroup",  S_IRUGO, proc_cgroup_operations),
  3112. #endif
  3113.     INF("oom_score", S_IRUGO, proc_oom_score),
  3114.     REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
  3115.     REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
  3116. #ifdef CONFIG_AUDITSYSCALL
  3117.     REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
  3118.     REG("sessionid",  S_IRUGO, proc_sessionid_operations),
  3119. #endif
  3120. #ifdef CONFIG_FAULT_INJECTION
  3121.     REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
  3122. #endif
  3123. #ifdef CONFIG_TASK_IO_ACCOUNTING
  3124.     INF("io",   S_IRUSR, proc_tid_io_accounting),
  3125. #endif
  3126. #ifdef CONFIG_HARDWALL
  3127.     INF("hardwall",   S_IRUGO, proc_pid_hardwall),
  3128. #endif
  3129. };
  3130.  
  3131. static int proc_tid_base_readdir(struct file * filp,
  3132.                  void * dirent, filldir_t filldir)
  3133. {
  3134.     return proc_pident_readdir(filp,dirent,filldir,
  3135.                    tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
  3136. }
  3137.  
  3138. static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
  3139.     return proc_pident_lookup(dir, dentry,
  3140.                   tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
  3141. }
  3142.  
  3143. static const struct file_operations proc_tid_base_operations = {
  3144.     .read       = generic_read_dir,
  3145.     .readdir    = proc_tid_base_readdir,
  3146.     .llseek     = default_llseek,
  3147. };
  3148.  
  3149. static const struct inode_operations proc_tid_base_inode_operations = {
  3150.     .lookup     = proc_tid_base_lookup,
  3151.     .getattr    = pid_getattr,
  3152.     .setattr    = proc_setattr,
  3153. };
  3154.  
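        /*
         * Instantiate /proc/<tgid>/task/<tid>: the same shape of directory
         * as /proc/<tgid>, but backed by tid_base_stuff instead of
         * tgid_base_stuff.
         */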
  3155. static struct dentry *proc_task_instantiate(struct inode *dir,
  3156.     struct dentry *dentry, struct task_struct *task, const void *ptr)
  3157. {
  3158.     struct dentry *error = ERR_PTR(-ENOENT);
  3159.     struct inode *inode;
  3160.     inode = proc_pid_make_inode(dir->i_sb, task);
  3161.  
  3162.     if (!inode)
  3163.         goto out;
  3164.     inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
  3165.     inode->i_op = &proc_tid_base_inode_operations;
  3166.     inode->i_fop = &proc_tid_base_operations;
  3167.     inode->i_flags|=S_IMMUTABLE;
  3168.  
  3169.     set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
  3170.                           ARRAY_SIZE(tid_base_stuff)));
  3171.  
  3172.     d_set_d_op(dentry, &pid_dentry_operations);
  3173.  
  3174.     d_add(dentry, inode);
  3175.     /* Close the race of the process dying before we return the dentry */
  3176.     if (pid_revalidate(dentry, NULL))
  3177.         error = NULL;
  3178. out:
  3179.     return error;
  3180. }
  3181.  
  3182. static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
  3183. {
  3184.     struct dentry *result = ERR_PTR(-ENOENT);
  3185.     struct task_struct *task;
  3186.     struct task_struct *leader = get_proc_task(dir);
  3187.     unsigned tid;
  3188.     struct pid_namespace *ns;
  3189.  
  3190.     if (!leader)
  3191.         goto out_no_task;
  3192.  
  3193.     tid = name_to_int(dentry);
  3194.     if (tid == ~0U)
  3195.         goto out;
  3196.  
  3197.     ns = dentry->d_sb->s_fs_info;
  3198.     rcu_read_lock();
  3199.     task = find_task_by_pid_ns(tid, ns);
  3200.     if (task)
  3201.         get_task_struct(task);
  3202.     rcu_read_unlock();
  3203.     if (!task)
  3204.         goto out;
  3205.     if (!same_thread_group(leader, task))
  3206.         goto out_drop_task;
  3207.  
  3208.     result = proc_task_instantiate(dir, dentry, task, NULL);
  3209. out_drop_task:
  3210.     put_task_struct(task);
  3211. out:
  3212.     put_task_struct(leader);
  3213. out_no_task:
  3214.     return result;
  3215. }
  3216.  
  3217. /*
  3218.  * Find the first tid of a thread group to return to user space.
  3219.  *
  3220.  * Usually this is just the thread group leader, but if the user's
  3221.  * buffer was too small or there was a seek into the middle of the
  3222.  * directory we have more work to do.
  3223.  *
  3224.  * In the case of a short read we start with find_task_by_pid_ns().
  3225.  *
  3226.  * In the case of a seek we start with the leader and walk nr
  3227.  * threads past it.
  3228.  */
  3229. static struct task_struct *first_tid(struct task_struct *leader,
  3230.         int tid, int nr, struct pid_namespace *ns)
  3231. {
  3232.     struct task_struct *pos;
  3233.  
  3234.     rcu_read_lock();
  3235.     /* Attempt to start with the pid of a thread */
  3236.     if (tid && (nr > 0)) {
  3237.         pos = find_task_by_pid_ns(tid, ns);
  3238.         if (pos && (pos->group_leader == leader))
  3239.             goto found;
  3240.     }
  3241.  
  3242.     /* If nr exceeds the number of threads there is nothing to do */
  3243.     pos = NULL;
  3244.     if (nr && nr >= get_nr_threads(leader))
  3245.         goto out;
  3246.  
  3247.     /* If we haven't found our starting place yet, start
  3248.      * with the leader and walk nr threads forward.
  3249.      */
  3250.     for (pos = leader; nr > 0; --nr) {
  3251.         pos = next_thread(pos);
  3252.         if (pos == leader) {
  3253.             pos = NULL;
  3254.             goto out;
  3255.         }
  3256.     }
  3257. found:
  3258.     get_task_struct(pos);
  3259. out:
  3260.     rcu_read_unlock();
  3261.     return pos;
  3262. }
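/*
 * The nr argument is simply f_pos minus the two entries emitted for "."
 * and "..": e.g. if a previous pass returned ".", ".." and two tids,
 * f_pos is 4, so a fresh scan that cannot use a cached tid walks
 * nr = 2 threads past the group leader before resuming.
 */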
  3263.  
  3264. /*
  3265.  * Find the next thread in the thread list.
  3266.  * Return NULL if there is an error or no next thread.
  3267.  *
  3268.  * The reference to the input task_struct is released.
  3269.  */
  3270. static struct task_struct *next_tid(struct task_struct *start)
  3271. {
  3272.     struct task_struct *pos = NULL;
  3273.     rcu_read_lock();
  3274.     if (pid_alive(start)) {
  3275.         pos = next_thread(start);
  3276.         if (thread_group_leader(pos))
  3277.             pos = NULL;
  3278.         else
  3279.             get_task_struct(pos);
  3280.     }
  3281.     rcu_read_unlock();
  3282.     put_task_struct(start);
  3283.     return pos;
  3284. }
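/*
 * first_tid()/next_tid() form a simple iterator over a thread group; the
 * caller holds exactly one task_struct reference at a time:
 *
 *	for (task = first_tid(leader, tid, nr, ns);
 *	     task;
 *	     task = next_tid(task))
 *		...use task...
 *
 * Breaking out of the loop early requires an explicit put_task_struct(),
 * as proc_task_readdir() below does.
 */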
  3285.  
  3286. static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
  3287.     struct task_struct *task, int tid)
  3288. {
  3289.     char name[PROC_NUMBUF];
  3290.     int len = snprintf(name, sizeof(name), "%d", tid);
  3291.     return proc_fill_cache(filp, dirent, filldir, name, len,
  3292.                 proc_task_instantiate, task, NULL);
  3293. }
  3294.  
  3295. /* for the /proc/TGID/task/ directories */
  3296. static int proc_task_readdir(struct file *filp, void *dirent, filldir_t filldir)
  3297. {
  3298.     struct dentry *dentry = filp->f_path.dentry;
  3299.     struct inode *inode = dentry->d_inode;
  3300.     struct task_struct *leader = NULL;
  3301.     struct task_struct *task;
  3302.     int retval = -ENOENT;
  3303.     ino_t ino;
  3304.     int tid;
  3305.     struct pid_namespace *ns;
  3306.  
  3307.     task = get_proc_task(inode);
  3308.     if (!task)
  3309.         goto out_no_task;
  3310.     rcu_read_lock();
  3311.     if (pid_alive(task)) {
  3312.         leader = task->group_leader;
  3313.         get_task_struct(leader);
  3314.     }
  3315.     rcu_read_unlock();
  3316.     put_task_struct(task);
  3317.     if (!leader)
  3318.         goto out_no_task;
  3319.     retval = 0;
  3320.  
  3321.     switch ((unsigned long)filp->f_pos) {
  3322.     case 0:
  3323.         ino = inode->i_ino;
  3324.         if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0)
  3325.             goto out;
  3326.         filp->f_pos++;
  3327.         /* fall through */
  3328.     case 1:
  3329.         ino = parent_ino(dentry);
  3330.         if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0)
  3331.             goto out;
  3332.         filp->f_pos++;
  3333.         /* fall through */
  3334.     }
  3335.  
  3336.     /* f_version caches the tid value that the last readdir call couldn't
  3337.      * return. lseek aka telldir automatically resets f_version to 0.
  3338.      */
  3339.     ns = filp->f_dentry->d_sb->s_fs_info;
  3340.     tid = (int)filp->f_version;
  3341.     filp->f_version = 0;
  3342.     for (task = first_tid(leader, tid, filp->f_pos - 2, ns);
  3343.          task;
  3344.          task = next_tid(task), filp->f_pos++) {
  3345.         tid = task_pid_nr_ns(task, ns);
  3346.         if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
  3347.             /* returning this tid failed, save it as the first
  3348.              * tid for the next readdir call */
  3349.             filp->f_version = (u64)tid;
  3350.             put_task_struct(task);
  3351.             break;
  3352.         }
  3353.     }
  3354. out:
  3355.     put_task_struct(leader);
  3356. out_no_task:
  3357.     return retval;
  3358. }
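/*
 * From userspace all of the above is just a directory listing of
 * /proc/<tgid>/task/.  A minimal sketch enumerating the calling
 * process's threads (error handling trimmed):
 *
 *	#include <stdio.h>
 *	#include <dirent.h>
 *
 *	int main(void)
 *	{
 *		DIR *dir = opendir("/proc/self/task");
 *		struct dirent *de;
 *
 *		if (!dir)
 *			return 1;
 *		while ((de = readdir(dir)) != NULL)
 *			if (de->d_name[0] != '.')
 *				printf("tid %s\n", de->d_name);
 *		closedir(dir);
 *		return 0;
 *	}
 */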
  3359.  
  3360. static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  3361. {
  3362.     struct inode *inode = dentry->d_inode;
  3363.     struct task_struct *p = get_proc_task(inode);
  3364.     generic_fillattr(inode, stat);
  3365.  
  3366.     if (p) {
  3367.         stat->nlink += get_nr_threads(p);
  3368.         put_task_struct(p);
  3369.     }
  3370.  
  3371.     return 0;
  3372. }
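/*
 * Because getattr tops the link count up with the live thread count,
 * stat() on /proc/<pid>/task doubles as a cheap (if racy) thread counter
 * from userspace.  A sketch, assuming the directory's base link count is
 * the usual 2 set at instantiation time:
 *
 *	#include <stdio.h>
 *	#include <sys/stat.h>
 *
 *	int main(void)
 *	{
 *		struct stat st;
 *
 *		if (stat("/proc/self/task", &st) == 0)
 *			printf("roughly %lu threads\n",
 *			       (unsigned long)st.st_nlink - 2);
 *		return 0;
 *	}
 */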
  3373.  
  3374. static const struct inode_operations proc_task_inode_operations = {
  3375.     .lookup     = proc_task_lookup,
  3376.     .getattr    = proc_task_getattr,
  3377.     .setattr    = proc_setattr,
  3378. };
  3379.  
  3380. static const struct file_operations proc_task_operations = {
  3381.     .read       = generic_read_dir,
  3382.     .readdir    = proc_task_readdir,
  3383.     .llseek     = default_llseek,
  3384. };