cat ./linux/arch/x86/kernel/time_32-xen.c
/*
 *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
 *
 * This file contains the PC-specific time handling details:
 * reading the RTC at bootup, etc..
 * 1994-07-02    Alan Modra
 *      fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
 * 1995-03-26    Markus Kuhn
 *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
 *      precision CMOS clock update
 * 1996-05-03    Ingo Molnar
 *      fixed time warps in do_[slow|fast]_gettimeoffset()
 * 1997-09-10   Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 * 1998-09-05    (Various)
 *      More robust do_fast_gettimeoffset() algorithm implemented
 *      (works with APM, Cyrix 6x86MX and Centaur C6),
 *      monotonic gettimeofday() with fast_get_timeoffset(),
 *      drift-proof precision TSC calibration on boot
 *      (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
 *      Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
 *      ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
 * 1998-12-16    Andrea Arcangeli
 *      Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
 *      because it was not accounting lost_ticks.
 * 1998-12-24 Copyright (C) 1998  Andrea Arcangeli
 *      Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
 *      serialize accesses to xtime/lost_ticks).
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/time.h>
#include <linux/mca.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
#include <linux/kernel_stat.h>
#include <linux/posix-timers.h>
#include <linux/cpufreq.h>
#include <linux/clocksource.h>
#include <linux/sysdev.h>

#include <asm/delay.h>
#include <asm/time.h>
#include <asm/timer.h>

#include <xen/evtchn.h>
#include <xen/sysctl.h>
#include <xen/interface/vcpu.h>

#include <asm/i8253.h>
DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);

#ifdef CONFIG_X86_64
#include <asm/vsyscall.h>
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
#endif

#define XEN_SHIFT 22

unsigned int cpu_khz; /* Detected as we calibrate the TSC */
EXPORT_SYMBOL(cpu_khz);

/* These are periodically updated in shared_info, and then copied here. */
struct shadow_time_info {
        u64 tsc_timestamp;    /* TSC at last update of time vals. */
        u64 system_timestamp; /* Time, in nanosecs, since boot.   */
        u32 tsc_to_nsec_mul;
        u32 tsc_to_usec_mul;
        int tsc_shift;
        u32 version;
};
static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
static struct timespec shadow_tv;
static u32 shadow_tv_version;

/* Keep track of last time we did processing/updating of jiffies and xtime. */
static u64 processed_system_time; /* System time (ns) at last processing. */
static DEFINE_PER_CPU(u64, processed_system_time);

/* How much CPU time was spent blocked and how much was 'stolen'? */
static DEFINE_PER_CPU(u64, processed_stolen_time);
static DEFINE_PER_CPU(u64, processed_blocked_time);

/* Current runstate of each CPU (updated automatically by the hypervisor). */
static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);

/* Must be signed, as it's compared with s64 quantities which can be -ve. */
#define NS_PER_TICK (1000000000LL/HZ)

static struct vcpu_set_periodic_timer xen_set_periodic_tick = {
        .period_ns = NS_PER_TICK
};

static void __clock_was_set(struct work_struct *unused)
{
        clock_was_set();
}
static DECLARE_WORK(clock_was_set_work, __clock_was_set);

/*
 * GCC 4.3 can turn loops over an induction variable into division. We do
 * not support arbitrary 64-bit division, and so must break the induction.
 */
#define clobber_induction_variable(v) asm ( "" : "+r" (v) )
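/*
 * Note: the empty asm marks its operand as read and written ("+r"), so the
 * compiler can no longer prove the trip count of the loops it appears in
 * (__normalize_time() and the jiffy loop in timer_interrupt() below) and
 * cannot apply the loop-to-division transform described above.
 */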

static inline void __normalize_time(time_t *sec, s64 *nsec)
{
        while (*nsec >= NSEC_PER_SEC) {
                clobber_induction_variable(*nsec);
                (*nsec) -= NSEC_PER_SEC;
                (*sec)++;
        }
        while (*nsec < 0) {
                clobber_induction_variable(*nsec);
                (*nsec) += NSEC_PER_SEC;
                (*sec)--;
        }
}

/* Does this guest OS track Xen time, or set its wall clock independently? */
static int independent_wallclock = 0;
static int __init __independent_wallclock(char *str)
{
        independent_wallclock = 1;
        return 1;
}
__setup("independent_wallclock", __independent_wallclock);

int xen_independent_wallclock(void)
{
        return independent_wallclock;
}

/* Permitted clock jitter, in nsecs, beyond which a warning will be printed. */
static unsigned long permitted_clock_jitter = 10000000UL; /* 10ms */
static int __init __permitted_clock_jitter(char *str)
{
        permitted_clock_jitter = simple_strtoul(str, NULL, 0);
        return 1;
}
__setup("permitted_clock_jitter=", __permitted_clock_jitter);

/*
 * Scale a 64-bit delta by shifting and multiplying by a 32-bit fraction,
 * yielding a 64-bit result.
 */
static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
{
        u64 product;
#ifdef __i386__
        u32 tmp1, tmp2;
#endif

        if (shift < 0)
                delta >>= -shift;
        else
                delta <<= shift;

#ifdef __i386__
        __asm__ (
                "mul %5 ; "
                "mov %4,%%eax ; "
                "mov %%edx,%4 ; "
                "mul %5 ; "
                "xor %5,%5 ; "
                "add %4,%%eax ; "
                "adc %5,%%edx ; "
                : "=A" (product), "=r" (tmp1), "=r" (tmp2)
                : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
#else
        __asm__ (
                "mul %%rdx ; shrd $32,%%rdx,%%rax"
                : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
#endif

        return product;
}
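/*
 * Illustrative sketch, not part of the original file: the asm above performs
 * a 64x32->96-bit multiply and keeps bits 32..95, i.e. it computes
 * ((delta << shift) * mul_frac) >> 32 without losing the high bits.  A
 * portable-C equivalent (hypothetical helper name), built from 32x32->64
 * multiplies, would look like this:
 */
#if 0
static inline u64 scale_delta_c(u64 delta, u32 mul_frac, int shift)
{
        u32 d_lo, d_hi;
        u64 lo, hi;

        if (shift < 0)
                delta >>= -shift;
        else
                delta <<= shift;

        d_lo = (u32)delta;
        d_hi = (u32)(delta >> 32);

        lo = (u64)d_lo * mul_frac;      /* bits 0..63 of the full product  */
        hi = (u64)d_hi * mul_frac;      /* bits 32..95 of the full product */

        return (lo >> 32) + hi;         /* i.e. the full product >> 32     */
}
#endif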

static inline u64 get64(volatile u64 *ptr)
{
#ifndef CONFIG_64BIT
        return cmpxchg64(ptr, 0, 0);
#else
        return *ptr;
#endif
}

static inline u64 get64_local(volatile u64 *ptr)
{
#ifndef CONFIG_64BIT
        return cmpxchg64_local(ptr, 0, 0);
#else
        return *ptr;
#endif
}

static void init_cpu_khz(void)
{
        u64 __cpu_khz = 1000000ULL << 32;
        struct vcpu_time_info *info = &vcpu_info(0)->time;
        do_div(__cpu_khz, info->tsc_to_system_mul);
        if (info->tsc_shift < 0)
                cpu_khz = __cpu_khz << -info->tsc_shift;
        else
                cpu_khz = __cpu_khz >> info->tsc_shift;
}
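/*
 * Derivation of the above: Xen's vcpu_time_info encodes
 *
 *     ns = ((tsc_delta << tsc_shift) * tsc_to_system_mul) >> 32
 *
 * so 10^6 * 2^32 / tsc_to_system_mul is the CPU frequency in kHz before the
 * shift is applied; shifting by tsc_shift (right if positive, left if
 * negative) yields cpu_khz.
 */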

static u64 get_nsec_offset(struct shadow_time_info *shadow)
{
        u64 now, delta;
        rdtscll(now);
        delta = now - shadow->tsc_timestamp;
        return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}

static void __update_wallclock(time_t sec, long nsec)
{
        long wtm_nsec, xtime_nsec;
        time_t wtm_sec, xtime_sec;
        u64 tmp, wc_nsec;

        /* Adjust wall-clock time base. */
        wc_nsec = processed_system_time;
        wc_nsec += sec * (u64)NSEC_PER_SEC;
        wc_nsec += nsec;

        /* Split wallclock base into seconds and nanoseconds. */
        tmp = wc_nsec;
        xtime_nsec = do_div(tmp, 1000000000);
        xtime_sec = (time_t)tmp;

        wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
        wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);

        set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
        set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
}

static void update_wallclock(void)
{
        shared_info_t *s = HYPERVISOR_shared_info;

        do {
                shadow_tv_version = s->wc_version;
                rmb();
                shadow_tv.tv_sec = s->wc_sec;
                shadow_tv.tv_nsec = s->wc_nsec;
                rmb();
        } while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version));

        if (!independent_wallclock)
                __update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec);
}
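/*
 * Note on the retry loop above (and the matching one in
 * get_time_values_from_xen() below): Xen updates wc_* and the per-VCPU time
 * fields under a seqlock-style protocol.  The version is incremented before
 * and after each update, so an odd value means an update is in progress and
 * a changed value means the copy straddled an update;
 * "(version & 1) | (version ^ new_version)" therefore retries in either case.
 */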

/*
 * Reads a consistent set of time-base values from Xen, into a shadow data
 * area.
 */
static void get_time_values_from_xen(unsigned int cpu)
{
        struct vcpu_time_info *src;
        struct shadow_time_info *dst;
        unsigned long flags;
        u32 pre_version, post_version;

        src = &vcpu_info(cpu)->time;
        dst = &per_cpu(shadow_time, cpu);

        local_irq_save(flags);

        do {
                pre_version = dst->version = src->version;
                rmb();
                dst->tsc_timestamp = src->tsc_timestamp;
                dst->system_timestamp = src->system_time;
                dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
                dst->tsc_shift = src->tsc_shift;
                rmb();
                post_version = src->version;
        } while ((pre_version & 1) | (pre_version ^ post_version));

        dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;

        local_irq_restore(flags);
}

static inline int time_values_up_to_date(void)
{
        rmb();
        return percpu_read(shadow_time.version) == vcpu_info_read(time.version);
}

static void sync_xen_wallclock(unsigned long dummy);
static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
static void sync_xen_wallclock(unsigned long dummy)
{
        time_t sec;
        s64 nsec;
        struct xen_platform_op op;

        BUG_ON(!is_initial_xendomain());
        if (!ntp_synced() || independent_wallclock)
                return;

        write_seqlock_irq(&xtime_lock);

        sec = xtime.tv_sec;
        nsec = xtime.tv_nsec;
        __normalize_time(&sec, &nsec);

        op.cmd = XENPF_settime;
        op.u.settime.secs = sec;
        op.u.settime.nsecs = nsec;
        op.u.settime.system_time = processed_system_time;
        WARN_ON(HYPERVISOR_platform_op(&op));

        update_wallclock();

        write_sequnlock_irq(&xtime_lock);

        /* Once per minute. */
        mod_timer(&sync_xen_wallclock_timer, jiffies + 60*HZ);
}

static unsigned long long local_clock(void)
{
        unsigned int cpu = get_cpu();
        struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
        u64 time;
        u32 local_time_version;

        do {
                local_time_version = shadow->version;
                rdtsc_barrier();
                time = shadow->system_timestamp + get_nsec_offset(shadow);
                if (!time_values_up_to_date())
                        get_time_values_from_xen(cpu);
                barrier();
        } while (local_time_version != shadow->version);

        put_cpu();

        return time;
}

/*
 * Runstate accounting
 */
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
        u64 state_time;
        struct vcpu_runstate_info *state;

        BUG_ON(preemptible());

        state = &__get_cpu_var(runstate);

        do {
                state_time = get64_local(&state->state_entry_time);
                *res = *state;
        } while (get64_local(&state->state_entry_time) != state_time);

        WARN_ON_ONCE(res->state != RUNSTATE_running);
}

/*
 * Xen sched_clock implementation.  Returns the number of unstolen
 * nanoseconds, i.e. the nanoseconds the VCPU spent in the RUNNING and
 * BLOCKED states.
 */
unsigned long long sched_clock(void)
{
        struct vcpu_runstate_info runstate;
        cycle_t now;
        u64 ret;
        s64 offset;

        /*
         * Ideally sched_clock should be called on a per-cpu basis
         * anyway, so preempt should already be disabled, but that is
         * not currently the practice.
         */
        preempt_disable();

        now = local_clock();

        get_runstate_snapshot(&runstate);

        offset = now - runstate.state_entry_time;
        if (offset < 0)
                offset = 0;

        ret = offset + runstate.time[RUNSTATE_running]
              + runstate.time[RUNSTATE_blocked];

        preempt_enable();

        return ret;
}
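/*
 * Note: local_clock() and the clocksource below report Xen system time,
 * which keeps advancing while the VCPU is runnable-but-preempted or offline.
 * sched_clock() excludes that stolen time, so the scheduler only measures
 * time this VCPU was actually running or voluntarily blocked.
 */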

unsigned long profile_pc(struct pt_regs *regs)
{
        unsigned long pc = instruction_pointer(regs);

#if defined(CONFIG_SMP) || defined(__x86_64__)
        if (!user_mode_vm(regs) && in_lock_functions(pc)) {
# ifdef CONFIG_FRAME_POINTER
                return *(unsigned long *)(regs->bp + sizeof(long));
# else
#  ifdef __i386__
                unsigned long *sp = (unsigned long *)&regs->sp;
#  else
                unsigned long *sp = (unsigned long *)regs->sp;
#  endif

                /*
                 * Return address is either directly at the stack pointer
                 * or above the saved flags.  Eflags has bits 22-31 zero,
                 * kernel addresses don't.
                 */
                if (sp[0] >> 22)
                        return sp[0];
                if (sp[1] >> 22)
                        return sp[1];
# endif
        }
#endif

        return pc;
}
EXPORT_SYMBOL(profile_pc);

/*
 * This is the timer interrupt handler.  We also save the current
 * Time Stamp Counter value at the time of the timer interrupt, so that
 * we can later estimate the time of day more exactly.
 */
irqreturn_t timer_interrupt(int irq, void *dev_id)
{
        s64 delta, delta_cpu, stolen, blocked;
        unsigned int i, cpu = smp_processor_id();
        struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
        struct vcpu_runstate_info runstate;

        /* Keep nmi watchdog up to date */
        inc_irq_stat(irq0_irqs);

        /*
         * Here we are in the timer irq handler.  We have irqs locally
         * disabled, but we don't know if the timer_bh is running on another
         * CPU, so we need to avoid an SMP race with it.  NOTE: we don't need
         * the irq version of write_lock because, as just said, we have irqs
         * locally disabled. -arca
         */
        write_seqlock(&xtime_lock);

        do {
                get_time_values_from_xen(cpu);

                /* Obtain a consistent snapshot of elapsed wallclock cycles. */
                delta = delta_cpu =
                        shadow->system_timestamp + get_nsec_offset(shadow);
                delta -= processed_system_time;
                delta_cpu -= per_cpu(processed_system_time, cpu);

                get_runstate_snapshot(&runstate);
        } while (!time_values_up_to_date());

        if ((unlikely(delta < -(s64)permitted_clock_jitter) ||
             unlikely(delta_cpu < -(s64)permitted_clock_jitter))
            && printk_ratelimit()) {
                printk("Timer ISR/%u: Time went backwards: "
                       "delta=%lld delta_cpu=%lld shadow=%lld "
                       "off=%lld processed=%lld cpu_processed=%lld\n",
                       cpu, delta, delta_cpu, shadow->system_timestamp,
                       (s64)get_nsec_offset(shadow),
                       processed_system_time,
                       per_cpu(processed_system_time, cpu));
                for (i = 0; i < num_online_cpus(); i++)
                        printk(" %d: %lld\n", i,
                               per_cpu(processed_system_time, i));
        }

        /* System-wide jiffy work. */
        if (delta >= NS_PER_TICK) {
                do_div(delta, NS_PER_TICK);
                processed_system_time += delta * NS_PER_TICK;
                while (delta > HZ) {
                        clobber_induction_variable(delta);
                        do_timer(HZ);
                        delta -= HZ;
                }
                do_timer(delta);
        }

        if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
                update_wallclock();
                if (keventd_up())
                        schedule_work(&clock_was_set_work);
        }

        write_sequnlock(&xtime_lock);

        /*
         * Account stolen ticks: time the VCPU spent runnable or offline is
         * accounted as steal time.
         */
        stolen = runstate.time[RUNSTATE_runnable]
                 + runstate.time[RUNSTATE_offline]
                 - per_cpu(processed_stolen_time, cpu);
        if ((stolen > 0) && (delta_cpu > 0)) {
                delta_cpu -= stolen;
                if (unlikely(delta_cpu < 0))
                        stolen += delta_cpu; /* clamp local-time progress */
                do_div(stolen, NS_PER_TICK);
                per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
                per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK;
                account_steal_time((cputime_t)stolen);
        }

        /*
         * Account blocked ticks: time the VCPU spent blocked is accounted
         * as idle/wait time.
         */
        blocked = runstate.time[RUNSTATE_blocked]
                  - per_cpu(processed_blocked_time, cpu);
        if ((blocked > 0) && (delta_cpu > 0)) {
                delta_cpu -= blocked;
                if (unlikely(delta_cpu < 0))
                        blocked += delta_cpu; /* clamp local-time progress */
                do_div(blocked, NS_PER_TICK);
                per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
                per_cpu(processed_system_time, cpu) += blocked * NS_PER_TICK;
                account_idle_time((cputime_t)blocked);
        }

        /* Account user/system ticks. */
        if (delta_cpu > 0) {
                do_div(delta_cpu, NS_PER_TICK);
                per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
                if (user_mode_vm(get_irq_regs()))
                        account_user_time(current, (cputime_t)delta_cpu,
                                          (cputime_t)delta_cpu);
                else if (current != idle_task(cpu))
                        account_system_time(current, HARDIRQ_OFFSET,
                                            (cputime_t)delta_cpu,
                                            (cputime_t)delta_cpu);
                else
                        account_idle_time((cputime_t)delta_cpu);
        }

        /* Offlined for more than a few seconds? Avoid lockup warnings. */
        if (stolen > 5*HZ)
                touch_softlockup_watchdog();

        /* Local timer processing (see update_process_times()). */
        run_local_timers();
        if (rcu_pending(cpu))
                rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
        printk_tick();
        scheduler_tick();
        run_posix_cpu_timers(current);
        profile_tick(CPU_PROFILING);

        return IRQ_HANDLED;
}

void mark_tsc_unstable(char *reason)
{
#ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */
        tsc_unstable = 1;
#endif
}
EXPORT_SYMBOL_GPL(mark_tsc_unstable);

static void init_missing_ticks_accounting(unsigned int cpu)
{
        struct vcpu_register_runstate_memory_area area;
        struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
        int rc;

        memset(runstate, 0, sizeof(*runstate));

        area.addr.v = runstate;
        rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
        WARN_ON(rc && rc != -ENOSYS);

        per_cpu(processed_blocked_time, cpu) =
                runstate->time[RUNSTATE_blocked];
        per_cpu(processed_stolen_time, cpu) =
                runstate->time[RUNSTATE_runnable] +
                runstate->time[RUNSTATE_offline];
}

static cycle_t cs_last;

static cycle_t xen_clocksource_read(struct clocksource *cs)
{
#ifdef CONFIG_SMP
        cycle_t last = get64(&cs_last);
        cycle_t ret = local_clock();

        if (unlikely((s64)(ret - last) < 0)) {
                if (last - ret > permitted_clock_jitter
                    && printk_ratelimit()) {
                        unsigned int cpu = get_cpu();
                        struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);

                        printk(KERN_WARNING "clocksource/%u: "
                               "Time went backwards: "
                               "ret=%Lx delta=%Ld shadow=%Lx offset=%Lx\n",
                               cpu, ret, ret - last, shadow->system_timestamp,
                               get_nsec_offset(shadow));
                        put_cpu();
                }
                return last;
        }

        for (;;) {
                cycle_t cur = cmpxchg64(&cs_last, last, ret);

                if (cur == last || (s64)(ret - cur) < 0)
                        return ret;
                last = cur;
        }
#else
        return local_clock();
#endif
}
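/*
 * Note: on SMP, cs_last holds the highest value this clocksource has handed
 * out so far.  A reading that is behind the snapshot taken at entry is
 * replaced by that snapshot, and the cmpxchg64() loop only ever moves
 * cs_last forward, even when several CPUs race to update it.
 */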

/* No locking required. Interrupts are disabled on all CPUs. */
static void xen_clocksource_resume(void)
{
        unsigned int cpu;

        init_cpu_khz();

        for_each_online_cpu(cpu) {
                switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, cpu,
                                           &xen_set_periodic_tick)) {
                case 0:
#if CONFIG_XEN_COMPAT <= 0x030004
                case -ENOSYS:
#endif
                        break;
                default:
                        BUG();
                }
                get_time_values_from_xen(cpu);
                per_cpu(processed_system_time, cpu) =
                        per_cpu(shadow_time, 0).system_timestamp;
                init_missing_ticks_accounting(cpu);
        }

        processed_system_time = per_cpu(shadow_time, 0).system_timestamp;

        cs_last = local_clock();
}

static struct clocksource clocksource_xen = {
        .name = "xen",
        .rating = 400,
        .read = xen_clocksource_read,
        .mask = CLOCKSOURCE_MASK(64),
        .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */
        .shift = XEN_SHIFT,
        .flags = CLOCK_SOURCE_IS_CONTINUOUS,
        .resume = xen_clocksource_resume,
};
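/*
 * Note: with .mult = 1 << XEN_SHIFT and .shift = XEN_SHIFT, the generic
 * cycles-to-ns conversion (cycles * mult) >> shift is the identity, so the
 * values returned by xen_clocksource_read() are already nanoseconds, as the
 * comment on .mult says.
 */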

unsigned long xen_read_persistent_clock(void)
{
        const shared_info_t *s = HYPERVISOR_shared_info;
        u32 version, sec, nsec;
        u64 delta;

        do {
                version = s->wc_version;
                rmb();
                sec = s->wc_sec;
                nsec = s->wc_nsec;
                rmb();
        } while ((s->wc_version & 1) | (version ^ s->wc_version));

        delta = local_clock() + (u64)sec * NSEC_PER_SEC + nsec;
        do_div(delta, NSEC_PER_SEC);

        return delta;
}

int xen_update_persistent_clock(void)
{
        if (!is_initial_xendomain())
                return -1;
        mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
        return 0;
}

/* Dynamically-mapped IRQ. */
static int __read_mostly timer_irq = -1;
static struct irqaction timer_action = {
        .handler = timer_interrupt,
        .flags = IRQF_DISABLED|IRQF_TIMER,
        .name = "timer"
};

static void __init setup_cpu0_timer_irq(void)
{
        timer_irq = bind_virq_to_irqaction(VIRQ_TIMER, 0, &timer_action);
        BUG_ON(timer_irq < 0);
}

void __init time_init(void)
{
        init_cpu_khz();
        printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
               cpu_khz / 1000, cpu_khz % 1000);

        switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0,
                                   &xen_set_periodic_tick)) {
        case 0:
#if CONFIG_XEN_COMPAT <= 0x030004
        case -ENOSYS:
#endif
                break;
        default:
                BUG();
        }

        get_time_values_from_xen(0);

        processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
        per_cpu(processed_system_time, 0) = processed_system_time;
        init_missing_ticks_accounting(0);

        clocksource_register(&clocksource_xen);

        update_wallclock();

        use_tsc_delay();

        /* Cannot request_irq() until kmem is initialised. */
        late_time_init = setup_cpu0_timer_irq;
}

/* Convert jiffies to system time. */
u64 jiffies_to_st(unsigned long j)
{
        unsigned long seq;
        long delta;
        u64 st;

        do {
                seq = read_seqbegin(&xtime_lock);
                delta = j - jiffies;
                if (delta < 1) {
                        /* Triggers in some wrap-around cases, but that's okay:
                         * we just end up with a shorter timeout. */
                        st = processed_system_time + NS_PER_TICK;
                } else if (((unsigned long)delta >> (BITS_PER_LONG-3)) != 0) {
                        /* Very long timeout means there is no pending timer.
                         * We indicate this to Xen by passing zero timeout. */
                        st = 0;
                } else {
                        st = processed_system_time + delta * (u64)NS_PER_TICK;
                }
        } while (read_seqretry(&xtime_lock, seq));

        return st;
}
EXPORT_SYMBOL(jiffies_to_st);
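/*
 * Note: stop_hz_timer() below uses jiffies_to_st() to turn the next pending
 * jiffy into an absolute Xen system time for the VCPU's single-shot timer,
 * which is how the idle 'tickless mode' knows when to wake up again.
 */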

/*
 * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
 * These functions are based on implementations from arch/s390/kernel/time.c
 */
static void stop_hz_timer(void)
{
        struct vcpu_set_singleshot_timer singleshot;
        unsigned int cpu = smp_processor_id();
        unsigned long j;
        int rc;

        cpumask_set_cpu(cpu, nohz_cpu_mask);

        /*
         * See matching smp_mb in rcu_start_batch in rcupdate.c.  These mbs
         * ensure that if __rcu_pending (nested in rcu_needs_cpu) fetches a
         * value of rcp->cur that matches rdp->quiescbatch and allows us to
         * stop the hz timer then the cpumasks created for subsequent values
         * of cur in rcu_start_batch are guaranteed to pick up the updated
         * nohz_cpu_mask and so will not depend on this cpu.
         */
        smp_mb();

        /* Leave ourselves in tick mode if rcu or softirq or timer pending. */
        if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
            local_softirq_pending() ||
            (j = get_next_timer_interrupt(jiffies),
             time_before_eq(j, jiffies))) {
                cpumask_clear_cpu(cpu, nohz_cpu_mask);
                j = jiffies + 1;
        }

        singleshot.timeout_abs_ns = jiffies_to_st(j) + NS_PER_TICK/2;
        singleshot.flags = 0;
        rc = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &singleshot);
#if CONFIG_XEN_COMPAT <= 0x030004
        if (rc) {
                BUG_ON(rc != -ENOSYS);
                rc = HYPERVISOR_set_timer_op(singleshot.timeout_abs_ns);
        }
#endif
        BUG_ON(rc);
}

static void start_hz_timer(void)
{
        cpumask_clear_cpu(smp_processor_id(), nohz_cpu_mask);
}

void xen_safe_halt(void)
{
        stop_hz_timer();
        /* Blocking includes an implicit local_irq_enable(). */
        HYPERVISOR_block();
        start_hz_timer();
}
EXPORT_SYMBOL(xen_safe_halt);

void xen_halt(void)
{
        if (irqs_disabled())
                VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
}
EXPORT_SYMBOL(xen_halt);

#ifdef CONFIG_SMP
int __cpuinit local_setup_timer(unsigned int cpu)
{
        int seq, irq;

        BUG_ON(cpu == 0);

        switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, cpu,
                                   &xen_set_periodic_tick)) {
        case 0:
#if CONFIG_XEN_COMPAT <= 0x030004
        case -ENOSYS:
#endif
                break;
        default:
                BUG();
        }

        do {
                seq = read_seqbegin(&xtime_lock);
                /* Use cpu0 timestamp: cpu's shadow is not initialised yet. */
                per_cpu(processed_system_time, cpu) =
                        per_cpu(shadow_time, 0).system_timestamp;
                init_missing_ticks_accounting(cpu);
        } while (read_seqretry(&xtime_lock, seq));

        irq = bind_virq_to_irqaction(VIRQ_TIMER, cpu, &timer_action);
        if (irq < 0)
                return irq;
        BUG_ON(timer_irq != irq);

        return 0;
}

void __cpuinit local_teardown_timer(unsigned int cpu)
{
        BUG_ON(cpu == 0);
        unbind_from_per_cpu_irq(timer_irq, cpu, &timer_action);
}
#endif

#ifdef CONFIG_CPU_FREQ
static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                                 void *data)
{
        struct cpufreq_freqs *freq = data;
        struct xen_platform_op op;

        if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
                return 0;

        if (val == CPUFREQ_PRECHANGE)
                return 0;

        op.cmd = XENPF_change_freq;
        op.u.change_freq.flags = 0;
        op.u.change_freq.cpu = freq->cpu;
        op.u.change_freq.freq = (u64)freq->new * 1000;
        WARN_ON(HYPERVISOR_platform_op(&op));

        return 0;
}

static struct notifier_block time_cpufreq_notifier_block = {
        .notifier_call = time_cpufreq_notifier
};

static int __init cpufreq_time_setup(void)
{
        /* cpufreq_register_notifier() returns 0 on success. */
        if (cpufreq_register_notifier(&time_cpufreq_notifier_block,
                                      CPUFREQ_TRANSITION_NOTIFIER)) {
                printk(KERN_ERR "failed to set up cpufreq notifier\n");
                return -ENODEV;
        }
        return 0;
}

core_initcall(cpufreq_time_setup);
#endif

/*
 * /proc/sys/xen: This really belongs in another file. It can stay here for
 * now however.
 */
static ctl_table xen_subtable[] = {
        {
                .ctl_name = CTL_XEN_INDEPENDENT_WALLCLOCK,
                .procname = "independent_wallclock",
                .data = &independent_wallclock,
                .maxlen = sizeof(independent_wallclock),
                .mode = 0644,
                .strategy = sysctl_data,
                .proc_handler = proc_dointvec
        },
        {
                .ctl_name = CTL_XEN_PERMITTED_CLOCK_JITTER,
                .procname = "permitted_clock_jitter",
                .data = &permitted_clock_jitter,
                .maxlen = sizeof(permitted_clock_jitter),
                .mode = 0644,
                .strategy = sysctl_data,
                .proc_handler = proc_doulongvec_minmax
        },
        { }
};
static ctl_table xen_table[] = {
        {
                .ctl_name = CTL_XEN,
                .procname = "xen",
                .mode = 0555,
                .child = xen_subtable
        },
        { }
};
static int __init xen_sysctl_init(void)
{
        (void)register_sysctl_table(xen_table);
        return 0;
}
__initcall(xen_sysctl_init);