Merge commit 'linux-2.6/master' (early part) into oprofile/core

This branch depends on these apic patches:

      apic, x86: Use BIOS settings for IBS and MCE threshold interrupt LVT offsets
      apic, x86: Check if EILVT APIC registers are available (AMD only)

Signed-off-by: Robert Richter <robert.richter@amd.com>
Author: Robert Richter <robert.richter@amd.com>
Date: 2010-10-25 16:28:14 +02:00
commit dbd1e66e04
465 changed files with 16963 additions and 10198 deletions


@ -28,7 +28,7 @@
</authorgroup>
<copyright>
<year>2005-2006</year>
<year>2005-2010</year>
<holder>Thomas Gleixner</holder>
</copyright>
<copyright>
@ -100,6 +100,10 @@
<listitem><para>Edge type</para></listitem>
<listitem><para>Simple type</para></listitem>
</itemizedlist>
During the implementation we identified another type:
<itemizedlist>
<listitem><para>Fast EOI type</para></listitem>
</itemizedlist>
In the SMP world of the __do_IRQ() super-handler another type
was identified:
<itemizedlist>
@ -153,6 +157,7 @@
is still available. This leads to a kind of duality for the time
being. Over time the new model should be used in more and more
architectures, as it enables smaller and cleaner IRQ subsystems.
It has been deprecated for three years now and is about to be removed.
</para>
</chapter>
<chapter id="bugs">
@ -217,6 +222,7 @@
<itemizedlist>
<listitem><para>handle_level_irq</para></listitem>
<listitem><para>handle_edge_irq</para></listitem>
<listitem><para>handle_fasteoi_irq</para></listitem>
<listitem><para>handle_simple_irq</para></listitem>
<listitem><para>handle_percpu_irq</para></listitem>
</itemizedlist>
@ -233,33 +239,33 @@
are used by the default flow implementations.
The following helper functions are implemented (simplified excerpt):
<programlisting>
default_enable(irq)
default_enable(struct irq_data *data)
{
desc->chip->unmask(irq);
desc->chip->irq_unmask(data);
}
default_disable(irq)
default_disable(struct irq_data *data)
{
if (!delay_disable(irq))
desc->chip->mask(irq);
if (!delay_disable(data))
desc->chip->irq_mask(data);
}
default_ack(irq)
default_ack(struct irq_data *data)
{
chip->ack(irq);
chip->irq_ack(data);
}
default_mask_ack(irq)
default_mask_ack(struct irq_data *data)
{
if (chip->mask_ack) {
chip->mask_ack(irq);
if (chip->irq_mask_ack) {
chip->irq_mask_ack(data);
} else {
chip->mask(irq);
chip->ack(irq);
chip->irq_mask(data);
chip->irq_ack(data);
}
}
noop(irq)
noop(struct irq_data *data)
{
}
@ -278,12 +284,27 @@ noop(irq)
<para>
The following control flow is implemented (simplified excerpt):
<programlisting>
desc->chip->start();
desc->chip->irq_mask();
handle_IRQ_event(desc->action);
desc->chip->end();
desc->chip->irq_unmask();
</programlisting>
</para>
</sect3>
</sect3>
<sect3 id="Default_FASTEOI_IRQ_flow_handler">
<title>Default Fast EOI IRQ flow handler</title>
<para>
handle_fasteoi_irq provides a generic implementation
for interrupts, which only need an EOI at the end of
the handler.
</para>
<para>
The following control flow is implemented (simplified excerpt):
<programlisting>
handle_IRQ_event(desc->action);
desc->chip->irq_eoi();
</programlisting>
</para>
</sect3>
<sect3 id="Default_Edge_IRQ_flow_handler">
<title>Default Edge IRQ flow handler</title>
<para>
@ -294,20 +315,19 @@ desc->chip->end();
The following control flow is implemented (simplified excerpt):
<programlisting>
if (desc->status &amp; running) {
desc->chip->hold();
desc->chip->irq_mask();
desc->status |= pending | masked;
return;
}
desc->chip->start();
desc->chip->irq_ack();
desc->status |= running;
do {
if (desc->status &amp; masked)
desc->chip->enable();
desc->chip->irq_unmask();
desc->status &amp;= ~pending;
handle_IRQ_event(desc->action);
} while (status &amp; pending);
desc->status &amp;= ~running;
desc->chip->end();
</programlisting>
</para>
</sect3>
@ -342,9 +362,9 @@ handle_IRQ_event(desc->action);
<para>
The following control flow is implemented (simplified excerpt):
<programlisting>
desc->chip->start();
handle_IRQ_event(desc->action);
desc->chip->end();
if (desc->chip->irq_eoi)
desc->chip->irq_eoi();
</programlisting>
</para>
</sect3>
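Platform code picks one of these flow handlers per interrupt line. A minimal usage sketch, assuming hypothetical line numbers and chips, with the set_irq_chip_and_handler() helper as it existed in this era:
/* Hypothetical platform init: pick the flow handler matching each line's trigger type. */
set_irq_chip_and_handler(IRQ_GPIO_BUTTON, &my_gpio_chip, handle_level_irq);
set_irq_chip_and_handler(IRQ_TIMER_TICK, &my_timer_chip, handle_edge_irq);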
@ -375,8 +395,7 @@ desc->chip->end();
mechanism. (It's necessary to enable CONFIG_HARDIRQS_SW_RESEND when
you want to use the delayed interrupt disable feature and your
hardware is not capable of retriggering an interrupt.)
The delayed interrupt disable can be runtime enabled, per interrupt,
by setting the IRQ_DELAYED_DISABLE flag in the irq_desc status field.
The delayed interrupt disable is not configurable.
</para>
</sect2>
</sect1>
@ -387,13 +406,13 @@ desc->chip->end();
contains all the direct chip relevant functions, which
can be utilized by the irq flow implementations.
<itemizedlist>
<listitem><para>ack()</para></listitem>
<listitem><para>mask_ack() - Optional, recommended for performance</para></listitem>
<listitem><para>mask()</para></listitem>
<listitem><para>unmask()</para></listitem>
<listitem><para>retrigger() - Optional</para></listitem>
<listitem><para>set_type() - Optional</para></listitem>
<listitem><para>set_wake() - Optional</para></listitem>
<listitem><para>irq_ack()</para></listitem>
<listitem><para>irq_mask_ack() - Optional, recommended for performance</para></listitem>
<listitem><para>irq_mask()</para></listitem>
<listitem><para>irq_unmask()</para></listitem>
<listitem><para>irq_retrigger() - Optional</para></listitem>
<listitem><para>irq_set_type() - Optional</para></listitem>
<listitem><para>irq_set_wake() - Optional</para></listitem>
</itemizedlist>
These primitives are strictly intended to mean what they say: ack means
ACK, masking means masking of an IRQ line, etc. It is up to the flow
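Most of the driver churn in this merge is the rename of these chip callbacks to their irq_* forms taking struct irq_data. A minimal sketch of a converted chip, assuming a hypothetical controller:
/* Hypothetical irq_chip after the conversion: callbacks receive struct irq_data. */
static void my_chip_ack(struct irq_data *d)
{
	/* acknowledge d->irq at the interrupt controller */
}

static void my_chip_mask(struct irq_data *d)
{
	/* mask the line in the controller registers */
}

static void my_chip_unmask(struct irq_data *d)
{
	/* unmask the line again */
}

static struct irq_chip my_chip = {
	.name		= "MY-CHIP",
	.irq_ack	= my_chip_ack,
	.irq_mask	= my_chip_mask,
	.irq_unmask	= my_chip_unmask,
};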
@ -458,6 +477,7 @@ desc->chip->end();
<para>
This chapter contains the autogenerated documentation of the internal functions.
</para>
!Ikernel/irq/irqdesc.c
!Ikernel/irq/handle.c
!Ikernel/irq/chip.c
</chapter>


@ -1645,7 +1645,9 @@ the amount of locking which needs to be done.
all the readers who were traversing the list when we deleted the
element are finished. We use <function>call_rcu()</function> to
register a callback which will actually destroy the object once
the readers are finished.
all pre-existing readers are finished. Alternatively,
<function>synchronize_rcu()</function> may be used to block until
all pre-existing readers are finished.
</para>
<para>
But how does Read Copy Update know when the readers are
@ -1714,7 +1716,7 @@ the amount of locking which needs to be done.
- object_put(obj);
+ list_del_rcu(&amp;obj-&gt;list);
cache_num--;
+ call_rcu(&amp;obj-&gt;rcu, cache_delete_rcu, obj);
+ call_rcu(&amp;obj-&gt;rcu, cache_delete_rcu);
}
/* Must be holding cache_lock */
@ -1725,14 +1727,6 @@ the amount of locking which needs to be done.
if (++cache_num > MAX_CACHE_SIZE) {
struct object *i, *outcast = NULL;
list_for_each_entry(i, &amp;cache, list) {
@@ -85,6 +94,7 @@
obj-&gt;popularity = 0;
atomic_set(&amp;obj-&gt;refcnt, 1); /* The cache holds a reference */
spin_lock_init(&amp;obj-&gt;lock);
+ INIT_RCU_HEAD(&amp;obj-&gt;rcu);
spin_lock_irqsave(&amp;cache_lock, flags);
__cache_add(obj);
@@ -104,12 +114,11 @@
struct object *cache_find(int id)
{

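The call_rcu() hunk above drops the third argument: the callback now receives only the rcu_head and recovers its object with container_of(). A minimal sketch, assuming the struct object from the example has the rcu and list members shown:
/* Callback in the new two-argument call_rcu() style. */
static void cache_delete_rcu(struct rcu_head *rcu)
{
	struct object *obj = container_of(rcu, struct object, rcu);

	kfree(obj);	/* safe: all pre-existing readers have finished */
}

/* Updater side, holding cache_lock. */
list_del_rcu(&obj->list);
call_rcu(&obj->rcu, cache_delete_rcu);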

@ -218,13 +218,22 @@ over a rather long period of time, but improvements are always welcome!
include:
a. Keeping a count of the number of data-structure elements
used by the RCU-protected data structure, including those
waiting for a grace period to elapse. Enforce a limit
on this number, stalling updates as needed to allow
previously deferred frees to complete.
used by the RCU-protected data structure, including
those waiting for a grace period to elapse. Enforce a
limit on this number, stalling updates as needed to allow
previously deferred frees to complete. Alternatively,
limit only the number awaiting deferred free rather than
the total number of elements.
Alternatively, limit only the number awaiting deferred
free rather than the total number of elements.
One way to stall the updates is to acquire the update-side
mutex. (Don't try this with a spinlock -- other CPUs
spinning on the lock could prevent the grace period
from ever ending.) Another way to stall the updates
is for the updates to use a wrapper function around
the memory allocator, so that this wrapper function
simulates OOM when there is too much memory awaiting an
RCU grace period. There are of course many other
variations on this theme.
b. Limiting update rate. For example, if updates occur only
once per hour, then no explicit rate limiting is required,
@ -365,3 +374,26 @@ over a rather long period of time, but improvements are always welcome!
and the compiler to freely reorder code into and out of RCU
read-side critical sections. It is the responsibility of the
RCU update-side primitives to deal with this.
17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and
the __rcu sparse checks to validate your RCU code. These
can help find problems as follows:
CONFIG_PROVE_RCU: check that accesses to RCU-protected data
structures are carried out under the proper RCU
read-side critical section, while holding the right
combination of locks, or whatever other conditions
are appropriate.
CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the
same object to call_rcu() (or friends) before an RCU
grace period has elapsed since the last time that you
passed that same object to call_rcu() (or friends).
__rcu sparse checks: tag the pointer to the RCU-protected data
structure with __rcu, and sparse will warn you if you
access that pointer without the services of one of the
variants of rcu_dereference().
These debugging aids can help you find problems that are
otherwise extremely difficult to spot.
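A short sketch of the __rcu annotation from item 17, assuming a hypothetical global pointer; an unprotected dereference makes sparse complain, while rcu_dereference() inside a read-side critical section is accepted:
struct foo {
	int a;
};

static struct foo __rcu *gbl_foo;	/* RCU-protected pointer, tagged for sparse */

static int read_a(void)
{
	struct foo *p;
	int val = -1;

	rcu_read_lock();
	p = rcu_dereference(gbl_foo);	/* checked access */
	if (p)
		val = p->a;
	rcu_read_unlock();
	return val;
}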


@ -80,6 +80,24 @@ o A CPU looping with bottom halves disabled. This condition can
o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel
without invoking schedule().
o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
happen to preempt a low-priority task in the middle of an RCU
read-side critical section. This is especially damaging if
that low-priority task is not permitted to run on any other CPU,
in which case the next RCU grace period can never complete, which
will eventually cause the system to run out of memory and hang.
While the system is in the process of running itself out of
memory, you might see stall-warning messages.
o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
is running at a higher priority than the RCU softirq threads.
This will prevent RCU callbacks from ever being invoked,
and in a CONFIG_TREE_PREEMPT_RCU kernel will further prevent
RCU grace periods from ever completing. Either way, the
system will eventually run out of memory and hang. In the
CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning
messages.
o A bug in the RCU implementation.
o A hardware failure. This is quite unlikely, but has occurred


@ -125,6 +125,17 @@ o "b" is the batch limit for this CPU. If more than this number
of RCU callbacks is ready to invoke, then the remainder will
be deferred.
o "ci" is the number of RCU callbacks that have been invoked for
this CPU. Note that ci+ql is the number of callbacks that have
been registered in absence of CPU-hotplug activity.
o "co" is the number of RCU callbacks that have been orphaned due to
this CPU going offline.
o "ca" is the number of RCU callbacks that have been adopted due to
other CPUs going offline. Note that ci+co-ca+ql is the number of
RCU callbacks registered on this CPU.
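As a worked example with made-up numbers: a CPU reporting ql=5, ci=100, co=3 and ca=2 would have ci+co-ca+ql = 100+3-2+5 = 106 callbacks registered on it.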
There is also an rcu/rcudata.csv file with the same information in
comma-separated-variable spreadsheet format.
@ -180,7 +191,7 @@ o "s" is the "signaled" state that drives force_quiescent_state()'s
o "jfq" is the number of jiffies remaining for this grace period
before force_quiescent_state() is invoked to help push things
along. Note that CPUs in dyntick-idle mode thoughout the grace
along. Note that CPUs in dyntick-idle mode throughout the grace
period will not report on their own, but rather must be checked by
some other CPU via force_quiescent_state().


@ -14,25 +14,39 @@ to /proc/cpuinfo.
identifier (rather than the kernel's). The actual value is
architecture and platform dependent.
3) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
3) /sys/devices/system/cpu/cpuX/topology/book_id:
the book ID of cpuX. Typically it is the hardware platform's
identifier (rather than the kernel's). The actual value is
architecture and platform dependent.
4) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
internal kernel map of cpuX's hardware threads within the same
core as cpuX
4) /sys/devices/system/cpu/cpuX/topology/core_siblings:
5) /sys/devices/system/cpu/cpuX/topology/core_siblings:
internal kernel map of cpuX's hardware threads within the same
physical_package_id.
6) /sys/devices/system/cpu/cpuX/topology/book_siblings:
internal kernel map of cpuX's hardware threads within the same
book_id.
To implement it in an architecture-neutral way, a new source file,
drivers/base/topology.c, is to export the 4 attributes.
drivers/base/topology.c, is to export the 4 or 6 attributes. The two book
related sysfs files will only be created if CONFIG_SCHED_BOOK is selected.
For an architecture to support this feature, it must define some of
these macros in include/asm-XXX/topology.h:
#define topology_physical_package_id(cpu)
#define topology_core_id(cpu)
#define topology_book_id(cpu)
#define topology_thread_cpumask(cpu)
#define topology_core_cpumask(cpu)
#define topology_book_cpumask(cpu)
The type of **_id is int.
The type of siblings is (const) struct cpumask *.
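A minimal sketch of how an architecture might provide these hooks in its include/asm-XXX/topology.h, with made-up variable names (the s390 book variant later in this series follows the same pattern):
/* Hypothetical per-CPU topology data maintained by the architecture. */
extern int arch_pkg_id[NR_CPUS];
extern int arch_core_id[NR_CPUS];
extern cpumask_t arch_core_map[NR_CPUS];

#define topology_physical_package_id(cpu)	(arch_pkg_id[cpu])
#define topology_core_id(cpu)			(arch_core_id[cpu])
#define topology_core_cpumask(cpu)		(&arch_core_map[cpu])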
@ -45,6 +59,9 @@ not defined by include/asm-XXX/topology.h:
3) thread_siblings: just the given CPU
4) core_siblings: just the given CPU
For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
default definitions for topology_book_id() and topology_book_cpumask().
Additionally, CPU topology information is provided under
/sys/devices/system/cpu and includes these files. The internal
source for the output is in brackets ("[]").


@ -386,34 +386,6 @@ Who: Tejun Heo <tj@kernel.org>
----------------------------
What: Support for VMware's guest paravirtualization technique [VMI] will be
dropped.
When: 2.6.37 or earlier.
Why: With the recent innovations in CPU hardware acceleration technologies
from Intel and AMD, VMware ran a few experiments to compare these
techniques to guest paravirtualization technique on VMware's platform.
These hardware assisted virtualization techniques have outperformed the
performance benefits provided by VMI in most of the workloads. VMware
expects that these hardware features will be ubiquitous in a couple of
years, as a result, VMware has started a phased retirement of this
feature from the hypervisor. We will be removing this feature from the
Kernel too. Right now we are targeting 2.6.37 but can retire earlier if
technical reasons (read opportunity to remove major chunk of pvops)
arise.
Please note that VMI has always been an optimization and non-VMI kernels
still work fine on VMware's platform.
Latest versions of VMware's product which support VMI are,
Workstation 7.0 and VSphere 4.0 on ESX side, future maintenance
releases for these products will continue supporting VMI.
For more details about VMI retirement take a look at this,
http://blogs.vmware.com/guestosguide/2009/09/vmi-retirement.html
Who: Alok N Kataria <akataria@vmware.com>
----------------------------
What: Support for lcd_switch and display_get in asus-laptop driver
When: March 2010
Why: These two features use non-standard interfaces. There are the


@ -455,7 +455,7 @@ and is between 256 and 4096 characters. It is defined in the file
[ARM] imx_timer1,OSTS,netx_timer,mpu_timer2,
pxa_timer,timer3,32k_counter,timer0_1
[AVR32] avr32
[X86-32] pit,hpet,tsc,vmi-timer;
[X86-32] pit,hpet,tsc;
scx200_hrt on Geode; cyclone on IBM x440
[MIPS] MIPS
[PARISC] cr16
@ -2153,6 +2153,11 @@ and is between 256 and 4096 characters. It is defined in the file
Reserves a hole at the top of the kernel virtual
address space.
reservelow= [X86]
Format: nn[K]
Set the amount of memory to reserve for BIOS at
the bottom of the address space.
reset_devices [KNL] Force drivers to reset the underlying device
during initialization.
@ -2435,6 +2440,10 @@ and is between 256 and 4096 characters. It is defined in the file
disables clocksource verification at runtime.
Used to enable high-resolution timer mode on older
hardware, and in virtualized environment.
[x86] noirqtime: Do not use TSC to do irq accounting.
Used to run time disable IRQ_TIME_ACCOUNTING on any
platforms where RDTSC is slow and this accounting
can add overhead.
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface


@ -1527,6 +1527,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
S: Supported
F: Documentation/filesystems/ceph.txt
F: fs/ceph
F: net/ceph
F: include/linux/ceph
CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
M: David Vrabel <david.vrabel@csr.com>
@ -3239,6 +3241,12 @@ F: drivers/net/irda/
F: include/net/irda/
F: net/irda/
IRQ SUBSYSTEM
M: Thomas Gleixner <tglx@linutronix.de>
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git irq/core
F: kernel/irq/
ISAPNP
M: Jaroslav Kysela <perex@perex.cz>
S: Maintained
@ -4805,6 +4813,15 @@ F: fs/qnx4/
F: include/linux/qnx4_fs.h
F: include/linux/qnxtypes.h
RADOS BLOCK DEVICE (RBD)
M: Yehuda Sadeh <yehuda@hq.newdream.net>
M: Sage Weil <sage@newdream.net>
M: ceph-devel@vger.kernel.org
S: Supported
F: drivers/block/rbd.c
F: drivers/block/rbd_types.h
RADEON FRAMEBUFFER DISPLAY DRIVER
M: Benjamin Herrenschmidt <benh@kernel.crashing.org>
L: linux-fbdev@vger.kernel.org


@ -24,4 +24,6 @@ void set_irq_flags(unsigned int irq, unsigned int flags);
#define IRQF_PROBE (1 << 1)
#define IRQF_NOAUTOEN (1 << 2)
#define ARCH_IRQ_INIT_FLAGS (IRQ_NOREQUEST | IRQ_NOPROBE)
#endif


@ -154,14 +154,6 @@ void set_irq_flags(unsigned int irq, unsigned int iflags)
void __init init_IRQ(void)
{
struct irq_desc *desc;
int irq;
for (irq = 0; irq < nr_irqs; irq++) {
desc = irq_to_desc_alloc_node(irq, 0);
desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
}
init_arch_irq();
}
@ -169,7 +161,7 @@ void __init init_IRQ(void)
int __init arch_probe_nr_irqs(void)
{
nr_irqs = arch_nr_irqs ? arch_nr_irqs : NR_IRQS;
return 0;
return nr_irqs;
}
#endif


@ -691,7 +691,7 @@ int dma_init(void)
memset(&gDMA, 0, sizeof(gDMA));
init_MUTEX_LOCKED(&gDMA.lock);
sema_init(&gDMA.lock, 0);
init_waitqueue_head(&gDMA.freeChannelQ);
/* Initialize the Hardware */
@ -1574,7 +1574,7 @@ int dma_init_mem_map(DMA_MemMap_t *memMap)
{
memset(memMap, 0, sizeof(*memMap));
init_MUTEX(&memMap->lock);
sema_init(&memMap->lock, 1);
return 0;
}


@ -67,21 +67,21 @@ static void bcmring_unmask_irq2(unsigned int irq)
}
static struct irq_chip bcmring_irq0_chip = {
.typename = "ARM-INTC0",
.name = "ARM-INTC0",
.ack = bcmring_mask_irq0,
.mask = bcmring_mask_irq0, /* mask a specific interrupt, blocking its delivery. */
.unmask = bcmring_unmask_irq0, /* unmasks an interrupt */
};
static struct irq_chip bcmring_irq1_chip = {
.typename = "ARM-INTC1",
.name = "ARM-INTC1",
.ack = bcmring_mask_irq1,
.mask = bcmring_mask_irq1,
.unmask = bcmring_unmask_irq1,
};
static struct irq_chip bcmring_irq2_chip = {
.typename = "ARM-SINTC",
.name = "ARM-SINTC",
.ack = bcmring_mask_irq2,
.mask = bcmring_mask_irq2,
.unmask = bcmring_unmask_irq2,


@ -164,10 +164,10 @@ static void iop13xx_msi_nop(unsigned int irq)
static struct irq_chip iop13xx_msi_chip = {
.name = "PCI-MSI",
.ack = iop13xx_msi_nop,
.enable = unmask_msi_irq,
.disable = mask_msi_irq,
.mask = mask_msi_irq,
.unmask = unmask_msi_irq,
.irq_enable = unmask_msi_irq,
.irq_disable = mask_msi_irq,
.irq_mask = mask_msi_irq,
.irq_unmask = unmask_msi_irq,
};
int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)


@ -6,12 +6,6 @@
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/threads.h>
#include <linux/irq.h>
#include <asm/processor.h>
/*
* No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure.
*/
@ -20,6 +14,11 @@
#define local_softirq_pending() (local_cpu_data->softirq_pending)
#include <linux/threads.h>
#include <linux/irq.h>
#include <asm/processor.h>
extern void __iomem *ipi_base_addr;
void ack_bad_irq(unsigned int irq);


@ -272,10 +272,6 @@ void cpu_idle_wait(void);
void default_idle(void);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void account_system_vtime(struct task_struct *);
#endif
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */


@ -104,8 +104,8 @@ static int ia64_msi_retrigger_irq(unsigned int irq)
*/
static struct irq_chip ia64_msi_chip = {
.name = "PCI-MSI",
.mask = mask_msi_irq,
.unmask = unmask_msi_irq,
.irq_mask = mask_msi_irq,
.irq_unmask = unmask_msi_irq,
.ack = ia64_ack_msi_irq,
#ifdef CONFIG_SMP
.set_affinity = ia64_set_msi_irq_affinity,
@ -160,8 +160,8 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
static struct irq_chip dmar_msi_type = {
.name = "DMAR_MSI",
.unmask = dmar_msi_unmask,
.mask = dmar_msi_mask,
.irq_unmask = dmar_msi_unmask,
.irq_mask = dmar_msi_mask,
.ack = ia64_ack_msi_irq,
#ifdef CONFIG_SMP
.set_affinity = dmar_msi_set_affinity,


@ -228,8 +228,8 @@ static int sn_msi_retrigger_irq(unsigned int irq)
static struct irq_chip sn_msi_chip = {
.name = "PCI-MSI",
.mask = mask_msi_irq,
.unmask = unmask_msi_irq,
.irq_mask = mask_msi_irq,
.irq_unmask = unmask_msi_irq,
.ack = sn_ack_msi_irq,
#ifdef CONFIG_SMP
.set_affinity = sn_set_msi_irq_affinity,


@ -51,7 +51,7 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
seq_printf(p, " %14s", irq_desc[i].chip->typename);
seq_printf(p, " %14s", irq_desc[i].chip->name);
seq_printf(p, " %s", action->name);
for (action=action->next; action; action = action->next)


@ -65,7 +65,7 @@ static void shutdown_m32104ut_irq(unsigned int irq)
static struct irq_chip m32104ut_irq_type =
{
.typename = "M32104UT-IRQ",
.name = "M32104UT-IRQ",
.startup = startup_m32104ut_irq,
.shutdown = shutdown_m32104ut_irq,
.enable = enable_m32104ut_irq,


@ -71,7 +71,7 @@ static void shutdown_m32700ut_irq(unsigned int irq)
static struct irq_chip m32700ut_irq_type =
{
.typename = "M32700UT-IRQ",
.name = "M32700UT-IRQ",
.startup = startup_m32700ut_irq,
.shutdown = shutdown_m32700ut_irq,
.enable = enable_m32700ut_irq,
@ -148,7 +148,7 @@ static void shutdown_m32700ut_pld_irq(unsigned int irq)
static struct irq_chip m32700ut_pld_irq_type =
{
.typename = "M32700UT-PLD-IRQ",
.name = "M32700UT-PLD-IRQ",
.startup = startup_m32700ut_pld_irq,
.shutdown = shutdown_m32700ut_pld_irq,
.enable = enable_m32700ut_pld_irq,
@ -217,7 +217,7 @@ static void shutdown_m32700ut_lanpld_irq(unsigned int irq)
static struct irq_chip m32700ut_lanpld_irq_type =
{
.typename = "M32700UT-PLD-LAN-IRQ",
.name = "M32700UT-PLD-LAN-IRQ",
.startup = startup_m32700ut_lanpld_irq,
.shutdown = shutdown_m32700ut_lanpld_irq,
.enable = enable_m32700ut_lanpld_irq,
@ -286,7 +286,7 @@ static void shutdown_m32700ut_lcdpld_irq(unsigned int irq)
static struct irq_chip m32700ut_lcdpld_irq_type =
{
.typename = "M32700UT-PLD-LCD-IRQ",
.name = "M32700UT-PLD-LCD-IRQ",
.startup = startup_m32700ut_lcdpld_irq,
.shutdown = shutdown_m32700ut_lcdpld_irq,
.enable = enable_m32700ut_lcdpld_irq,


@ -65,7 +65,7 @@ static void shutdown_mappi_irq(unsigned int irq)
static struct irq_chip mappi_irq_type =
{
.typename = "MAPPI-IRQ",
.name = "MAPPI-IRQ",
.startup = startup_mappi_irq,
.shutdown = shutdown_mappi_irq,
.enable = enable_mappi_irq,


@ -72,7 +72,7 @@ static void shutdown_mappi2_irq(unsigned int irq)
static struct irq_chip mappi2_irq_type =
{
.typename = "MAPPI2-IRQ",
.name = "MAPPI2-IRQ",
.startup = startup_mappi2_irq,
.shutdown = shutdown_mappi2_irq,
.enable = enable_mappi2_irq,


@ -72,7 +72,7 @@ static void shutdown_mappi3_irq(unsigned int irq)
static struct irq_chip mappi3_irq_type =
{
.typename = "MAPPI3-IRQ",
.name = "MAPPI3-IRQ",
.startup = startup_mappi3_irq,
.shutdown = shutdown_mappi3_irq,
.enable = enable_mappi3_irq,


@ -63,7 +63,7 @@ static void shutdown_oaks32r_irq(unsigned int irq)
static struct irq_chip oaks32r_irq_type =
{
.typename = "OAKS32R-IRQ",
.name = "OAKS32R-IRQ",
.startup = startup_oaks32r_irq,
.shutdown = shutdown_oaks32r_irq,
.enable = enable_oaks32r_irq,


@ -72,7 +72,7 @@ static void shutdown_opsput_irq(unsigned int irq)
static struct irq_chip opsput_irq_type =
{
.typename = "OPSPUT-IRQ",
.name = "OPSPUT-IRQ",
.startup = startup_opsput_irq,
.shutdown = shutdown_opsput_irq,
.enable = enable_opsput_irq,
@ -149,7 +149,7 @@ static void shutdown_opsput_pld_irq(unsigned int irq)
static struct irq_chip opsput_pld_irq_type =
{
.typename = "OPSPUT-PLD-IRQ",
.name = "OPSPUT-PLD-IRQ",
.startup = startup_opsput_pld_irq,
.shutdown = shutdown_opsput_pld_irq,
.enable = enable_opsput_pld_irq,
@ -218,7 +218,7 @@ static void shutdown_opsput_lanpld_irq(unsigned int irq)
static struct irq_chip opsput_lanpld_irq_type =
{
.typename = "OPSPUT-PLD-LAN-IRQ",
.name = "OPSPUT-PLD-LAN-IRQ",
.startup = startup_opsput_lanpld_irq,
.shutdown = shutdown_opsput_lanpld_irq,
.enable = enable_opsput_lanpld_irq,


@ -63,7 +63,7 @@ static void shutdown_mappi_irq(unsigned int irq)
static struct irq_chip mappi_irq_type =
{
.typename = "M32700-IRQ",
.name = "M32700-IRQ",
.startup = startup_mappi_irq,
.shutdown = shutdown_mappi_irq,
.enable = enable_mappi_irq,
@ -136,7 +136,7 @@ static void shutdown_m32700ut_pld_irq(unsigned int irq)
static struct irq_chip m32700ut_pld_irq_type =
{
.typename = "USRV-PLD-IRQ",
.name = "USRV-PLD-IRQ",
.startup = startup_m32700ut_pld_irq,
.shutdown = shutdown_m32700ut_pld_irq,
.enable = enable_m32700ut_pld_irq,


@ -103,7 +103,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len,
if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
goto out_unlock;
retval = security_task_setscheduler(p, 0, NULL);
retval = security_task_setscheduler(p);
if (retval)
goto out_unlock;


@ -542,10 +542,6 @@ extern void reloc_got2(unsigned long);
#define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x)))
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void account_system_vtime(struct task_struct *);
#endif
extern struct dentry *powerpc_debugfs_root;
#endif /* __KERNEL__ */


@ -310,9 +310,9 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
}
static struct irq_chip msic_irq_chip = {
.mask = mask_msi_irq,
.unmask = unmask_msi_irq,
.shutdown = unmask_msi_irq,
.irq_mask = mask_msi_irq,
.irq_unmask = unmask_msi_irq,
.irq_shutdown = mask_msi_irq,
.name = "AXON-MSI",
};


@ -243,7 +243,7 @@ static unsigned int xics_startup(unsigned int virq)
* at that level, so we do it here by hand.
*/
if (irq_to_desc(virq)->msi_desc)
unmask_msi_irq(virq);
unmask_msi_irq(irq_get_irq_data(virq));
/* unmask it */
xics_unmask_irq(virq);


@ -51,8 +51,8 @@ static void fsl_msi_end_irq(unsigned int virq)
}
static struct irq_chip fsl_msi_chip = {
.mask = mask_msi_irq,
.unmask = unmask_msi_irq,
.irq_mask = mask_msi_irq,
.irq_unmask = unmask_msi_irq,
.ack = fsl_msi_end_irq,
.name = "FSL-MSI",
};


@ -39,24 +39,24 @@
static struct mpic *msi_mpic;
static void mpic_pasemi_msi_mask_irq(unsigned int irq)
static void mpic_pasemi_msi_mask_irq(struct irq_data *data)
{
pr_debug("mpic_pasemi_msi_mask_irq %d\n", irq);
mask_msi_irq(irq);
mpic_mask_irq(irq);
pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq);
mask_msi_irq(data);
mpic_mask_irq(data->irq);
}
static void mpic_pasemi_msi_unmask_irq(unsigned int irq)
static void mpic_pasemi_msi_unmask_irq(struct irq_data *data)
{
pr_debug("mpic_pasemi_msi_unmask_irq %d\n", irq);
mpic_unmask_irq(irq);
unmask_msi_irq(irq);
pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq);
mpic_unmask_irq(data->irq);
unmask_msi_irq(data);
}
static struct irq_chip mpic_pasemi_msi_chip = {
.shutdown = mpic_pasemi_msi_mask_irq,
.mask = mpic_pasemi_msi_mask_irq,
.unmask = mpic_pasemi_msi_unmask_irq,
.irq_shutdown = mpic_pasemi_msi_mask_irq,
.irq_mask = mpic_pasemi_msi_mask_irq,
.irq_unmask = mpic_pasemi_msi_unmask_irq,
.eoi = mpic_end_irq,
.set_type = mpic_set_irq_type,
.set_affinity = mpic_set_affinity,


@ -23,22 +23,22 @@
/* A bit ugly, can we get this from the pci_dev somehow? */
static struct mpic *msi_mpic;
static void mpic_u3msi_mask_irq(unsigned int irq)
static void mpic_u3msi_mask_irq(struct irq_data *data)
{
mask_msi_irq(irq);
mpic_mask_irq(irq);
mask_msi_irq(data);
mpic_mask_irq(data->irq);
}
static void mpic_u3msi_unmask_irq(unsigned int irq)
static void mpic_u3msi_unmask_irq(struct irq_data *data)
{
mpic_unmask_irq(irq);
unmask_msi_irq(irq);
mpic_unmask_irq(data->irq);
unmask_msi_irq(data);
}
static struct irq_chip mpic_u3msi_chip = {
.shutdown = mpic_u3msi_mask_irq,
.mask = mpic_u3msi_mask_irq,
.unmask = mpic_u3msi_unmask_irq,
.irq_shutdown = mpic_u3msi_mask_irq,
.irq_mask = mpic_u3msi_mask_irq,
.irq_unmask = mpic_u3msi_unmask_irq,
.eoi = mpic_end_irq,
.set_type = mpic_set_irq_type,
.set_affinity = mpic_set_affinity,


@ -199,6 +199,13 @@ config HOTPLUG_CPU
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
config SCHED_BOOK
bool "Book scheduler support"
depends on SMP
help
Book scheduler support improves the CPU scheduler's decision making
when dealing with machines that have several books.
config MATHEMU
bool "IEEE FPU emulation"
depends on MARCH_G5


@ -12,10 +12,6 @@
#ifndef __ASM_HARDIRQ_H
#define __ASM_HARDIRQ_H
#include <linux/threads.h>
#include <linux/sched.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <asm/lowcore.h>
#define local_softirq_pending() (S390_lowcore.softirq_pending)


@ -97,7 +97,6 @@ static inline void restore_access_regs(unsigned int *acrs)
extern void account_vtime(struct task_struct *, struct task_struct *);
extern void account_tick_vtime(struct task_struct *);
extern void account_system_vtime(struct task_struct *);
#ifdef CONFIG_PFAULT
extern void pfault_irq_init(void);


@ -3,15 +3,32 @@
#include <linux/cpumask.h>
#define mc_capable() (1)
const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
extern unsigned char cpu_core_id[NR_CPUS];
extern cpumask_t cpu_core_map[NR_CPUS];
static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
{
return &cpu_core_map[cpu];
}
#define topology_core_id(cpu) (cpu_core_id[cpu])
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
#define mc_capable() (1)
#ifdef CONFIG_SCHED_BOOK
extern unsigned char cpu_book_id[NR_CPUS];
extern cpumask_t cpu_book_map[NR_CPUS];
static inline const struct cpumask *cpu_book_mask(unsigned int cpu)
{
return &cpu_book_map[cpu];
}
#define topology_book_id(cpu) (cpu_book_id[cpu])
#define topology_book_cpumask(cpu) (&cpu_book_map[cpu])
#endif /* CONFIG_SCHED_BOOK */
int topology_set_cpu_management(int fc);
void topology_schedule_update(void);
@ -30,6 +47,8 @@ static inline void s390_init_cpu_topology(void)
};
#endif
#define SD_BOOK_INIT SD_CPU_INIT
#include <asm-generic/topology.h>
#endif /* _ASM_S390_TOPOLOGY_H */


@ -57,8 +57,8 @@ struct tl_info {
union tl_entry tle[0];
};
struct core_info {
struct core_info *next;
struct mask_info {
struct mask_info *next;
unsigned char id;
cpumask_t mask;
};
@ -66,7 +66,6 @@ struct core_info {
static int topology_enabled;
static void topology_work_fn(struct work_struct *work);
static struct tl_info *tl_info;
static struct core_info core_info;
static int machine_has_topology;
static struct timer_list topology_timer;
static void set_topology_timer(void);
@ -74,38 +73,37 @@ static DECLARE_WORK(topology_work, topology_work_fn);
/* topology_lock protects the core linked list */
static DEFINE_SPINLOCK(topology_lock);
static struct mask_info core_info;
cpumask_t cpu_core_map[NR_CPUS];
unsigned char cpu_core_id[NR_CPUS];
static cpumask_t cpu_coregroup_map(unsigned int cpu)
#ifdef CONFIG_SCHED_BOOK
static struct mask_info book_info;
cpumask_t cpu_book_map[NR_CPUS];
unsigned char cpu_book_id[NR_CPUS];
#endif
static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
struct core_info *core = &core_info;
unsigned long flags;
cpumask_t mask;
cpus_clear(mask);
if (!topology_enabled || !machine_has_topology)
return cpu_possible_map;
spin_lock_irqsave(&topology_lock, flags);
while (core) {
if (cpu_isset(cpu, core->mask)) {
mask = core->mask;
while (info) {
if (cpu_isset(cpu, info->mask)) {
mask = info->mask;
break;
}
core = core->next;
info = info->next;
}
spin_unlock_irqrestore(&topology_lock, flags);
if (cpus_empty(mask))
mask = cpumask_of_cpu(cpu);
return mask;
}
const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
{
return &cpu_core_map[cpu];
}
static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
static void add_cpus_to_mask(struct tl_cpu *tl_cpu, struct mask_info *book,
struct mask_info *core)
{
unsigned int cpu;
@ -117,23 +115,35 @@ static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin;
for_each_present_cpu(lcpu) {
if (cpu_logical_map(lcpu) == rcpu) {
cpu_set(lcpu, core->mask);
cpu_core_id[lcpu] = core->id;
smp_cpu_polarization[lcpu] = tl_cpu->pp;
}
if (cpu_logical_map(lcpu) != rcpu)
continue;
#ifdef CONFIG_SCHED_BOOK
cpu_set(lcpu, book->mask);
cpu_book_id[lcpu] = book->id;
#endif
cpu_set(lcpu, core->mask);
cpu_core_id[lcpu] = core->id;
smp_cpu_polarization[lcpu] = tl_cpu->pp;
}
}
}
static void clear_cores(void)
static void clear_masks(void)
{
struct core_info *core = &core_info;
struct mask_info *info;
while (core) {
cpus_clear(core->mask);
core = core->next;
info = &core_info;
while (info) {
cpus_clear(info->mask);
info = info->next;
}
#ifdef CONFIG_SCHED_BOOK
info = &book_info;
while (info) {
cpus_clear(info->mask);
info = info->next;
}
#endif
}
static union tl_entry *next_tle(union tl_entry *tle)
@ -146,29 +156,36 @@ static union tl_entry *next_tle(union tl_entry *tle)
static void tl_to_cores(struct tl_info *info)
{
#ifdef CONFIG_SCHED_BOOK
struct mask_info *book = &book_info;
#else
struct mask_info *book = NULL;
#endif
struct mask_info *core = &core_info;
union tl_entry *tle, *end;
struct core_info *core = &core_info;
spin_lock_irq(&topology_lock);
clear_cores();
clear_masks();
tle = info->tle;
end = (union tl_entry *)((unsigned long)info + info->length);
while (tle < end) {
switch (tle->nl) {
case 5:
case 4:
case 3:
#ifdef CONFIG_SCHED_BOOK
case 2:
book = book->next;
book->id = tle->container.id;
break;
#endif
case 1:
core = core->next;
core->id = tle->container.id;
break;
case 0:
add_cpus_to_core(&tle->cpu, core);
add_cpus_to_mask(&tle->cpu, book, core);
break;
default:
clear_cores();
clear_masks();
machine_has_topology = 0;
goto out;
}
@ -221,10 +238,29 @@ int topology_set_cpu_management(int fc)
static void update_cpu_core_map(void)
{
unsigned long flags;
int cpu;
for_each_possible_cpu(cpu)
cpu_core_map[cpu] = cpu_coregroup_map(cpu);
spin_lock_irqsave(&topology_lock, flags);
for_each_possible_cpu(cpu) {
cpu_core_map[cpu] = cpu_group_map(&core_info, cpu);
#ifdef CONFIG_SCHED_BOOK
cpu_book_map[cpu] = cpu_group_map(&book_info, cpu);
#endif
}
spin_unlock_irqrestore(&topology_lock, flags);
}
static void store_topology(struct tl_info *info)
{
#ifdef CONFIG_SCHED_BOOK
int rc;
rc = stsi(info, 15, 1, 3);
if (rc != -ENOSYS)
return;
#endif
stsi(info, 15, 1, 2);
}
int arch_update_cpu_topology(void)
@ -238,7 +274,7 @@ int arch_update_cpu_topology(void)
topology_update_polarization_simple();
return 0;
}
stsi(info, 15, 1, 2);
store_topology(info);
tl_to_cores(info);
update_cpu_core_map();
for_each_online_cpu(cpu) {
@ -299,12 +335,24 @@ out:
}
__initcall(init_topology_update);
static void alloc_masks(struct tl_info *info, struct mask_info *mask, int offset)
{
int i, nr_masks;
nr_masks = info->mag[NR_MAG - offset];
for (i = 0; i < info->mnest - offset; i++)
nr_masks *= info->mag[NR_MAG - offset - 1 - i];
nr_masks = max(nr_masks, 1);
for (i = 0; i < nr_masks; i++) {
mask->next = alloc_bootmem(sizeof(struct mask_info));
mask = mask->next;
}
}
void __init s390_init_cpu_topology(void)
{
unsigned long long facility_bits;
struct tl_info *info;
struct core_info *core;
int nr_cores;
int i;
if (stfle(&facility_bits, 1) <= 0)
@ -315,25 +363,13 @@ void __init s390_init_cpu_topology(void)
tl_info = alloc_bootmem_pages(PAGE_SIZE);
info = tl_info;
stsi(info, 15, 1, 2);
nr_cores = info->mag[NR_MAG - 2];
for (i = 0; i < info->mnest - 2; i++)
nr_cores *= info->mag[NR_MAG - 3 - i];
store_topology(info);
pr_info("The CPU configuration topology of the machine is:");
for (i = 0; i < NR_MAG; i++)
printk(" %d", info->mag[i]);
printk(" / %d\n", info->mnest);
core = &core_info;
for (i = 0; i < nr_cores; i++) {
core->next = alloc_bootmem(sizeof(struct core_info));
core = core->next;
if (!core)
goto error;
}
return;
error:
machine_has_topology = 0;
alloc_masks(info, &core_info, 2);
#ifdef CONFIG_SCHED_BOOK
alloc_masks(info, &book_info, 3);
#endif
}


@ -290,7 +290,7 @@ void __init init_IRQ(void)
int __init arch_probe_nr_irqs(void)
{
nr_irqs = sh_mv.mv_nr_irqs;
return 0;
return NR_IRQS_LEGACY;
}
#endif


@ -114,10 +114,10 @@ static void free_msi(struct pci_pbm_info *pbm, int msi_num)
static struct irq_chip msi_irq = {
.name = "PCI-MSI",
.mask = mask_msi_irq,
.unmask = unmask_msi_irq,
.enable = unmask_msi_irq,
.disable = mask_msi_irq,
.irq_mask = mask_msi_irq,
.irq_unmask = unmask_msi_irq,
.irq_enable = unmask_msi_irq,
.irq_disable = mask_msi_irq,
/* XXX affinity XXX */
};


@ -208,7 +208,7 @@ static void tile_irq_chip_eoi(unsigned int irq)
}
static struct irq_chip tile_irq_chip = {
.typename = "tile_irq_chip",
.name = "tile_irq_chip",
.ack = tile_irq_chip_ack,
.eoi = tile_irq_chip_eoi,
.mask = tile_irq_chip_mask,
@ -288,7 +288,7 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
seq_printf(p, " %14s", irq_desc[i].chip->typename);
seq_printf(p, " %14s", irq_desc[i].chip->name);
seq_printf(p, " %s", action->name);
for (action = action->next; action; action = action->next)


@ -46,7 +46,7 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
seq_printf(p, " %14s", irq_desc[i].chip->typename);
seq_printf(p, " %14s", irq_desc[i].chip->name);
seq_printf(p, " %s", action->name);
for (action=action->next; action; action = action->next)
@ -369,7 +369,7 @@ static void dummy(unsigned int irq)
/* This is used for everything else than the timer. */
static struct irq_chip normal_irq_type = {
.typename = "SIGIO",
.name = "SIGIO",
.release = free_irq_by_irq_and_dev,
.disable = dummy,
.enable = dummy,
@ -378,7 +378,7 @@ static struct irq_chip normal_irq_type = {
};
static struct irq_chip SIGVTALRM_irq_type = {
.typename = "SIGVTALRM",
.name = "SIGVTALRM",
.release = free_irq_by_irq_and_dev,
.shutdown = dummy, /* never called */
.disable = dummy,


@ -63,6 +63,10 @@ config X86
select HAVE_USER_RETURN_NOTIFIER
select HAVE_ARCH_JUMP_LABEL
select HAVE_TEXT_POKE_SMP
select HAVE_GENERIC_HARDIRQS
select HAVE_SPARSE_IRQ
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS)
@ -204,20 +208,6 @@ config HAVE_INTEL_TXT
def_bool y
depends on EXPERIMENTAL && DMAR && ACPI
# Use the generic interrupt handling code in kernel/irq/:
config GENERIC_HARDIRQS
def_bool y
config GENERIC_HARDIRQS_NO__DO_IRQ
def_bool y
config GENERIC_IRQ_PROBE
def_bool y
config GENERIC_PENDING_IRQ
def_bool y
depends on GENERIC_HARDIRQS && SMP
config USE_GENERIC_SMP_HELPERS
def_bool y
depends on SMP
@ -300,23 +290,6 @@ config X86_X2APIC
If you don't know what to do here, say N.
config SPARSE_IRQ
bool "Support sparse irq numbering"
depends on PCI_MSI || HT_IRQ
---help---
This enables support for sparse irqs. This is useful for distro
kernels that want to define a high CONFIG_NR_CPUS value but still
want to have low kernel memory footprint on smaller machines.
( Sparse IRQs can also be beneficial on NUMA boxes, as they spread
out the irq_desc[] array in a more NUMA-friendly way. )
If you don't know what to do here, say N.
config NUMA_IRQ_DESC
def_bool y
depends on SPARSE_IRQ && NUMA
config X86_MPPARSE
bool "Enable MPS table" if ACPI
default y
@ -521,25 +494,6 @@ if PARAVIRT_GUEST
source "arch/x86/xen/Kconfig"
config VMI
bool "VMI Guest support (DEPRECATED)"
select PARAVIRT
depends on X86_32
---help---
VMI provides a paravirtualized interface to the VMware ESX server
(it could be used by other hypervisors in theory too, but is not
at the moment), by linking the kernel to a GPL-ed ROM module
provided by the hypervisor.
As of September 2009, VMware has started a phased retirement
of this feature from VMware's products. Please see
feature-removal-schedule.txt for details. If you are
planning to enable this option, please note that you cannot
live migrate a VMI enabled VM to a future VMware product,
which doesn't support VMI. So if you expect your kernel to
seamlessly migrate to newer VMware products, keep this
disabled.
config KVM_CLOCK
bool "KVM paravirtualized clock"
select PARAVIRT
@ -674,7 +628,7 @@ config GART_IOMMU
bool "GART IOMMU support" if EMBEDDED
default y
select SWIOTLB
depends on X86_64 && PCI && K8_NB
depends on X86_64 && PCI && AMD_NB
---help---
Support for full DMA access of devices with 32bit memory access only
on systems with more than 3GB. This is usually needed for USB,
@ -799,6 +753,17 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
config IRQ_TIME_ACCOUNTING
bool "Fine granularity task level IRQ time accounting"
default n
---help---
Select this option to enable fine granularity task irq time
accounting. This is done by reading a timestamp on each
transition between softirq and hardirq state, so there can be a
small performance impact.
If in doubt, say N here.
source "kernel/Kconfig.preempt"
config X86_UP_APIC
@ -1152,6 +1117,9 @@ config X86_PAE
config ARCH_PHYS_ADDR_T_64BIT
def_bool X86_64 || X86_PAE
config ARCH_DMA_ADDR_T_64BIT
def_bool X86_64 || HIGHMEM64G
config DIRECT_GBPAGES
bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
default y
@ -1330,25 +1298,34 @@ config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
Set whether the default state of memory_corruption_check is
on or off.
config X86_RESERVE_LOW_64K
bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen"
default y
config X86_RESERVE_LOW
int "Amount of low memory, in kilobytes, to reserve for the BIOS"
default 64
range 4 640
---help---
Reserve the first 64K of physical RAM on BIOSes that are known
to potentially corrupt that memory range. A numbers of BIOSes are
known to utilize this area during suspend/resume, so it must not
be used by the kernel.
Specify the amount of low memory to reserve for the BIOS.
Set this to N if you are absolutely sure that you trust the BIOS
to get all its memory reservations and usages right.
The first page contains BIOS data structures that the kernel
must not use, so that page must always be reserved.
If you have doubts about the BIOS (e.g. suspend/resume does not
work or there's kernel crashes after certain hardware hotplug
events) and it's not AMI or Phoenix, then you might want to enable
X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical
corruption patterns.
By default we reserve the first 64K of physical RAM, as a
number of BIOSes are known to corrupt that memory range
during events such as suspend/resume or monitor cable
insertion, so it must not be used by the kernel.
Say Y if unsure.
You can set this to 4 if you are absolutely sure that you
trust the BIOS to get all its memory reservations and usages
right. If you know your BIOS has problems beyond the
default 64K area, you can set this to 640 to avoid using the
entire low memory range.
If you have doubts about the BIOS (e.g. suspend/resume does
not work or there's kernel crashes after certain hardware
hotplug events) then you might want to enable
X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check
typical corruption patterns.
Leave this to the default value of 64 if you are unsure.
config MATH_EMULATION
bool
@ -1904,7 +1881,7 @@ config PCI_GODIRECT
bool "Direct"
config PCI_GOOLPC
bool "OLPC"
bool "OLPC XO-1"
depends on OLPC
config PCI_GOANY
@ -2065,14 +2042,21 @@ config SCx200HR_TIMER
config OLPC
bool "One Laptop Per Child support"
select GPIOLIB
select OLPC_OPENFIRMWARE
---help---
Add support for detecting the unique features of the OLPC
XO hardware.
config OLPC_XO1
tristate "OLPC XO-1 support"
depends on OLPC && PCI
---help---
Add support for non-essential features of the OLPC XO-1 laptop.
config OLPC_OPENFIRMWARE
bool "Support for OLPC's Open Firmware"
depends on !X86_64 && !X86_PAE
default y if OLPC
default n
help
This option adds support for the implementation of Open Firmware
that is used on the OLPC XO-1 Children's Machine.
@ -2080,7 +2064,7 @@ config OLPC_OPENFIRMWARE
endif # X86_32
config K8_NB
config AMD_NB
def_bool y
depends on CPU_SUP_AMD && PCI


@ -43,6 +43,10 @@ config EARLY_PRINTK
with klogd/syslogd or the X server. You should normally say N here,
unless you want to debug such a crash.
config EARLY_PRINTK_MRST
bool "Early printk for MRST platform support"
depends on EARLY_PRINTK && X86_MRST
config EARLY_PRINTK_DBGP
bool "Early printk via EHCI debug port"
depends on EARLY_PRINTK && PCI


@ -96,8 +96,12 @@ cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_en
# is .cfi_signal_frame supported too?
cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections)
KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections)
# does binutils support specific instructions?
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
LDFLAGS := -m elf_$(UTS_MACHINE)


@ -1,5 +1,5 @@
/*
* Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
* Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
* Leo Duran <leo.duran@amd.com>
*


@ -1,5 +1,5 @@
/*
* Copyright (C) 2009 Advanced Micro Devices, Inc.
* Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
*
* This program is free software; you can redistribute it and/or modify it


@ -1,5 +1,5 @@
/*
* Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
* Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
* Leo Duran <leo.duran@amd.com>
*
@ -416,13 +416,22 @@ struct amd_iommu {
struct dma_ops_domain *default_dom;
/*
* This array is required to work around a potential BIOS bug.
* The BIOS may miss to restore parts of the PCI configuration
* space when the system resumes from S3. The result is that the
* IOMMU does not execute commands anymore which leads to system
* failure.
* We can't rely on the BIOS to restore all values on reinit, so we
* need to stash them
*/
u32 cache_cfg[4];
/* The iommu BAR */
u32 stored_addr_lo;
u32 stored_addr_hi;
/*
* Each iommu has 6 l1s, each of which is documented as having 0x12
* registers
*/
u32 stored_l1[6][0x12];
/* The l2 indirect registers */
u32 stored_l2[0x83];
};
/*


@ -1,5 +1,5 @@
#ifndef _ASM_X86_K8_H
#define _ASM_X86_K8_H
#ifndef _ASM_X86_AMD_NB_H
#define _ASM_X86_AMD_NB_H
#include <linux/pci.h>
@ -7,24 +7,27 @@ extern struct pci_device_id k8_nb_ids[];
struct bootnode;
extern int early_is_k8_nb(u32 value);
extern struct pci_dev **k8_northbridges;
extern int num_k8_northbridges;
extern int cache_k8_northbridges(void);
extern void k8_flush_garts(void);
extern int k8_get_nodes(struct bootnode *nodes);
extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int k8_scan_nodes(void);
#ifdef CONFIG_K8_NB
extern int num_k8_northbridges;
struct k8_northbridge_info {
u16 num;
u8 gart_supported;
struct pci_dev **nb_misc;
};
extern struct k8_northbridge_info k8_northbridges;
#ifdef CONFIG_AMD_NB
static inline struct pci_dev *node_to_k8_nb_misc(int node)
{
return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL;
return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL;
}
#else
#define num_k8_northbridges 0
static inline struct pci_dev *node_to_k8_nb_misc(int node)
{
@ -33,4 +36,4 @@ static inline struct pci_dev *node_to_k8_nb_misc(int node)
#endif
#endif /* _ASM_X86_K8_H */
#endif /* _ASM_X86_AMD_NB_H */
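A hedged usage sketch of the reworked bookkeeping, assuming cache_k8_northbridges() has already populated k8_northbridges (the function name below is made up):
static void print_nb_misc_devices(void)
{
	int i;

	if (!k8_northbridges.gart_supported)
		pr_info("GART not supported on this system\n");

	for (i = 0; i < k8_northbridges.num; i++) {
		struct pci_dev *misc = k8_northbridges.nb_misc[i];

		if (misc)
			pr_info("node %d: NB misc function %s\n", i, pci_name(misc));
	}
}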


@ -54,7 +54,6 @@ extern struct clock_event_device *global_clock_event;
extern unsigned long apbt_quick_calibrate(void);
extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);
extern void apbt_setup_secondary_clock(void);
extern unsigned int boot_cpu_id;
extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr);


@ -252,9 +252,7 @@ static inline int apic_is_clustered_box(void)
}
#endif
extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask);
extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }


@ -131,6 +131,7 @@
#define APIC_EILVTn(n) (0x500 + 0x10 * n)
#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
#define APIC_EILVT_NR_AMD_10H 4
#define APIC_EILVT_NR_MAX APIC_EILVT_NR_AMD_10H
#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF)
#define APIC_EILVT_MSG_FIX 0x0
#define APIC_EILVT_MSG_SMI 0x2


@ -32,6 +32,5 @@ extern void arch_unregister_cpu(int);
DECLARE_PER_CPU(int, cpu_state);
extern unsigned int boot_cpu_id;
#endif /* _ASM_X86_CPU_H */


@ -152,10 +152,14 @@
#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */
#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */
#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */
#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */
#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */
#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@ -180,6 +184,13 @@
#define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */
#define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */
#define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR (8*32+ 9) /* "tsc_scale" AMD TSC scaling support */
#define X86_FEATURE_VMCBCLEAN (8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */
#define X86_FEATURE_FLUSHBYASID (8*32+11) /* AMD flush-by-ASID support */
#define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */
#define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/


@ -89,6 +89,16 @@
CFI_ADJUST_CFA_OFFSET -8
.endm
.macro pushfq_cfi
pushfq
CFI_ADJUST_CFA_OFFSET 8
.endm
.macro popfq_cfi
popfq
CFI_ADJUST_CFA_OFFSET -8
.endm
.macro movq_cfi reg offset=0
movq %\reg, \offset(%rsp)
CFI_REL_OFFSET \reg, \offset
@ -109,6 +119,16 @@
CFI_ADJUST_CFA_OFFSET -4
.endm
.macro pushfl_cfi
pushfl
CFI_ADJUST_CFA_OFFSET 4
.endm
.macro popfl_cfi
popfl
CFI_ADJUST_CFA_OFFSET -4
.endm
.macro movl_cfi reg offset=0
movl %\reg, \offset(%esp)
CFI_REL_OFFSET \reg, \offset


@ -214,5 +214,20 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
return __virt_to_fix(vaddr);
}
/* Return a pointer with offset calculated */
static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags)
{
__set_fixmap(idx, phys, flags);
return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
}
#define set_fixmap_offset(idx, phys) \
__set_fixmap_offset(idx, phys, PAGE_KERNEL)
#define set_fixmap_offset_nocache(idx, phys) \
__set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE)
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_FIXMAP_H */
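A hypothetical usage sketch of the new helper; FIX_EXAMPLE_IO and the physical address are made up, real callers pick a slot from enum fixed_addresses:
static void __init example_map_regs(void)
{
	unsigned long regs;

	/* Map the page containing the registers and keep the sub-page offset. */
	regs = set_fixmap_offset_nocache(FIX_EXAMPLE_IO, 0xfed00042UL);
	writeb(0x1, (void __iomem *)regs);
}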


@ -17,6 +17,7 @@ extern int fix_aperture;
#define GARTEN (1<<0)
#define DISGARTCPU (1<<4)
#define DISGARTIO (1<<5)
#define DISTLBWALKPRB (1<<6)
/* GART cache control register bits. */
#define INVGART (1<<0)
@ -27,7 +28,6 @@ extern int fix_aperture;
#define AMD64_GARTAPERTUREBASE 0x94
#define AMD64_GARTTABLEBASE 0x98
#define AMD64_GARTCACHECTL 0x9c
#define AMD64_GARTEN (1<<0)
#ifdef CONFIG_GART_IOMMU
extern int gart_iommu_aperture;
@ -57,6 +57,19 @@ static inline void gart_iommu_hole_init(void)
extern int agp_amd64_init(void);
static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order)
{
u32 ctl;
/*
* Don't enable translation but enable GART IO and CPU accesses.
* Also, set DISTLBWALKPRB since GART tables memory is UC.
*/
ctl = DISTLBWALKPRB | order << 1;
pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
}
static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
{
u32 tmp, ctl;


@ -74,10 +74,12 @@ extern void hpet_disable(void);
extern unsigned int hpet_readl(unsigned int a);
extern void force_hpet_resume(void);
extern void hpet_msi_unmask(unsigned int irq);
extern void hpet_msi_mask(unsigned int irq);
extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg);
extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg);
struct irq_data;
extern void hpet_msi_unmask(struct irq_data *data);
extern void hpet_msi_mask(struct irq_data *data);
struct hpet_dev;
extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
#ifdef CONFIG_PCI_MSI
extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id);


@ -78,6 +78,13 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
irq_attr->polarity = polarity;
}
struct irq_2_iommu {
struct intel_iommu *iommu;
u16 irte_index;
u16 sub_handle;
u8 irte_mask;
};
/*
* This is performance-critical, we want to do it O(1)
*
@ -89,15 +96,17 @@ struct irq_cfg {
cpumask_var_t old_domain;
u8 vector;
u8 move_in_progress : 1;
#ifdef CONFIG_INTR_REMAP
struct irq_2_iommu irq_2_iommu;
#endif
};
extern struct irq_cfg *irq_cfg(unsigned int);
extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
extern void send_cleanup_vector(struct irq_cfg *);
struct irq_desc;
extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *,
unsigned int *dest_id);
struct irq_data;
int __ioapic_set_affinity(struct irq_data *, const struct cpumask *,
unsigned int *dest_id);
extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
extern void setup_ioapic_dest(void);


@ -55,6 +55,12 @@ extern int save_i387_xstate_ia32(void __user *buf);
extern int restore_i387_xstate_ia32(void __user *buf);
#endif
#ifdef CONFIG_MATH_EMULATION
extern void finit_soft_fpu(struct i387_soft_struct *soft);
#else
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif
#define X87_FSW_ES (1 << 7) /* Exception Summary */
static __always_inline __pure bool use_xsaveopt(void)
@ -67,6 +73,11 @@ static __always_inline __pure bool use_xsave(void)
return static_cpu_has(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
return static_cpu_has(X86_FEATURE_FXSR);
}
extern void __sanitize_i387_state(struct task_struct *);
static inline void sanitize_i387_state(struct task_struct *tsk)
@ -77,19 +88,11 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
}
#ifdef CONFIG_X86_64
/* Ignore delayed exceptions from user space */
static inline void tolerant_fwait(void)
{
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
}
static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
{
int err;
/* See comment in fxsave() below. */
asm volatile("1: rex64/fxrstor (%[fx])\n\t"
"2:\n"
".section .fixup,\"ax\"\n"
@ -98,44 +101,10 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
".previous\n"
_ASM_EXTABLE(1b, 3b)
: [err] "=r" (err)
#if 0 /* See comment in fxsave() below. */
: [fx] "r" (fx), "m" (*fx), "0" (0));
#else
: [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
#endif
: [fx] "R" (fx), "m" (*fx), "0" (0));
return err;
}
/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. The kernel data segment can be sometimes 0 and sometimes
new user value. Both should be ok.
Use the PDA as safe address because it should be already in L1. */
static inline void fpu_clear(struct fpu *fpu)
{
struct xsave_struct *xstate = &fpu->state->xsave;
struct i387_fxsave_struct *fx = &fpu->state->fxsave;
/*
* xsave header may indicate the init state of the FP.
*/
if (use_xsave() &&
!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
return;
if (unlikely(fx->swd & X87_FSW_ES))
asm volatile("fnclex");
alternative_input(ASM_NOP8 ASM_NOP2,
" emms\n" /* clear stack tags */
" fildl %%gs:0", /* load to clear state */
X86_FEATURE_FXSAVE_LEAK);
}
static inline void clear_fpu_state(struct task_struct *tsk)
{
fpu_clear(&tsk->thread.fpu);
}
static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
{
int err;
@ -149,6 +118,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
if (unlikely(err))
return -EFAULT;
/* See comment in fxsave() below. */
asm volatile("1: rex64/fxsave (%[fx])\n\t"
"2:\n"
".section .fixup,\"ax\"\n"
@ -157,11 +127,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
".previous\n"
_ASM_EXTABLE(1b, 3b)
: [err] "=r" (err), "=m" (*fx)
#if 0 /* See comment in fxsave() below. */
: [fx] "r" (fx), "0" (0));
#else
: [fx] "cdaSDb" (fx), "0" (0));
#endif
: [fx] "R" (fx), "0" (0));
if (unlikely(err) &&
__clear_user(fx, sizeof(struct i387_fxsave_struct)))
err = -EFAULT;
@ -175,56 +141,29 @@ static inline void fpu_fxsave(struct fpu *fpu)
uses any extended registers for addressing, a second REX prefix
will be generated (to the assembler, rex64 followed by semicolon
is a separate instruction), and hence the 64-bitness is lost. */
#if 0
#ifdef CONFIG_AS_FXSAVEQ
/* Using "fxsaveq %0" would be the ideal choice, but is only supported
starting with gas 2.16. */
__asm__ __volatile__("fxsaveq %0"
: "=m" (fpu->state->fxsave));
#elif 0
#else
/* Using, as a workaround, the properly prefixed form below isn't
accepted by any binutils version so far released, complaining that
the same type of prefix is used twice if an extended register is
needed for addressing (fix submitted to mainline 2005-11-21). */
__asm__ __volatile__("rex64/fxsave %0"
: "=m" (fpu->state->fxsave));
#else
/* This, however, we can work around by forcing the compiler to select
needed for addressing (fix submitted to mainline 2005-11-21).
asm volatile("rex64/fxsave %0"
: "=m" (fpu->state->fxsave));
This, however, we can work around by forcing the compiler to select
an addressing mode that doesn't require extended registers. */
__asm__ __volatile__("rex64/fxsave (%1)"
: "=m" (fpu->state->fxsave)
: "cdaSDb" (&fpu->state->fxsave));
asm volatile("rex64/fxsave (%[fx])"
: "=m" (fpu->state->fxsave)
: [fx] "R" (&fpu->state->fxsave));
#endif
}
static inline void fpu_save_init(struct fpu *fpu)
{
if (use_xsave())
fpu_xsave(fpu);
else
fpu_fxsave(fpu);
fpu_clear(fpu);
}
static inline void __save_init_fpu(struct task_struct *tsk)
{
fpu_save_init(&tsk->thread.fpu);
task_thread_info(tsk)->status &= ~TS_USEDFPU;
}
#else /* CONFIG_X86_32 */
#ifdef CONFIG_MATH_EMULATION
extern void finit_soft_fpu(struct i387_soft_struct *soft);
#else
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif
static inline void tolerant_fwait(void)
{
asm volatile("fnclex ; fwait");
}
/* perform fxrstor iff the processor has extended states, otherwise frstor */
static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
{
@ -241,6 +180,14 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
return 0;
}
static inline void fpu_fxsave(struct fpu *fpu)
{
asm volatile("fxsave %[fx]"
: [fx] "=m" (fpu->state->fxsave));
}
#endif /* CONFIG_X86_64 */
/* We need a safe address that is cheap to find and that is already
in L1 during context switch. The best choices are unfortunately
different for UP and SMP */
@ -256,47 +203,33 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
static inline void fpu_save_init(struct fpu *fpu)
{
if (use_xsave()) {
struct xsave_struct *xstate = &fpu->state->xsave;
struct i387_fxsave_struct *fx = &fpu->state->fxsave;
fpu_xsave(fpu);
/*
* xsave header may indicate the init state of the FP.
*/
if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
goto end;
if (unlikely(fx->swd & X87_FSW_ES))
asm volatile("fnclex");
/*
* we can do a simple return here or be paranoid :)
*/
goto clear_state;
if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
return;
} else if (use_fxsr()) {
fpu_fxsave(fpu);
} else {
asm volatile("fsave %[fx]; fwait"
: [fx] "=m" (fpu->state->fsave));
return;
}
/* Use more nops than strictly needed in case the compiler
varies code */
alternative_input(
"fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
"fxsave %[fx]\n"
"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
X86_FEATURE_FXSR,
[fx] "m" (fpu->state->fxsave),
[fsw] "m" (fpu->state->fxsave.swd) : "memory");
clear_state:
if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
asm volatile("fnclex");
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. safe_address is a random variable that should be in L1 */
alternative_input(
GENERIC_NOP8 GENERIC_NOP2,
ASM_NOP8 ASM_NOP2,
"emms\n\t" /* clear stack tags */
"fildl %[addr]", /* set F?P to defined value */
"fildl %P[addr]", /* set F?P to defined value */
X86_FEATURE_FXSAVE_LEAK,
[addr] "m" (safe_address));
end:
;
}
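
In the reworked path above, fpu_save_init() first picks a save mechanism (xsave, fxsave, or plain fnsave) and only then applies the two fix-ups: fnclex when the exception-summary bit is set, and the FXSAVE-leak workaround that reloads a safe value. A condensed, stand-alone sketch of that decision order — the hardware instructions are reduced to stubs so the control flow can be compiled and traced, and the XSTATE_FP value is an assumption:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define X87_FSW_ES (1 << 7)   /* Exception Summary, as in the header */
#define XSTATE_FP  0x1        /* assumption: FP bit in xstate_bv */

/* Stubs standing in for XSAVE/FXSAVE/FNSAVE and the leak workaround. */
static void do_xsave(void)    { puts("xsave"); }
static void do_fxsave(void)   { puts("fxsave"); }
static void do_fnsave(void)   { puts("fnsave ; fwait"); }
static void do_fnclex(void)   { puts("fnclex"); }
static void do_leak_fix(void) { puts("emms ; fildl safe_address"); }

static void fpu_save_init_sketch(bool has_xsave, bool has_fxsr,
                                 uint64_t xstate_bv, uint16_t swd)
{
        if (has_xsave) {
                do_xsave();
                /* xsave header may report the FP state as init: nothing to clean */
                if (!(xstate_bv & XSTATE_FP))
                        return;
        } else if (has_fxsr) {
                do_fxsave();
        } else {
                do_fnsave();        /* fnsave already reinitialises the FPU */
                return;
        }

        if (swd & X87_FSW_ES)       /* pending exception summary */
                do_fnclex();
        do_leak_fix();              /* AMD FDP/FIP/FOP leak workaround */
}

int main(void)
{
        fpu_save_init_sketch(true, true, XSTATE_FP, X87_FSW_ES);
        return 0;
}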
static inline void __save_init_fpu(struct task_struct *tsk)
@ -305,9 +238,6 @@ static inline void __save_init_fpu(struct task_struct *tsk)
task_thread_info(tsk)->status &= ~TS_USEDFPU;
}
#endif /* CONFIG_X86_64 */
static inline int fpu_fxrstor_checking(struct fpu *fpu)
{
return fxrstor_checking(&fpu->state->fxsave);
@ -344,7 +274,10 @@ static inline void __unlazy_fpu(struct task_struct *tsk)
static inline void __clear_fpu(struct task_struct *tsk)
{
if (task_thread_info(tsk)->status & TS_USEDFPU) {
tolerant_fwait();
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
task_thread_info(tsk)->status &= ~TS_USEDFPU;
stts();
}
@ -405,19 +338,6 @@ static inline void irq_ts_restore(int TS_state)
stts();
}
#ifdef CONFIG_X86_64
static inline void save_init_fpu(struct task_struct *tsk)
{
__save_init_fpu(tsk);
stts();
}
#define unlazy_fpu __unlazy_fpu
#define clear_fpu __clear_fpu
#else /* CONFIG_X86_32 */
/*
* These disable preemption on their own and are safe
*/
@ -443,8 +363,6 @@ static inline void clear_fpu(struct task_struct *tsk)
preempt_enable();
}
#endif /* CONFIG_X86_64 */
/*
* i387 state interaction
*/
@ -508,7 +426,4 @@ extern void fpu_finit(struct fpu *fpu);
#endif /* __ASSEMBLY__ */
#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5
#endif /* _ASM_X86_I387_H */


@ -55,6 +55,8 @@ extern struct irq_chip i8259A_chip;
struct legacy_pic {
int nr_legacy_irqs;
struct irq_chip *chip;
void (*mask)(unsigned int irq);
void (*unmask)(unsigned int irq);
void (*mask_all)(void);
void (*restore_mask)(void);
void (*init)(int auto_eoi);


@ -206,6 +206,7 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
extern void iounmap(volatile void __iomem *addr);
extern void set_iounmap_nonlazy(void);
#ifdef __KERNEL__


@ -170,12 +170,6 @@ extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern void probe_nr_irqs_gsi(void);
extern int setup_ioapic_entry(int apic, int irq,
struct IO_APIC_route_entry *entry,
unsigned int destination, int trigger,
int polarity, int vector, int pin);
extern void ioapic_write_entry(int apic, int pin,
struct IO_APIC_route_entry e);
extern void setup_ioapic_ids_from_mpc(void);
struct mp_ioapic_gsi{


@ -3,4 +3,39 @@
#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
#ifdef CONFIG_INTR_REMAP
static inline void prepare_irte(struct irte *irte, int vector,
unsigned int dest)
{
memset(irte, 0, sizeof(*irte));
irte->present = 1;
irte->dst_mode = apic->irq_dest_mode;
/*
* Trigger mode in the IRTE will always be edge, and for IO-APIC, the
* actual level or edge trigger will be setup in the IO-APIC
* RTE. This will help simplify level triggered irq migration.
* For more details, see the comments (in io_apic.c) explaining IO-APIC
* irq migration in the presence of interrupt-remapping.
*/
irte->trigger_mode = 0;
irte->dlvry_mode = apic->irq_delivery_mode;
irte->vector = vector;
irte->dest_id = IRTE_DEST(dest);
irte->redir_hint = 1;
}
static inline bool irq_remapped(struct irq_cfg *cfg)
{
return cfg->irq_2_iommu.iommu != NULL;
}
#else
static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
{
}
static inline bool irq_remapped(struct irq_cfg *cfg)
{
return false;
}
#endif
#endif /* _ASM_X86_IRQ_REMAPPING_H */


@ -10,6 +10,9 @@
*/
#ifndef _ASM_X86_MRST_H
#define _ASM_X86_MRST_H
#include <linux/sfi.h>
extern int pci_mrst_init(void);
int __init sfi_parse_mrtc(struct sfi_table_header *table);
@ -26,7 +29,7 @@ enum mrst_cpu_type {
};
extern enum mrst_cpu_type __mrst_cpu_chip;
static enum mrst_cpu_type mrst_identify_cpu(void)
static inline enum mrst_cpu_type mrst_identify_cpu(void)
{
return __mrst_cpu_chip;
}
@ -42,4 +45,9 @@ extern enum mrst_timer_options mrst_timer_options;
#define SFI_MTMR_MAX_NUM 8
#define SFI_MRTC_MAX 8
extern struct console early_mrst_console;
extern void mrst_early_console_init(void);
extern struct console early_hsu_console;
extern void hsu_early_console_init(void);
#endif /* _ASM_X86_MRST_H */


@ -0,0 +1,15 @@
#ifndef _ASM_X86_MWAIT_H
#define _ASM_X86_MWAIT_H
#define MWAIT_SUBSTATE_MASK 0xf
#define MWAIT_CSTATE_MASK 0xf
#define MWAIT_SUBSTATE_SIZE 4
#define MWAIT_MAX_NUM_CSTATES 8
#define CPUID_MWAIT_LEAF 5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
#define CPUID5_ECX_INTERRUPT_BREAK 0x2
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
#endif /* _ASM_X86_MWAIT_H */
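
This new header collects the MWAIT/CPUID constants that used to live privately in the ACPI C-state code (the duplicates are removed in a later hunk). A hedged user-space sketch of how they decode CPUID leaf 5 — EDX carries a four-bit sub-state count per C-state and ECX the enumeration/interrupt-break capability bits; the EAX/EBX monitor-line-size fields are from the CPUID documentation rather than from this patch:

#include <stdio.h>
#include <cpuid.h>

#define MWAIT_SUBSTATE_MASK             0xf
#define MWAIT_SUBSTATE_SIZE             4
#define MWAIT_MAX_NUM_CSTATES           8
#define CPUID_MWAIT_LEAF                5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
#define CPUID5_ECX_INTERRUPT_BREAK      0x2

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx))
                return 1;

        printf("monitor line size: %u..%u bytes\n", eax & 0xffff, ebx & 0xffff);
        printf("enumeration extension: %s, break-on-interrupt: %s\n",
               (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ? "yes" : "no",
               (ecx & CPUID5_ECX_INTERRUPT_BREAK) ? "yes" : "no");

        /* EDX packs one 4-bit sub-state count per C-state, C0 in the low nibble. */
        for (int cstate = 0; cstate < MWAIT_MAX_NUM_CSTATES; cstate++) {
                unsigned int substates =
                        (edx >> (cstate * MWAIT_SUBSTATE_SIZE)) & MWAIT_SUBSTATE_MASK;
                printf("C%d: %u MWAIT sub-states\n", cstate, substates);
        }
        return 0;
}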


@ -21,10 +21,14 @@ extern void olpc_ofw_detect(void);
/* install OFW's pde permanently into the kernel's pgtable */
extern void setup_olpc_ofw_pgd(void);
/* check if OFW was detected during boot */
extern bool olpc_ofw_present(void);
#else /* !CONFIG_OLPC_OPENFIRMWARE */
static inline void olpc_ofw_detect(void) { }
static inline void setup_olpc_ofw_pgd(void) { }
static inline bool olpc_ofw_present(void) { return false; }
#endif /* !CONFIG_OLPC_OPENFIRMWARE */


@ -8,7 +8,7 @@
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1)
#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
/* Cast PAGE_MASK to a signed type so that it is sign-extended if


@ -416,11 +416,6 @@ static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
}
static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
unsigned long start, unsigned long count)
{
PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
}
static inline void paravirt_release_pmd(unsigned long pfn)
{
PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);


@ -255,7 +255,6 @@ struct pv_mmu_ops {
*/
void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
void (*release_pte)(unsigned long pfn);
void (*release_pmd)(unsigned long pfn);


@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
extern struct mm_struct *pgd_page_get_mm(struct page *page);
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else /* !CONFIG_PARAVIRT */
@ -603,6 +605,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
pte_update(mm, addr, ptep);
}
#define flush_tlb_fix_spurious_fault(vma, address)
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*


@ -102,6 +102,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
native_set_pgd(pgd, native_make_pgd(0));
}
extern void sync_global_pgds(unsigned long start, unsigned long end);
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.


@ -110,6 +110,8 @@ struct cpuinfo_x86 {
u16 phys_proc_id;
/* Core id: */
u16 cpu_core_id;
/* Compute unit id */
u8 compute_unit_id;
/* Index into per_cpu list: */
u16 cpu_index;
#endif
@ -602,7 +604,7 @@ extern unsigned long mmu_cr4_features;
static inline void set_in_cr4(unsigned long mask)
{
unsigned cr4;
unsigned long cr4;
mmu_cr4_features |= mask;
cr4 = read_cr4();
@ -612,7 +614,7 @@ static inline void set_in_cr4(unsigned long mask)
static inline void clear_in_cr4(unsigned long mask)
{
unsigned cr4;
unsigned long cr4;
mmu_cr4_features &= ~mask;
cr4 = read_cr4();
@ -764,29 +766,6 @@ extern unsigned long idle_halt;
extern unsigned long idle_nomwait;
extern bool c1e_detected;
/*
* on systems with caches, caches must be flashed as the absolute
* last instruction before going into a suspended halt. Otherwise,
* dirty data can linger in the cache and become stale on resume,
* leading to strange errors.
*
* perform a variety of operations to guarantee that the compiler
* will not reorder instructions. wbinvd itself is serializing
* so the processor will not reorder.
*
* Systems without cache can just go into halt.
*/
static inline void wbinvd_halt(void)
{
mb();
/* check for clflush to determine if wbinvd is legal */
if (cpu_has_clflush)
asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
else
while (1)
halt();
}
extern void enable_sep_cpu(void);
extern int sysenter_setup(void);


@ -93,6 +93,11 @@ void *extend_brk(size_t size, size_t align);
: : "i" (sz)); \
}
/* Helper for reserving space for arrays of things */
#define RESERVE_BRK_ARRAY(type, name, entries) \
type *name; \
RESERVE_BRK(name, sizeof(type) * entries)
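
RESERVE_BRK_ARRAY just declares a pointer and reserves sizeof(type) * entries bytes of early brk space for it; the caller is still expected to point the variable at the reserved area with extend_brk() (declared above) during early boot. A purely hypothetical usage fragment, with an illustrative type and element count that are not part of this patch:

/* Hypothetical early-boot use (illustrative only): */
RESERVE_BRK_ARRAY(struct e820entry, extra_entries, 32);

/* roughly equivalent to writing by hand: */
struct e820entry *extra_entries;
RESERVE_BRK(extra_entries, sizeof(struct e820entry) * 32);

/* later, once the brk allocator is available: */
extra_entries = extend_brk(sizeof(struct e820entry) * 32,
                           __alignof__(struct e820entry));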
#ifdef __i386__
void __init i386_start_kernel(void);


@ -1,269 +0,0 @@
/*
* VMI interface definition
*
* Copyright (C) 2005, VMware, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Maintained by: Zachary Amsden zach@vmware.com
*
*/
#include <linux/types.h>
/*
*---------------------------------------------------------------------
*
* VMI Option ROM API
*
*---------------------------------------------------------------------
*/
#define VMI_SIGNATURE 0x696d5663 /* "cVmi" */
#define PCI_VENDOR_ID_VMWARE 0x15AD
#define PCI_DEVICE_ID_VMWARE_VMI 0x0801
/*
* We use two version numbers for compatibility, with the major
* number signifying interface breakages, and the minor number
* interface extensions.
*/
#define VMI_API_REV_MAJOR 3
#define VMI_API_REV_MINOR 0
#define VMI_CALL_CPUID 0
#define VMI_CALL_WRMSR 1
#define VMI_CALL_RDMSR 2
#define VMI_CALL_SetGDT 3
#define VMI_CALL_SetLDT 4
#define VMI_CALL_SetIDT 5
#define VMI_CALL_SetTR 6
#define VMI_CALL_GetGDT 7
#define VMI_CALL_GetLDT 8
#define VMI_CALL_GetIDT 9
#define VMI_CALL_GetTR 10
#define VMI_CALL_WriteGDTEntry 11
#define VMI_CALL_WriteLDTEntry 12
#define VMI_CALL_WriteIDTEntry 13
#define VMI_CALL_UpdateKernelStack 14
#define VMI_CALL_SetCR0 15
#define VMI_CALL_SetCR2 16
#define VMI_CALL_SetCR3 17
#define VMI_CALL_SetCR4 18
#define VMI_CALL_GetCR0 19
#define VMI_CALL_GetCR2 20
#define VMI_CALL_GetCR3 21
#define VMI_CALL_GetCR4 22
#define VMI_CALL_WBINVD 23
#define VMI_CALL_SetDR 24
#define VMI_CALL_GetDR 25
#define VMI_CALL_RDPMC 26
#define VMI_CALL_RDTSC 27
#define VMI_CALL_CLTS 28
#define VMI_CALL_EnableInterrupts 29
#define VMI_CALL_DisableInterrupts 30
#define VMI_CALL_GetInterruptMask 31
#define VMI_CALL_SetInterruptMask 32
#define VMI_CALL_IRET 33
#define VMI_CALL_SYSEXIT 34
#define VMI_CALL_Halt 35
#define VMI_CALL_Reboot 36
#define VMI_CALL_Shutdown 37
#define VMI_CALL_SetPxE 38
#define VMI_CALL_SetPxELong 39
#define VMI_CALL_UpdatePxE 40
#define VMI_CALL_UpdatePxELong 41
#define VMI_CALL_MachineToPhysical 42
#define VMI_CALL_PhysicalToMachine 43
#define VMI_CALL_AllocatePage 44
#define VMI_CALL_ReleasePage 45
#define VMI_CALL_InvalPage 46
#define VMI_CALL_FlushTLB 47
#define VMI_CALL_SetLinearMapping 48
#define VMI_CALL_SetIOPLMask 61
#define VMI_CALL_SetInitialAPState 62
#define VMI_CALL_APICWrite 63
#define VMI_CALL_APICRead 64
#define VMI_CALL_IODelay 65
#define VMI_CALL_SetLazyMode 73
/*
*---------------------------------------------------------------------
*
* MMU operation flags
*
*---------------------------------------------------------------------
*/
/* Flags used by VMI_{Allocate|Release}Page call */
#define VMI_PAGE_PAE 0x10 /* Allocate PAE shadow */
#define VMI_PAGE_CLONE 0x20 /* Clone from another shadow */
#define VMI_PAGE_ZEROED 0x40 /* Page is pre-zeroed */
/* Flags shared by Allocate|Release Page and PTE updates */
#define VMI_PAGE_PT 0x01
#define VMI_PAGE_PD 0x02
#define VMI_PAGE_PDP 0x04
#define VMI_PAGE_PML4 0x08
#define VMI_PAGE_NORMAL 0x00 /* for debugging */
/* Flags used by PTE updates */
#define VMI_PAGE_CURRENT_AS 0x10 /* implies VMI_PAGE_VA_MASK is valid */
#define VMI_PAGE_DEFER 0x20 /* may queue update until TLB inval */
#define VMI_PAGE_VA_MASK 0xfffff000
#ifdef CONFIG_X86_PAE
#define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_PAE | VMI_PAGE_ZEROED)
#define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_PAE | VMI_PAGE_ZEROED)
#else
#define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_ZEROED)
#define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_ZEROED)
#endif
/* Flags used by VMI_FlushTLB call */
#define VMI_FLUSH_TLB 0x01
#define VMI_FLUSH_GLOBAL 0x02
/*
*---------------------------------------------------------------------
*
* VMI relocation definitions for ROM call get_reloc
*
*---------------------------------------------------------------------
*/
/* VMI Relocation types */
#define VMI_RELOCATION_NONE 0
#define VMI_RELOCATION_CALL_REL 1
#define VMI_RELOCATION_JUMP_REL 2
#define VMI_RELOCATION_NOP 3
#ifndef __ASSEMBLY__
struct vmi_relocation_info {
unsigned char *eip;
unsigned char type;
unsigned char reserved[3];
};
#endif
/*
*---------------------------------------------------------------------
*
* Generic ROM structures and definitions
*
*---------------------------------------------------------------------
*/
#ifndef __ASSEMBLY__
struct vrom_header {
u16 rom_signature; /* option ROM signature */
u8 rom_length; /* ROM length in 512 byte chunks */
u8 rom_entry[4]; /* 16-bit code entry point */
u8 rom_pad0; /* 4-byte align pad */
u32 vrom_signature; /* VROM identification signature */
u8 api_version_min;/* Minor version of API */
u8 api_version_maj;/* Major version of API */
u8 jump_slots; /* Number of jump slots */
u8 reserved1; /* Reserved for expansion */
u32 virtual_top; /* Hypervisor virtual address start */
u16 reserved2; /* Reserved for expansion */
u16 license_offs; /* Offset to License string */
u16 pci_header_offs;/* Offset to PCI OPROM header */
u16 pnp_header_offs;/* Offset to PnP OPROM header */
u32 rom_pad3; /* PnP reserverd / VMI reserved */
u8 reserved[96]; /* Reserved for headers */
char vmi_init[8]; /* VMI_Init jump point */
char get_reloc[8]; /* VMI_GetRelocationInfo jump point */
} __attribute__((packed));
struct pnp_header {
char sig[4];
char rev;
char size;
short next;
short res;
long devID;
unsigned short manufacturer_offset;
unsigned short product_offset;
} __attribute__((packed));
struct pci_header {
char sig[4];
short vendorID;
short deviceID;
short vpdData;
short size;
char rev;
char class;
char subclass;
char interface;
short chunks;
char rom_version_min;
char rom_version_maj;
char codetype;
char lastRom;
short reserved;
} __attribute__((packed));
/* Function prototypes for bootstrapping */
#ifdef CONFIG_VMI
extern void vmi_init(void);
extern void vmi_activate(void);
extern void vmi_bringup(void);
#else
static inline void vmi_init(void) {}
static inline void vmi_activate(void) {}
static inline void vmi_bringup(void) {}
#endif
/* State needed to start an application processor in an SMP system. */
struct vmi_ap_state {
u32 cr0;
u32 cr2;
u32 cr3;
u32 cr4;
u64 efer;
u32 eip;
u32 eflags;
u32 eax;
u32 ebx;
u32 ecx;
u32 edx;
u32 esp;
u32 ebp;
u32 esi;
u32 edi;
u16 cs;
u16 ss;
u16 ds;
u16 es;
u16 fs;
u16 gs;
u16 ldtr;
u16 gdtr_limit;
u32 gdtr_base;
u32 idtr_base;
u16 idtr_limit;
};
#endif


@ -1,98 +0,0 @@
/*
* VMI Time wrappers
*
* Copyright (C) 2006, VMware, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to dhecht@vmware.com
*
*/
#ifndef _ASM_X86_VMI_TIME_H
#define _ASM_X86_VMI_TIME_H
/*
* Raw VMI call indices for timer functions
*/
#define VMI_CALL_GetCycleFrequency 66
#define VMI_CALL_GetCycleCounter 67
#define VMI_CALL_SetAlarm 68
#define VMI_CALL_CancelAlarm 69
#define VMI_CALL_GetWallclockTime 70
#define VMI_CALL_WallclockUpdated 71
/* Cached VMI timer operations */
extern struct vmi_timer_ops {
u64 (*get_cycle_frequency)(void);
u64 (*get_cycle_counter)(int);
u64 (*get_wallclock)(void);
int (*wallclock_updated)(void);
void (*set_alarm)(u32 flags, u64 expiry, u64 period);
void (*cancel_alarm)(u32 flags);
} vmi_timer_ops;
/* Prototypes */
extern void __init vmi_time_init(void);
extern unsigned long vmi_get_wallclock(void);
extern int vmi_set_wallclock(unsigned long now);
extern unsigned long long vmi_sched_clock(void);
extern unsigned long vmi_tsc_khz(void);
#ifdef CONFIG_X86_LOCAL_APIC
extern void __devinit vmi_time_bsp_init(void);
extern void __devinit vmi_time_ap_init(void);
#endif
/*
* When run under a hypervisor, a vcpu is always in one of three states:
* running, halted, or ready. The vcpu is in the 'running' state if it
* is executing. When the vcpu executes the halt interface, the vcpu
* enters the 'halted' state and remains halted until there is some work
* pending for the vcpu (e.g. an alarm expires, host I/O completes on
* behalf of virtual I/O). At this point, the vcpu enters the 'ready'
* state (waiting for the hypervisor to reschedule it). Finally, at any
* time when the vcpu is not in the 'running' state nor the 'halted'
* state, it is in the 'ready' state.
*
* Real time is advances while the vcpu is 'running', 'ready', or
* 'halted'. Stolen time is the time in which the vcpu is in the
* 'ready' state. Available time is the remaining time -- the vcpu is
* either 'running' or 'halted'.
*
* All three views of time are accessible through the VMI cycle
* counters.
*/
/* The cycle counters. */
#define VMI_CYCLES_REAL 0
#define VMI_CYCLES_AVAILABLE 1
#define VMI_CYCLES_STOLEN 2
/* The alarm interface 'flags' bits */
#define VMI_ALARM_COUNTERS 2
#define VMI_ALARM_COUNTER_MASK 0x000000ff
#define VMI_ALARM_WIRED_IRQ0 0x00000000
#define VMI_ALARM_WIRED_LVTT 0x00010000
#define VMI_ALARM_IS_ONESHOT 0x00000000
#define VMI_ALARM_IS_PERIODIC 0x00000100
#define CONFIG_VMI_ALARM_HZ 100
#endif /* _ASM_X86_VMI_TIME_H */


@ -86,15 +86,15 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_VM86) += vm86_32.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_APB_TIMER) += apb_timer.o
obj-$(CONFIG_K8_NB) += k8.o
obj-$(CONFIG_AMD_NB) += amd_nb.o
obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
@ -107,6 +107,7 @@ obj-$(CONFIG_SCx200) += scx200.o
scx200-y += scx200_32.o
obj-$(CONFIG_OLPC) += olpc.o
obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
obj-$(CONFIG_X86_MRST) += mrst.o
@ -123,7 +124,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o


@ -13,6 +13,7 @@
#include <acpi/processor.h>
#include <asm/acpi.h>
#include <asm/mwait.h>
/*
* Initialize bm_flags based on the CPU cache properties
@ -65,16 +66,6 @@ static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */
static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
#define MWAIT_SUBSTATE_MASK (0xf)
#define MWAIT_CSTATE_MASK (0xf)
#define MWAIT_SUBSTATE_SIZE (4)
#define CPUID_MWAIT_LEAF (5)
#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
#define NATIVE_CSTATE_BEYOND_HALT (2)
static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)


@ -1,5 +1,5 @@
/*
* Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
* Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
* Leo Duran <leo.duran@amd.com>
*


@ -1,5 +1,5 @@
/*
* Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
* Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
* Leo Duran <leo.duran@amd.com>
*
@ -194,6 +194,39 @@ static inline unsigned long tbl_size(int entry_size)
return 1UL << shift;
}
/* Access to l1 and l2 indexed register spaces */
static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
{
u32 val;
pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
pci_read_config_dword(iommu->dev, 0xfc, &val);
return val;
}
static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
{
pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
pci_write_config_dword(iommu->dev, 0xfc, val);
pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
}
static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
{
u32 val;
pci_write_config_dword(iommu->dev, 0xf0, address);
pci_read_config_dword(iommu->dev, 0xf4, &val);
return val;
}
static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
{
pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
pci_write_config_dword(iommu->dev, 0xf4, val);
}
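
The four helpers above are the usual index/data indirect-register idiom: write the register index to one config-space offset (0xf8 for the L1 space, with the L1 bank in the upper half and bit 31 acting as write-enable; 0xf0 for the L2 space, with bit 8 as write-enable), then read or write the payload at the adjacent offset (0xfc or 0xf4). A minimal stand-alone sketch of the same idiom against an in-memory fake device, just to make the access pattern explicit — the fake register map is invented for illustration and models the L2 accessors:

#include <stdio.h>
#include <stdint.h>

/* Fake "config space": an index register, a data register, and backing store. */
static uint32_t backing[256];
static uint32_t index_reg;

static void cfg_write(uint32_t reg, uint32_t val)
{
        if (reg == 0xf0) {                   /* index register */
                index_reg = val;
        } else if (reg == 0xf4) {            /* data register */
                if (index_reg & (1u << 8))   /* write-enable bit, as in iommu_write_l2() */
                        backing[index_reg & 0xff] = val;
        }
}

static uint32_t cfg_read(uint32_t reg)
{
        return (reg == 0xf4) ? backing[index_reg & 0xff] : 0;
}

static uint32_t read_l2(uint8_t address)
{
        cfg_write(0xf0, address);            /* select the register ... */
        return cfg_read(0xf4);               /* ... then read its value */
}

static void write_l2(uint8_t address, uint32_t val)
{
        cfg_write(0xf0, address | (1u << 8));
        cfg_write(0xf4, val);
}

int main(void)
{
        write_l2(0x42, 0xdeadbeef);
        printf("l2[0x42] = 0x%08x\n", read_l2(0x42));
        return 0;
}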
/****************************************************************************
*
* AMD IOMMU MMIO register space handling functions
@ -619,6 +652,7 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
{
int cap_ptr = iommu->cap_ptr;
u32 range, misc;
int i, j;
pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
&iommu->cap);
@ -633,12 +667,29 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
MMIO_GET_LD(range));
iommu->evt_msi_num = MMIO_MSI_NUM(misc);
if (is_rd890_iommu(iommu->dev)) {
pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]);
pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]);
pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]);
pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]);
}
if (!is_rd890_iommu(iommu->dev))
return;
/*
* Some rd890 systems may not be fully reconfigured by the BIOS, so
* it's necessary for us to store this information so it can be
* reprogrammed on resume
*/
pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
&iommu->stored_addr_lo);
pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
&iommu->stored_addr_hi);
/* Low bit locks writes to configuration space */
iommu->stored_addr_lo &= ~1;
for (i = 0; i < 6; i++)
for (j = 0; j < 0x12; j++)
iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
for (i = 0; i < 0x83; i++)
iommu->stored_l2[i] = iommu_read_l2(iommu, i);
}
/*
@ -1127,14 +1178,53 @@ static void iommu_init_flags(struct amd_iommu *iommu)
iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
}
static void iommu_apply_quirks(struct amd_iommu *iommu)
static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
{
if (is_rd890_iommu(iommu->dev)) {
pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]);
pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]);
pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]);
pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]);
}
int i, j;
u32 ioc_feature_control;
struct pci_dev *pdev = NULL;
/* RD890 BIOSes may not have completely reconfigured the iommu */
if (!is_rd890_iommu(iommu->dev))
return;
/*
* First, we need to ensure that the iommu is enabled. This is
* controlled by a register in the northbridge
*/
pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0));
if (!pdev)
return;
/* Select Northbridge indirect register 0x75 and enable writing */
pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
/* Enable the iommu */
if (!(ioc_feature_control & 0x1))
pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
pci_dev_put(pdev);
/* Restore the iommu BAR */
pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
iommu->stored_addr_lo);
pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
iommu->stored_addr_hi);
/* Restore the l1 indirect regs for each of the 6 l1s */
for (i = 0; i < 6; i++)
for (j = 0; j < 0x12; j++)
iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
/* Restore the l2 indirect regs */
for (i = 0; i < 0x83; i++)
iommu_write_l2(iommu, i, iommu->stored_l2[i]);
/* Lock PCI setup registers */
pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
iommu->stored_addr_lo | 1);
}
/*
@ -1147,7 +1237,6 @@ static void enable_iommus(void)
for_each_iommu(iommu) {
iommu_disable(iommu);
iommu_apply_quirks(iommu);
iommu_init_flags(iommu);
iommu_set_device_table(iommu);
iommu_enable_command_buffer(iommu);
@ -1173,6 +1262,11 @@ static void disable_iommus(void)
static int amd_iommu_resume(struct sys_device *dev)
{
struct amd_iommu *iommu;
for_each_iommu(iommu)
iommu_apply_resume_quirks(iommu);
/* re-load the hardware */
enable_iommus();


@ -8,21 +8,19 @@
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <asm/k8.h>
int num_k8_northbridges;
EXPORT_SYMBOL(num_k8_northbridges);
#include <asm/amd_nb.h>
static u32 *flush_words;
struct pci_device_id k8_nb_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
{}
};
EXPORT_SYMBOL(k8_nb_ids);
struct pci_dev **k8_northbridges;
struct k8_northbridge_info k8_northbridges;
EXPORT_SYMBOL(k8_northbridges);
static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
@ -40,36 +38,45 @@ int cache_k8_northbridges(void)
int i;
struct pci_dev *dev;
if (num_k8_northbridges)
if (k8_northbridges.num)
return 0;
dev = NULL;
while ((dev = next_k8_northbridge(dev)) != NULL)
num_k8_northbridges++;
k8_northbridges.num++;
k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
GFP_KERNEL);
if (!k8_northbridges)
/* some CPU families (e.g. family 0x11) do not support GART */
if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
boot_cpu_data.x86 == 0x15)
k8_northbridges.gart_supported = 1;
k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
sizeof(void *), GFP_KERNEL);
if (!k8_northbridges.nb_misc)
return -ENOMEM;
if (!num_k8_northbridges) {
k8_northbridges[0] = NULL;
if (!k8_northbridges.num) {
k8_northbridges.nb_misc[0] = NULL;
return 0;
}
flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
if (!flush_words) {
kfree(k8_northbridges);
return -ENOMEM;
if (k8_northbridges.gart_supported) {
flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
GFP_KERNEL);
if (!flush_words) {
kfree(k8_northbridges.nb_misc);
return -ENOMEM;
}
}
dev = NULL;
i = 0;
while ((dev = next_k8_northbridge(dev)) != NULL) {
k8_northbridges[i] = dev;
pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
k8_northbridges.nb_misc[i] = dev;
if (k8_northbridges.gart_supported)
pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
}
k8_northbridges[i] = NULL;
k8_northbridges.nb_misc[i] = NULL;
return 0;
}
EXPORT_SYMBOL_GPL(cache_k8_northbridges);
@ -93,22 +100,25 @@ void k8_flush_garts(void)
unsigned long flags;
static DEFINE_SPINLOCK(gart_lock);
if (!k8_northbridges.gart_supported)
return;
/* Avoid races between AGP and IOMMU. In theory it's not needed
but I'm not sure if the hardware won't lose flush requests
when another is pending. This whole thing is so expensive anyways
that it doesn't matter to serialize more. -AK */
spin_lock_irqsave(&gart_lock, flags);
flushed = 0;
for (i = 0; i < num_k8_northbridges; i++) {
pci_write_config_dword(k8_northbridges[i], 0x9c,
for (i = 0; i < k8_northbridges.num; i++) {
pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
flush_words[i]|1);
flushed++;
}
for (i = 0; i < num_k8_northbridges; i++) {
for (i = 0; i < k8_northbridges.num; i++) {
u32 w;
/* Make sure the hardware actually executed the flush*/
for (;;) {
pci_read_config_dword(k8_northbridges[i],
pci_read_config_dword(k8_northbridges.nb_misc[i],
0x9c, &w);
if (!(w & 1))
break;


@ -231,34 +231,6 @@ static void apbt_restart_clocksource(struct clocksource *cs)
apbt_start_counter(phy_cs_timer_id);
}
/* Setup IRQ routing via IOAPIC */
#ifdef CONFIG_SMP
static void apbt_setup_irq(struct apbt_dev *adev)
{
struct irq_chip *chip;
struct irq_desc *desc;
/* timer0 irq has been setup early */
if (adev->irq == 0)
return;
desc = irq_to_desc(adev->irq);
chip = get_irq_chip(adev->irq);
disable_irq(adev->irq);
desc->status |= IRQ_MOVE_PCNTXT;
irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
/* APB timer irqs are set up as mp_irqs, timer is edge triggerred */
set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge");
enable_irq(adev->irq);
if (system_state == SYSTEM_BOOTING)
if (request_irq(adev->irq, apbt_interrupt_handler,
IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
adev->name, adev)) {
printk(KERN_ERR "Failed request IRQ for APBT%d\n",
adev->num);
}
}
#endif
static void apbt_enable_int(int n)
{
unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
@ -334,6 +306,27 @@ static int __init apbt_clockevent_register(void)
}
#ifdef CONFIG_SMP
static void apbt_setup_irq(struct apbt_dev *adev)
{
/* timer0 irq has been setup early */
if (adev->irq == 0)
return;
if (system_state == SYSTEM_BOOTING) {
irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
/* APB timer irqs are set up as mp_irqs, timer is edge type */
__set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
if (request_irq(adev->irq, apbt_interrupt_handler,
IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
adev->name, adev)) {
printk(KERN_ERR "Failed request IRQ for APBT%d\n",
adev->num);
}
} else
enable_irq(adev->irq);
}
/* Should be called with per cpu */
void apbt_setup_secondary_clock(void)
{
@ -343,7 +336,7 @@ void apbt_setup_secondary_clock(void)
/* Don't register boot CPU clockevent */
cpu = smp_processor_id();
if (cpu == boot_cpu_id)
if (!cpu)
return;
/*
* We need to calculate the scaled math multiplication factor for
@ -389,16 +382,17 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
switch (action & 0xf) {
case CPU_DEAD:
disable_irq(adev->irq);
apbt_disable_int(cpu);
if (system_state == SYSTEM_RUNNING)
if (system_state == SYSTEM_RUNNING) {
pr_debug("skipping APBT CPU %lu offline\n", cpu);
else if (adev) {
} else if (adev) {
pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
free_irq(adev->irq, adev);
}
break;
default:
pr_debug(KERN_INFO "APBT notified %lu, no action\n", action);
pr_debug("APBT notified %lu, no action\n", action);
}
return NOTIFY_OK;
}
@ -552,7 +546,7 @@ bad_count:
pr_debug("APB CS going back %lx:%lx:%lx ",
t2, last_read, t2 - last_read);
bad_count_x3:
pr_debug(KERN_INFO "tripple check enforced\n");
pr_debug("triple check enforced\n");
t0 = apbt_readl(phy_cs_timer_id,
APBTMR_N_CURRENT_VALUE);
udelay(1);


@ -27,7 +27,7 @@
#include <asm/gart.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
#include <asm/k8.h>
#include <asm/amd_nb.h>
#include <asm/x86_init.h>
int gart_iommu_aperture;
@ -307,7 +307,7 @@ void __init early_gart_iommu_check(void)
continue;
ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
aper_enabled = ctl & AMD64_GARTEN;
aper_enabled = ctl & GARTEN;
aper_order = (ctl >> 1) & 7;
aper_size = (32 * 1024 * 1024) << aper_order;
aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
@ -362,7 +362,7 @@ void __init early_gart_iommu_check(void)
continue;
ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
ctl &= ~AMD64_GARTEN;
ctl &= ~GARTEN;
write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
}
}
@ -505,8 +505,13 @@ out:
/* Fix up the north bridges */
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
int bus;
int dev_base, dev_limit;
int bus, dev_base, dev_limit;
/*
* Don't enable translation yet but enable GART IO and CPU
* accesses and set DISTLBWALKPRB since GART table memory is UC.
*/
u32 ctl = DISTLBWALKPRB | aper_order << 1;
bus = bus_dev_ranges[i].bus;
dev_base = bus_dev_ranges[i].dev_base;
@ -515,10 +520,7 @@ out:
if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
/* Don't enable translation yet. That is done later.
Assume this BIOS didn't initialise the GART so
just overwrite all previous bits */
write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
}
}


@ -52,6 +52,7 @@
#include <asm/mce.h>
#include <asm/kvm_para.h>
#include <asm/tsc.h>
#include <asm/atomic.h>
unsigned int num_processors;
@ -370,38 +371,87 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
}
/*
* Setup extended LVT, AMD specific (K8, family 10h)
* Setup extended LVT, AMD specific
*
* Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
* MCE interrupts are supported. Thus MCE offset must be set to 0.
* Software should use the LVT offsets the BIOS provides. The offsets
* are determined by the subsystems using it like those for MCE
* threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts
* are supported. Beginning with family 10h at least 4 offsets are
* available.
*
* Since the offsets must be consistent for all cores, we keep track
* of the LVT offsets in software and reserve the offset for the same
* vector also to be used on other cores. An offset is freed by
* setting the entry to APIC_EILVT_MASKED.
*
* If the BIOS is right, there should be no conflicts. Otherwise a
* "[Firmware Bug]: ..." error message is generated. However, if
* software does not properly determine the offsets, it is not
* necessarily a BIOS bug.
*/
static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
{
return (old & APIC_EILVT_MASKED)
|| (new == APIC_EILVT_MASKED)
|| ((new & ~APIC_EILVT_MASKED) == old);
}
static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
{
unsigned int rsvd; /* 0: uninitialized */
if (offset >= APIC_EILVT_NR_MAX)
return ~0;
rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED;
do {
if (rsvd &&
!eilvt_entry_is_changeable(rsvd, new))
/* may not change if vectors are different */
return rsvd;
rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
} while (rsvd != new);
return new;
}
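
reserve_eilvt_offset() is a small lock-free reservation protocol: every offset slot starts out as 0 (unreserved), the first CPU publishes its LVT value with a compare-and-swap, and later CPUs either observe the same value or report the conflict. A stand-alone C11 sketch of the same idea with the APIC specifics stripped away (SLOT_MASKED stands in for APIC_EILVT_MASKED):

#include <stdio.h>
#include <stdbool.h>
#include <stdatomic.h>

#define SLOT_MASKED (1u << 16)               /* stands in for APIC_EILVT_MASKED */
#define NR_SLOTS    4

static _Atomic unsigned int slots[NR_SLOTS]; /* 0 means "not reserved yet" */

static bool entry_is_changeable(unsigned int old, unsigned int new)
{
        return (old & SLOT_MASKED) || (new == SLOT_MASKED) ||
               ((new & ~SLOT_MASKED) == old);
}

/* Returns the value that ends up owning the slot; equal to 'new' on success. */
static unsigned int reserve_slot(int offset, unsigned int new)
{
        unsigned int rsvd = atomic_load(&slots[offset]) & ~SLOT_MASKED;

        for (;;) {
                if (rsvd && !entry_is_changeable(rsvd, new))
                        return rsvd;          /* taken by a conflicting value */

                if (atomic_compare_exchange_strong(&slots[offset], &rsvd, new))
                        return new;           /* we published our value */
                /* rsvd now holds whatever another CPU installed; re-check it */
        }
}

int main(void)
{
        printf("first claim: %#x\n", reserve_slot(1, 0x41));  /* succeeds */
        printf("same value:  %#x\n", reserve_slot(1, 0x41));  /* still 0x41 */
        printf("conflict:    %#x\n", reserve_slot(1, 0x42));  /* reports 0x41 */
        return 0;
}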
/*
* If mask=1, the LVT entry does not generate interrupts while mask=0
* enables the vector. See also the BKDGs.
*/
#define APIC_EILVT_LVTOFF_MCE 0
#define APIC_EILVT_LVTOFF_IBS 1
static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
{
unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0);
unsigned int v = (mask << 16) | (msg_type << 8) | vector;
unsigned long reg = APIC_EILVTn(offset);
unsigned int new, old, reserved;
apic_write(reg, v);
}
new = (mask << 16) | (msg_type << 8) | vector;
old = apic_read(reg);
reserved = reserve_eilvt_offset(offset, new);
u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
{
setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
return APIC_EILVT_LVTOFF_MCE;
}
if (reserved != new) {
pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but "
"vector 0x%x was already reserved by another core, "
"APIC%lX=0x%x\n",
smp_processor_id(), new, reserved, reg, old);
return -EINVAL;
}
u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
{
setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
return APIC_EILVT_LVTOFF_IBS;
if (!eilvt_entry_is_changeable(old, new)) {
pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but "
"register already in use, APIC%lX=0x%x\n",
smp_processor_id(), new, reg, old);
return -EBUSY;
}
apic_write(reg, new);
return 0;
}
EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
/*
* Program the next event, relative to now
@ -1665,10 +1715,7 @@ int __init APIC_init_uniprocessor(void)
}
#endif
#ifndef CONFIG_SMP
enable_IR_x2apic();
default_setup_apic_routing();
#endif
verify_local_APIC();
connect_bsp_APIC();

File diff suppressed because it is too large.


@ -178,7 +178,7 @@ int __init check_nmi_watchdog(void)
error:
if (nmi_watchdog == NMI_IO_APIC) {
if (!timer_through_8259)
legacy_pic->chip->mask(0);
legacy_pic->mask(0);
on_each_cpu(__acpi_nmi_disable, NULL, 1);
}


@ -54,6 +54,9 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
*/
void __init default_setup_apic_routing(void)
{
enable_IR_x2apic();
#ifdef CONFIG_X86_X2APIC
if (x2apic_mode
#ifdef CONFIG_X86_UV


@ -148,7 +148,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
/* calling is from identify_secondary_cpu() ? */
if (c->cpu_index == boot_cpu_id)
if (!c->cpu_index)
return;
/*
@ -253,37 +253,51 @@ static int __cpuinit nearby_node(int apicid)
#endif
/*
* Fixup core topology information for AMD multi-node processors.
* Assumption: Number of cores in each internal node is the same.
* Fixup core topology information for
* (1) AMD multi-node processors
* Assumption: Number of cores in each internal node is the same.
* (2) AMD processors supporting compute units
*/
#ifdef CONFIG_X86_HT
static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c)
static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
{
unsigned long long value;
u32 nodes, cores_per_node;
u32 nodes;
u8 node_id;
int cpu = smp_processor_id();
if (!cpu_has(c, X86_FEATURE_NODEID_MSR))
/* get information required for multi-node processors */
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
nodes = ((ecx >> 8) & 7) + 1;
node_id = ecx & 7;
/* get compute unit information */
smp_num_siblings = ((ebx >> 8) & 3) + 1;
c->compute_unit_id = ebx & 0xff;
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
nodes = ((value >> 3) & 7) + 1;
node_id = value & 7;
} else
return;
/* fixup topology information only once for a core */
if (cpu_has(c, X86_FEATURE_AMD_DCM))
return;
/* fixup multi-node processor information */
if (nodes > 1) {
u32 cores_per_node;
rdmsrl(MSR_FAM10H_NODE_ID, value);
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cores_per_node = c->x86_max_cores / nodes;
nodes = ((value >> 3) & 7) + 1;
if (nodes == 1)
return;
/* store NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = node_id;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cores_per_node = c->x86_max_cores / nodes;
/* store NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = value & 7;
/* fixup core id to be in range from 0 to (cores_per_node - 1) */
c->cpu_core_id = c->cpu_core_id % cores_per_node;
/* core id to be in range from 0 to (cores_per_node - 1) */
c->cpu_core_id = c->cpu_core_id % cores_per_node;
}
}
#endif
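
With topology extensions, amd_get_topology() reads everything from CPUID leaf 0x8000001e: ECX carries the node count (bits 10:8, plus one) and the node id (bits 2:0), EBX the cores per compute unit (bits 9:8, plus one) and the compute unit id (bits 7:0); without TOPOEXT it falls back to the NODE_ID MSR. A hedged worked example of the same bit extraction, using made-up register values:

#include <stdio.h>
#include <stdint.h>

/* Decode the fields amd_get_topology() reads from CPUID 0x8000001e. */
static void decode_topoext(uint32_t ebx, uint32_t ecx)
{
        unsigned int nodes           = ((ecx >> 8) & 7) + 1;
        unsigned int node_id         = ecx & 7;
        unsigned int cores_per_cu    = ((ebx >> 8) & 3) + 1;
        unsigned int compute_unit_id = ebx & 0xff;

        printf("nodes=%u node_id=%u cores/compute-unit=%u compute_unit_id=%u\n",
               nodes, node_id, cores_per_cu, compute_unit_id);
}

int main(void)
{
        /* Made-up values: a 2-node part, second node, CU #3, 2 cores per CU. */
        decode_topoext(/* ebx */ (1u << 8) | 0x03, /* ecx */ (1u << 8) | 0x1);
        return 0;
}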
@ -304,9 +318,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
c->phys_proc_id = c->initial_apicid >> bits;
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
/* fixup topology information on multi-node processors */
if ((c->x86 == 0x10) && (c->x86_model == 9))
amd_fixup_dcm(c);
amd_get_topology(c);
#endif
}
@ -412,6 +424,23 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
}
#endif
/* We need to do the following only once */
if (c != &boot_cpu_data)
return;
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
if (c->x86 > 0x10 ||
(c->x86 == 0x10 && c->x86_model >= 0x2)) {
u64 val;
rdmsrl(MSR_K7_HWCR, val);
if (!(val & BIT(24)))
printk(KERN_WARNING FW_BUG "TSC doesn't count "
"with P0 frequency!\n");
}
}
}
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
@ -523,7 +552,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
#endif
if (c->extended_cpuid_level >= 0x80000006) {
if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000))
if (cpuid_edx(0x80000006) & 0xf000)
num_cache_leaves = 4;
else
num_cache_leaves = 3;


@ -665,7 +665,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
this_cpu->c_early_init(c);
#ifdef CONFIG_SMP
c->cpu_index = boot_cpu_id;
c->cpu_index = 0;
#endif
filter_cpuid_features(c, false);
}
@ -704,16 +704,21 @@ void __init early_cpu_init(void)
}
/*
* The NOPL instruction is supposed to exist on all CPUs with
* family >= 6; unfortunately, that's not true in practice because
* of early VIA chips and (more importantly) broken virtualizers that
* are not easy to detect. In the latter case it doesn't even *fail*
* reliably, so probing for it doesn't even work. Disable it completely
* The NOPL instruction is supposed to exist on all CPUs of family >= 6;
* unfortunately, that's not true in practice because of early VIA
* chips and (more importantly) broken virtualizers that are not easy
* to detect. In the latter case it doesn't even *fail* reliably, so
* probing for it doesn't even work. Disable it completely on 32-bit
* unless we can find a reliable way to detect all the broken cases.
* Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
*/
static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
clear_cpu_cap(c, X86_FEATURE_NOPL);
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
}
static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
@ -1264,13 +1269,6 @@ void __cpuinit cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
/*
* Force FPU initialization:
*/
current_thread_info()->status = 0;
clear_used_math();
mxcsr_feature_mask_init();
fpu_init();
xsave_init();
}


@ -32,6 +32,7 @@ struct cpu_dev {
extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
extern void get_cpu_cap(struct cpuinfo_x86 *c);


@ -170,7 +170,7 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
/* calling is from identify_secondary_cpu() ? */
if (c->cpu_index == boot_cpu_id)
if (!c->cpu_index)
return;
/*


@ -17,7 +17,7 @@
#include <asm/processor.h>
#include <linux/smp.h>
#include <asm/k8.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>
#define LVL_1_INST 1
@ -306,7 +306,7 @@ struct _cache_attr {
ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
};
#ifdef CONFIG_CPU_SUP_AMD
#ifdef CONFIG_AMD_NB
/*
* L3 cache descriptors
@ -369,7 +369,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
return;
/* not in virtualized environments */
if (num_k8_northbridges == 0)
if (k8_northbridges.num == 0)
return;
/*
@ -377,7 +377,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
* never freed but this is done only on shutdown so it doesn't matter.
*/
if (!l3_caches) {
int size = num_k8_northbridges * sizeof(struct amd_l3_cache *);
int size = k8_northbridges.num * sizeof(struct amd_l3_cache *);
l3_caches = kzalloc(size, GFP_ATOMIC);
if (!l3_caches)
@ -556,12 +556,12 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
show_cache_disable_1, store_cache_disable_1);
#else /* CONFIG_CPU_SUP_AMD */
#else /* CONFIG_AMD_NB */
static void __cpuinit
amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
{
};
#endif /* CONFIG_CPU_SUP_AMD */
#endif /* CONFIG_AMD_NB */
static int
__cpuinit cpuid4_cache_lookup_regs(int index,
@ -1000,7 +1000,7 @@ static struct attribute *default_attrs[] = {
static struct attribute *default_l3_attrs[] = {
DEFAULT_SYSFS_CACHE_ATTRS,
#ifdef CONFIG_CPU_SUP_AMD
#ifdef CONFIG_AMD_NB
&cache_disable_0.attr,
&cache_disable_1.attr,
#endif

View File

@ -131,7 +131,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
u32 low = 0, high = 0, address = 0;
unsigned int bank, block;
struct thresh_restart tr;
u8 lvt_off;
int lvt_off = -1;
u8 offset;
for (bank = 0; bank < NR_BANKS; ++bank) {
for (block = 0; block < NR_BLOCKS; ++block) {
@ -162,8 +163,28 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
if (shared_bank[bank] && c->cpu_core_id)
break;
#endif
lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR,
APIC_EILVT_MSG_FIX, 0);
offset = (high & MASK_LVTOFF_HI) >> 20;
if (lvt_off < 0) {
if (setup_APIC_eilvt(offset,
THRESHOLD_APIC_VECTOR,
APIC_EILVT_MSG_FIX, 0)) {
pr_err(FW_BUG "cpu %d, failed to "
"setup threshold interrupt "
"for bank %d, block %d "
"(MSR%08X=0x%x%08x)",
smp_processor_id(), bank, block,
address, high, low);
continue;
}
lvt_off = offset;
} else if (lvt_off != offset) {
pr_err(FW_BUG "cpu %d, invalid threshold "
"interrupt offset %d for bank %d,"
"block %d (MSR%08X=0x%x%08x)",
smp_processor_id(), lvt_off, bank,
block, address, high, low);
continue;
}
high &= ~MASK_LVTOFF_HI;
high |= lvt_off << 20;
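
The threshold code now takes the LVT offset from the block's own MSR — bits 23:20 of the high word, selected by MASK_LVTOFF_HI — passes it to setup_APIC_eilvt(), and requires every bank/block on the CPU to report the same offset. A small sketch of the extract/insert step performed above; the numeric value of MASK_LVTOFF_HI is an assumption (it is defined outside this hunk) chosen to match the shift by 20:

#include <stdio.h>
#include <stdint.h>

#define MASK_LVTOFF_HI 0x00F00000u   /* assumption: bits 23:20 of the MSR high word */

static unsigned int get_lvt_offset(uint32_t high)
{
        return (high & MASK_LVTOFF_HI) >> 20;
}

static uint32_t set_lvt_offset(uint32_t high, unsigned int offset)
{
        high &= ~MASK_LVTOFF_HI;
        return high | (offset << 20);
}

int main(void)
{
        uint32_t high = 0x00100000;             /* BIOS programmed offset 1 */
        unsigned int off = get_lvt_offset(high);

        printf("BIOS offset: %u\n", off);
        printf("rewritten high word: 0x%08x\n", set_lvt_offset(high, off));
        return 0;
}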


@ -350,7 +350,7 @@ static void intel_thermal_interrupt(void)
static void unexpected_thermal_interrupt(void)
{
printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
smp_processor_id());
add_taint(TAINT_MACHINE_CHECK);
}


@ -827,7 +827,7 @@ int __init amd_special_default_mtrr(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return 0;
if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
if (boot_cpu_data.x86 < 0xf)
return 0;
/* In case some hypervisor doesn't pass SYSCFG through: */
if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)


@ -64,18 +64,59 @@ static inline void k8_check_syscfg_dram_mod_en(void)
}
}
/* Get the size of contiguous MTRR range */
static u64 get_mtrr_size(u64 mask)
{
u64 size;
mask >>= PAGE_SHIFT;
mask |= size_or_mask;
size = -mask;
size <<= PAGE_SHIFT;
return size;
}
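
get_mtrr_size() recovers the length of a variable-range MTRR from its mask: shift out the page offset, OR in size_or_mask (which fills the bits above the CPU's physical address width), and negate — the two's complement of the widened mask is exactly the range size. A stand-alone worked example; the 40-bit physical address width and the size_or_mask construction are assumptions made for illustration:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

/* size_or_mask fills all bits at and above the physical address width. */
static uint64_t size_or_mask(unsigned int phys_bits)
{
        return ~((1ULL << (phys_bits - PAGE_SHIFT)) - 1);
}

static uint64_t get_mtrr_size(uint64_t mask, unsigned int phys_bits)
{
        uint64_t size;

        mask >>= PAGE_SHIFT;
        mask |= size_or_mask(phys_bits);
        size = -mask;
        size <<= PAGE_SHIFT;
        return size;
}

int main(void)
{
        /* A 256 MB range on a CPU with 40 physical address bits:
         * the MTRR mask covers bits 39..28, i.e. 0xFFF0000000. */
        uint64_t mask = 0xFFF0000000ULL;

        printf("range size: %llu MB\n",
               (unsigned long long)(get_mtrr_size(mask, 40) >> 20));
        return 0;
}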
/*
* Returns the effective MTRR type for the region
* Error returns:
* - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR
* - 0xFF - when MTRR is not enabled
* Check and return the effective type for MTRR-MTRR type overlap.
* Returns 1 if the effective type is UNCACHEABLE, else returns 0
*/
u8 mtrr_type_lookup(u64 start, u64 end)
static int check_type_overlap(u8 *prev, u8 *curr)
{
if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) {
*prev = MTRR_TYPE_UNCACHABLE;
*curr = MTRR_TYPE_UNCACHABLE;
return 1;
}
if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) ||
(*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK)) {
*prev = MTRR_TYPE_WRTHROUGH;
*curr = MTRR_TYPE_WRTHROUGH;
}
if (*prev != *curr) {
*prev = MTRR_TYPE_UNCACHABLE;
*curr = MTRR_TYPE_UNCACHABLE;
return 1;
}
return 0;
}
/*
* Error/Semi-error returns:
* 0xFF - when MTRR is not enabled
* *repeat == 1 implies [start:end] spanned across MTRR range and type returned
* corresponds only to [start:*partial_end].
* Caller has to lookup again for [*partial_end:end].
*/
static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
{
int i;
u64 base, mask;
u8 prev_match, curr_match;
*repeat = 0;
if (!mtrr_state_set)
return 0xFF;
@ -126,8 +167,34 @@ u8 mtrr_type_lookup(u64 start, u64 end)
start_state = ((start & mask) == (base & mask));
end_state = ((end & mask) == (base & mask));
if (start_state != end_state)
return 0xFE;
if (start_state != end_state) {
/*
* We have start:end spanning across an MTRR.
* We split the region into
* either
* (start:mtrr_end) (mtrr_end:end)
* or
* (start:mtrr_start) (mtrr_start:end)
* depending on kind of overlap.
* Return the type for first region and a pointer to
* the start of second region so that caller will
* lookup again on the second region.
* Note: This way we handle multiple overlaps as well.
*/
if (start_state)
*partial_end = base + get_mtrr_size(mask);
else
*partial_end = base;
if (unlikely(*partial_end <= start)) {
WARN_ON(1);
*partial_end = start + PAGE_SIZE;
}
end = *partial_end - 1; /* end is inclusive */
*repeat = 1;
}
if ((start & mask) != (base & mask))
continue;
@ -138,21 +205,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
continue;
}
if (prev_match == MTRR_TYPE_UNCACHABLE ||
curr_match == MTRR_TYPE_UNCACHABLE) {
return MTRR_TYPE_UNCACHABLE;
}
if ((prev_match == MTRR_TYPE_WRBACK &&
curr_match == MTRR_TYPE_WRTHROUGH) ||
(prev_match == MTRR_TYPE_WRTHROUGH &&
curr_match == MTRR_TYPE_WRBACK)) {
prev_match = MTRR_TYPE_WRTHROUGH;
curr_match = MTRR_TYPE_WRTHROUGH;
}
if (prev_match != curr_match)
return MTRR_TYPE_UNCACHABLE;
if (check_type_overlap(&prev_match, &curr_match))
return curr_match;
}
if (mtrr_tom2) {
@ -166,6 +220,36 @@ u8 mtrr_type_lookup(u64 start, u64 end)
return mtrr_state.def_type;
}
/*
* Returns the effective MTRR type for the region
* Error return:
* 0xFF - when MTRR is not enabled
*/
u8 mtrr_type_lookup(u64 start, u64 end)
{
u8 type, prev_type;
int repeat;
u64 partial_end;
type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
/*
* Common path is with repeat = 0.
* However, we can have cases where [start:end] spans across some
* MTRR range. Do repeated lookups for that case here.
*/
while (repeat) {
prev_type = type;
start = partial_end;
type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
if (check_type_overlap(&prev_type, &type))
return type;
}
return type;
}
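
The wrapper keeps the common case to a single pass (repeat == 0) and, when a request straddles an MTRR boundary, looks up the remainder separately and merges the two results through check_type_overlap(). A stand-alone sketch of just those merge rules — uncacheable dominates, write-back combined with write-through degrades to write-through, any other disagreement degrades to uncacheable; the numeric type encodings are the standard MTRR ones and are not shown in this hunk:

#include <stdio.h>

/* MTRR memory types, as in <asm/mtrr.h> */
enum { MTRR_TYPE_UNCACHABLE = 0, MTRR_TYPE_WRCOMB = 1,
       MTRR_TYPE_WRTHROUGH = 4, MTRR_TYPE_WRPROT = 5, MTRR_TYPE_WRBACK = 6 };

/* Same merge rules as check_type_overlap(). Returns 1 when the result is
 * already uncacheable and further lookups can stop early. */
static int merge_types(unsigned char *prev, unsigned char *curr)
{
        if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) {
                *prev = *curr = MTRR_TYPE_UNCACHABLE;
                return 1;
        }
        if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) ||
            (*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK))
                *prev = *curr = MTRR_TYPE_WRTHROUGH;
        if (*prev != *curr) {
                *prev = *curr = MTRR_TYPE_UNCACHABLE;
                return 1;
        }
        return 0;
}

int main(void)
{
        unsigned char a = MTRR_TYPE_WRBACK, b = MTRR_TYPE_WRTHROUGH;

        merge_types(&a, &b);    /* a request spanning a WB and a WT range */
        printf("effective type: %u (write-through)\n", a);
        return 0;
}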
/* Get the MSR pair relating to a var range */
static void
get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)


@ -700,11 +700,10 @@ static void probe_nmi_watchdog(void)
{
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17)
return;
wd_ops = &k7_wd_ops;
break;
if (boot_cpu_data.x86 == 6 ||
(boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
wd_ops = &k7_wd_ops;
return;
case X86_VENDOR_INTEL:
/* Work around where perfctr1 doesn't have a working enable
* bit as described in the following errata:


@ -44,6 +44,12 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{ X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
{ X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
{ X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 },
{ X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 },
{ X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 },
{ X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 },
{ X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 },
{ X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 },
{ X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 },
{ 0, 0, 0, 0, 0 }
};


@ -34,7 +34,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (!csize)
return 0;
vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
if (!vaddr)
return -ENOMEM;
@ -46,6 +46,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
} else
memcpy(buf, vaddr + offset, csize);
set_iounmap_nonlazy();
iounmap(vaddr);
return csize;
}

Some files were not shown because too many files have changed in this diff.