aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds2009-04-01 09:47:12 -0700
committerLinus Torvalds2009-04-01 09:47:12 -0700
commite76e5b2c663ac74ae6a542ac20795c625e36a5cd (patch)
tree2e7271be1f3a26832f4b121839fc4044fbbf27a6
parent32527bc0e4b4fa7711ad1c923cf64ae72a7ffd9d (diff)
parenteeafda70bf2807544e96fa4e52b2433cd470ff46 (diff)
Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6
* 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6: (88 commits) PCI: fix HT MSI mapping fix PCI: don't enable too much HT MSI mapping x86/PCI: make pci=lastbus=255 work when acpi is on PCI: save and restore PCIe 2.0 registers PCI: update fakephp for bus_id removal PCI: fix kernel oops on bridge removal PCI: fix conflict between SR-IOV and config space sizing powerpc/PCI: include pci.h in powerpc MSI implementation PCI Hotplug: schedule fakephp for feature removal PCI Hotplug: rename legacy_fakephp to fakephp PCI Hotplug: restore fakephp interface with complete reimplementation PCI: Introduce /sys/bus/pci/devices/.../rescan PCI: Introduce /sys/bus/pci/devices/.../remove PCI: Introduce /sys/bus/pci/rescan PCI: Introduce pci_rescan_bus() PCI: do not enable bridges more than once PCI: do not initialize bridges more than once PCI: always scan child buses PCI: pci_scan_slot() returns newly found devices PCI: don't scan existing devices ... Fix trivial append-only conflict in Documentation/feature-removal-schedule.txt
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci70
-rw-r--r--Documentation/DocBook/kernel-api.tmpl1
-rw-r--r--Documentation/PCI/MSI-HOWTO.txt814
-rw-r--r--Documentation/PCI/pci-iov-howto.txt99
-rw-r--r--Documentation/feature-removal-schedule.txt32
-rw-r--r--Documentation/filesystems/sysfs-pci.txt10
-rw-r--r--Documentation/kernel-parameters.txt11
-rw-r--r--arch/alpha/include/asm/pci.h14
-rw-r--r--arch/alpha/kernel/Makefile2
-rw-r--r--arch/alpha/kernel/pci-sysfs.c366
-rw-r--r--arch/powerpc/include/asm/pci.h4
-rw-r--r--arch/powerpc/kernel/msi.c5
-rw-r--r--arch/x86/include/asm/pci.h3
-rw-r--r--arch/x86/kernel/apic/io_apic.c4
-rw-r--r--arch/x86/kernel/pci-dma.c3
-rw-r--r--arch/x86/pci/early.c19
-rw-r--r--arch/x86/pci/fixup.c20
-rw-r--r--arch/x86/pci/legacy.c3
-rw-r--r--arch/x86/pci/mmconfig-shared.c227
-rw-r--r--arch/x86/pci/mmconfig_64.c17
-rw-r--r--drivers/acpi/pci_root.c180
-rw-r--r--drivers/pci/Kconfig10
-rw-r--r--drivers/pci/Makefile2
-rw-r--r--drivers/pci/bus.c8
-rw-r--r--drivers/pci/hotplug/acpi_pcihp.c58
-rw-r--r--drivers/pci/hotplug/fakephp.c444
-rw-r--r--drivers/pci/hotplug/pciehp.h13
-rw-r--r--drivers/pci/hotplug/pciehp_acpi.c21
-rw-r--r--drivers/pci/hotplug/pciehp_core.c18
-rw-r--r--drivers/pci/hotplug/pciehp_hpc.c34
-rw-r--r--drivers/pci/hotplug/shpchp.h10
-rw-r--r--drivers/pci/hotplug/shpchp_pci.c2
-rw-r--r--drivers/pci/intel-iommu.c2
-rw-r--r--drivers/pci/iov.c680
-rw-r--r--drivers/pci/msi.c426
-rw-r--r--drivers/pci/msi.h6
-rw-r--r--drivers/pci/pci-acpi.c215
-rw-r--r--drivers/pci/pci-driver.c81
-rw-r--r--drivers/pci/pci-sysfs.c124
-rw-r--r--drivers/pci/pci.c193
-rw-r--r--drivers/pci/pci.h65
-rw-r--r--drivers/pci/pcie/aer/aerdrv.c28
-rw-r--r--drivers/pci/pcie/aer/aerdrv_acpi.c2
-rw-r--r--drivers/pci/pcie/aer/aerdrv_core.c10
-rw-r--r--drivers/pci/pcie/portdrv.h14
-rw-r--r--drivers/pci/pcie/portdrv_bus.c18
-rw-r--r--drivers/pci/pcie/portdrv_core.c379
-rw-r--r--drivers/pci/pcie/portdrv_pci.c50
-rw-r--r--drivers/pci/probe.c210
-rw-r--r--drivers/pci/quirks.c221
-rw-r--r--drivers/pci/remove.c4
-rw-r--r--drivers/pci/search.c2
-rw-r--r--drivers/pci/setup-bus.c7
-rw-r--r--drivers/pci/setup-res.c15
-rw-r--r--drivers/pci/slot.c18
-rw-r--r--include/linux/acpi.h34
-rw-r--r--include/linux/msi.h13
-rw-r--r--include/linux/pci-acpi.h67
-rw-r--r--include/linux/pci.h61
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/pci_regs.h37
-rw-r--r--include/linux/pcieport_if.h36
62 files changed, 3641 insertions, 1902 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index e638e15a8895..97ad190e13af 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -41,6 +41,49 @@ Description:
for the device and attempt to bind to it. For example:
# echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id
+What: /sys/bus/pci/drivers/.../remove_id
+Date: February 2009
+Contact: Chris Wright <chrisw@sous-sol.org>
+Description:
+ Writing a device ID to this file will remove an ID
+ that was dynamically added via the new_id sysfs entry.
+ The format for the device ID is:
+ VVVV DDDD SVVV SDDD CCCC MMMM. That is Vendor ID, Device
+ ID, Subsystem Vendor ID, Subsystem Device ID, Class,
+ and Class Mask. The Vendor ID and Device ID fields are
+ required, the rest are optional. After successfully
+ removing an ID, the driver will no longer support the
+ device. This is useful to ensure auto probing won't
+ match the driver to the device. For example:
+ # echo "8086 10f5" > /sys/bus/pci/drivers/foo/remove_id
+
+What: /sys/bus/pci/rescan
+Date: January 2009
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ Writing a non-zero value to this attribute will
+ force a rescan of all PCI buses in the system, and
+ re-discover previously removed devices.
+ Depends on CONFIG_HOTPLUG.
+
+What: /sys/bus/pci/devices/.../remove
+Date: January 2009
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ Writing a non-zero value to this attribute will
+ hot-remove the PCI device and any of its children.
+ Depends on CONFIG_HOTPLUG.
+
+What: /sys/bus/pci/devices/.../rescan
+Date: January 2009
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ Writing a non-zero value to this attribute will
+ force a rescan of the device's parent bus and all
+ child buses, and re-discover devices removed earlier
+ from this part of the device tree.
+ Depends on CONFIG_HOTPLUG.
+
What: /sys/bus/pci/devices/.../vpd
Date: February 2008
Contact: Ben Hutchings <bhutchings@solarflare.com>
@@ -52,3 +95,30 @@ Description:
that some devices may have malformatted data. If the
underlying VPD has a writable section then the
corresponding section of this file will be writable.
+
+What: /sys/bus/pci/devices/.../virtfnN
+Date: March 2009
+Contact: Yu Zhao <yu.zhao@intel.com>
+Description:
+ This symbolic link appears when hardware supports the SR-IOV
+ capability and the Physical Function driver has enabled it.
+ The symbolic link points to the PCI device sysfs entry of the
+ Virtual Function whose index is N (0...MaxVFs-1).
+
+What: /sys/bus/pci/devices/.../dep_link
+Date: March 2009
+Contact: Yu Zhao <yu.zhao@intel.com>
+Description:
+ This symbolic link appears when hardware supports the SR-IOV
+ capability and the Physical Function driver has enabled it,
+ and this device has vendor specific dependencies with others.
+ The symbolic link points to the PCI device sysfs entry of
+ Physical Function this device depends on.
+
+What: /sys/bus/pci/devices/.../physfn
+Date: March 2009
+Contact: Yu Zhao <yu.zhao@intel.com>
+Description:
+ This symbolic link appears when a device is a Virtual Function.
+ The symbolic link points to the PCI device sysfs entry of the
+ Physical Function this device associates with.
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index bc962cda6504..58c194572c76 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -199,6 +199,7 @@ X!Edrivers/pci/hotplug.c
-->
!Edrivers/pci/probe.c
!Edrivers/pci/rom.c
+!Edrivers/pci/iov.c
</sect1>
<sect1><title>PCI Hotplug Support Library</title>
!Edrivers/pci/hotplug/pci_hotplug_core.c
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 256defd7e174..dcf7acc720e1 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -4,506 +4,356 @@
Revised Feb 12, 2004 by Martine Silbermann
email: Martine.Silbermann@hp.com
Revised Jun 25, 2004 by Tom L Nguyen
+ Revised Jul 9, 2008 by Matthew Wilcox <willy@linux.intel.com>
+ Copyright 2003, 2008 Intel Corporation
1. About this guide
-This guide describes the basics of Message Signaled Interrupts (MSI),
-the advantages of using MSI over traditional interrupt mechanisms,
-and how to enable your driver to use MSI or MSI-X. Also included is
-a Frequently Asked Questions (FAQ) section.
-
-1.1 Terminology
-
-PCI devices can be single-function or multi-function. In either case,
-when this text talks about enabling or disabling MSI on a "device
-function," it is referring to one specific PCI device and function and
-not to all functions on a PCI device (unless the PCI device has only
-one function).
-
-2. Copyright 2003 Intel Corporation
-
-3. What is MSI/MSI-X?
-
-Message Signaled Interrupt (MSI), as described in the PCI Local Bus
-Specification Revision 2.3 or later, is an optional feature, and a
-required feature for PCI Express devices. MSI enables a device function
-to request service by sending an Inbound Memory Write on its PCI bus to
-the FSB as a Message Signal Interrupt transaction. Because MSI is
-generated in the form of a Memory Write, all transaction conditions,
-such as a Retry, Master-Abort, Target-Abort or normal completion, are
-supported.
-
-A PCI device that supports MSI must also support pin IRQ assertion
-interrupt mechanism to provide backward compatibility for systems that
-do not support MSI. In systems which support MSI, the bus driver is
-responsible for initializing the message address and message data of
-the device function's MSI/MSI-X capability structure during device
-initial configuration.
-
-An MSI capable device function indicates MSI support by implementing
-the MSI/MSI-X capability structure in its PCI capability list. The
-device function may implement both the MSI capability structure and
-the MSI-X capability structure; however, the bus driver should not
-enable both.
-
-The MSI capability structure contains Message Control register,
-Message Address register and Message Data register. These registers
-provide the bus driver control over MSI. The Message Control register
-indicates the MSI capability supported by the device. The Message
-Address register specifies the target address and the Message Data
-register specifies the characteristics of the message. To request
-service, the device function writes the content of the Message Data
-register to the target address. The device and its software driver
-are prohibited from writing to these registers.
-
-The MSI-X capability structure is an optional extension to MSI. It
-uses an independent and separate capability structure. There are
-some key advantages to implementing the MSI-X capability structure
-over the MSI capability structure as described below.
-
- - Support a larger maximum number of vectors per function.
-
- - Provide the ability for system software to configure
- each vector with an independent message address and message
- data, specified by a table that resides in Memory Space.
-
- - MSI and MSI-X both support per-vector masking. Per-vector
- masking is an optional extension of MSI but a required
- feature for MSI-X. Per-vector masking provides the kernel the
- ability to mask/unmask a single MSI while running its
- interrupt service routine. If per-vector masking is
- not supported, then the device driver should provide the
- hardware/software synchronization to ensure that the device
- generates MSI when the driver wants it to do so.
-
-4. Why use MSI?
-
-As a benefit to the simplification of board design, MSI allows board
-designers to remove out-of-band interrupt routing. MSI is another
-step towards a legacy-free environment.
-
-Due to increasing pressure on chipset and processor packages to
-reduce pin count, the need for interrupt pins is expected to
-diminish over time. Devices, due to pin constraints, may implement
-messages to increase performance.
-
-PCI Express endpoints uses INTx emulation (in-band messages) instead
-of IRQ pin assertion. Using INTx emulation requires interrupt
-sharing among devices connected to the same node (PCI bridge) while
-MSI is unique (non-shared) and does not require BIOS configuration
-support. As a result, the PCI Express technology requires MSI
-support for better interrupt performance.
-
-Using MSI enables the device functions to support two or more
-vectors, which can be configured to target different CPUs to
-increase scalability.
-
-5. Configuring a driver to use MSI/MSI-X
-
-By default, the kernel will not enable MSI/MSI-X on all devices that
-support this capability. The CONFIG_PCI_MSI kernel option
-must be selected to enable MSI/MSI-X support.
-
-5.1 Including MSI/MSI-X support into the kernel
-
-To allow MSI/MSI-X capable device drivers to selectively enable
-MSI/MSI-X (using pci_enable_msi()/pci_enable_msix() as described
-below), the VECTOR based scheme needs to be enabled by setting
-CONFIG_PCI_MSI during kernel config.
-
-Since the target of the inbound message is the local APIC, providing
-CONFIG_X86_LOCAL_APIC must be enabled as well as CONFIG_PCI_MSI.
-
-5.2 Configuring for MSI support
-
-Due to the non-contiguous fashion in vector assignment of the
-existing Linux kernel, this version does not support multiple
-messages regardless of a device function is capable of supporting
-more than one vector. To enable MSI on a device function's MSI
-capability structure requires a device driver to call the function
-pci_enable_msi() explicitly.
-
-5.2.1 API pci_enable_msi
+This guide describes the basics of Message Signaled Interrupts (MSIs),
+the advantages of using MSI over traditional interrupt mechanisms, how
+to change your driver to use MSI or MSI-X and some basic diagnostics to
+try if a device doesn't support MSIs.
-int pci_enable_msi(struct pci_dev *dev)
-With this new API, a device driver that wants to have MSI
-enabled on its device function must call this API to enable MSI.
-A successful call will initialize the MSI capability structure
-with ONE vector, regardless of whether a device function is
-capable of supporting multiple messages. This vector replaces the
-pre-assigned dev->irq with a new MSI vector. To avoid a conflict
-of the new assigned vector with existing pre-assigned vector requires
-a device driver to call this API before calling request_irq().
+2. What are MSIs?
-5.2.2 API pci_disable_msi
+A Message Signaled Interrupt is a write from the device to a special
+address which causes an interrupt to be received by the CPU.
-void pci_disable_msi(struct pci_dev *dev)
+The MSI capability was first specified in PCI 2.2 and was later enhanced
+in PCI 3.0 to allow each interrupt to be masked individually. The MSI-X
+capability was also introduced with PCI 3.0. It supports more interrupts
+per device than MSI and allows interrupts to be independently configured.
-This API should always be used to undo the effect of pci_enable_msi()
-when a device driver is unloading. This API restores dev->irq with
-the pre-assigned IOAPIC vector and switches a device's interrupt
-mode to PCI pin-irq assertion/INTx emulation mode.
-
-Note that a device driver should always call free_irq() on the MSI vector
-that it has done request_irq() on before calling this API. Failure to do
-so results in a BUG_ON() and a device will be left with MSI enabled and
-leaks its vector.
-
-5.2.3 MSI mode vs. legacy mode diagram
-
-The below diagram shows the events which switch the interrupt
-mode on the MSI-capable device function between MSI mode and
-PIN-IRQ assertion mode.
-
- ------------ pci_enable_msi ------------------------
- | | <=============== | |
- | MSI MODE | | PIN-IRQ ASSERTION MODE |
- | | ===============> | |
- ------------ pci_disable_msi ------------------------
-
-
-Figure 1. MSI Mode vs. Legacy Mode
-
-In Figure 1, a device operates by default in legacy mode. Legacy
-in this context means PCI pin-irq assertion or PCI-Express INTx
-emulation. A successful MSI request (using pci_enable_msi()) switches
-a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector
-stored in dev->irq will be saved by the PCI subsystem and a new
-assigned MSI vector will replace dev->irq.
-
-To return back to its default mode, a device driver should always call
-pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a
-device driver should always call free_irq() on the MSI vector it has
-done request_irq() on before calling pci_disable_msi(). Failure to do
-so results in a BUG_ON() and a device will be left with MSI enabled and
-leaks its vector. Otherwise, the PCI subsystem restores a device's
-dev->irq with a pre-assigned IOAPIC vector and marks the released
-MSI vector as unused.
-
-Once being marked as unused, there is no guarantee that the PCI
-subsystem will reserve this MSI vector for a device. Depending on
-the availability of current PCI vector resources and the number of
-MSI/MSI-X requests from other drivers, this MSI may be re-assigned.
-
-For the case where the PCI subsystem re-assigns this MSI vector to
-another driver, a request to switch back to MSI mode may result
-in being assigned a different MSI vector or a failure if no more
-vectors are available.
-
-5.3 Configuring for MSI-X support
-
-Due to the ability of the system software to configure each vector of
-the MSI-X capability structure with an independent message address
-and message data, the non-contiguous fashion in vector assignment of
-the existing Linux kernel has no impact on supporting multiple
-messages on an MSI-X capable device functions. To enable MSI-X on
-a device function's MSI-X capability structure requires its device
-driver to call the function pci_enable_msix() explicitly.
-
-The function pci_enable_msix(), once invoked, enables either
-all or nothing, depending on the current availability of PCI vector
-resources. If the PCI vector resources are available for the number
-of vectors requested by a device driver, this function will configure
-the MSI-X table of the MSI-X capability structure of a device with
-requested messages. To emphasize this reason, for example, a device
-may be capable for supporting the maximum of 32 vectors while its
-software driver usually may request 4 vectors. It is recommended
-that the device driver should call this function once during the
-initialization phase of the device driver.
-
-Unlike the function pci_enable_msi(), the function pci_enable_msix()
-does not replace the pre-assigned IOAPIC dev->irq with a new MSI
-vector because the PCI subsystem writes the 1:1 vector-to-entry mapping
-into the field vector of each element contained in a second argument.
-Note that the pre-assigned IOAPIC dev->irq is valid only if the device
-operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt at
-using dev->irq by the device driver to request for interrupt service
-may result in unpredictable behavior.
-
-For each MSI-X vector granted, a device driver is responsible for calling
-other functions like request_irq(), enable_irq(), etc. to enable
-this vector with its corresponding interrupt service handler. It is
-a device driver's choice to assign all vectors with the same
-interrupt service handler or each vector with a unique interrupt
-service handler.
-
-5.3.1 Handling MMIO address space of MSI-X Table
-
-The PCI 3.0 specification has implementation notes that MMIO address
-space for a device's MSI-X structure should be isolated so that the
-software system can set different pages for controlling accesses to the
-MSI-X structure. The implementation of MSI support requires the PCI
-subsystem, not a device driver, to maintain full control of the MSI-X
-table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
-table/MSI-X PBA. A device driver should not access the MMIO address
-space of the MSI-X table/MSI-X PBA.
-
-5.3.2 API pci_enable_msix
+Devices may support both MSI and MSI-X, but only one can be enabled at
+a time.
-int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
-This API enables a device driver to request the PCI subsystem
-to enable MSI-X messages on its hardware device. Depending on
-the availability of PCI vectors resources, the PCI subsystem enables
-either all or none of the requested vectors.
+3. Why use MSIs?
+
+There are three reasons why using MSIs can give an advantage over
+traditional pin-based interrupts.
+
+Pin-based PCI interrupts are often shared amongst several devices.
+To support this, the kernel must call each interrupt handler associated
+with an interrupt, which leads to reduced performance for the system as
+a whole. MSIs are never shared, so this problem cannot arise.
+
+When a device writes data to memory, then raises a pin-based interrupt,
+it is possible that the interrupt may arrive before all the data has
+arrived in memory (this becomes more likely with devices behind PCI-PCI
+bridges). In order to ensure that all the data has arrived in memory,
+the interrupt handler must read a register on the device which raised
+the interrupt. PCI transaction ordering rules require that all the data
+arrives in memory before the value can be returned from the register.
+Using MSIs avoids this problem as the interrupt-generating write cannot
+pass the data writes, so by the time the interrupt is raised, the driver
+knows that all the data has arrived in memory.
+
+PCI devices can only support a single pin-based interrupt per function.
+Often drivers have to query the device to find out what event has
+occurred, slowing down interrupt handling for the common case. With
+MSIs, a device can support more interrupts, allowing each interrupt
+to be specialised to a different purpose. One possible design gives
+infrequent conditions (such as errors) their own interrupt which allows
+the driver to handle the normal interrupt handling path more efficiently.
+Other possible designs include giving one interrupt to each packet queue
+in a network card or each port in a storage controller.
+
+
+4. How to use MSIs
+
+PCI devices are initialised to use pin-based interrupts. The device
+driver has to set up the device to use MSI or MSI-X. Not all machines
+support MSIs correctly, and for those machines, the APIs described below
+will simply fail and the device will continue to use pin-based interrupts.
+
+4.1 Include kernel support for MSIs
+
+To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI
+option enabled. This option is only available on some architectures,
+and it may depend on some other options also being set. For example,
+on x86, you must also enable X86_UP_APIC or SMP in order to see the
+CONFIG_PCI_MSI option.
+
+4.2 Using MSI
+
+Most of the hard work is done for the driver in the PCI layer. It simply
+has to request that the PCI layer set up the MSI capability for this
+device.
+
+4.2.1 pci_enable_msi
+
+int pci_enable_msi(struct pci_dev *dev)
+
+A successful call will allocate ONE interrupt to the device, regardless
+of how many MSIs the device supports. The device will be switched from
+pin-based interrupt mode to MSI mode. The dev->irq number is changed
+to a new number which represents the message signaled interrupt.
+This function should be called before the driver calls request_irq()
+since enabling MSIs disables the pin-based IRQ and the driver will not
+receive interrupts on the old interrupt.
+
+4.2.2 pci_enable_msi_block
+
+int pci_enable_msi_block(struct pci_dev *dev, int count)
+
+This variation on the above call allows a device driver to request multiple
+MSIs. The MSI specification only allows interrupts to be allocated in
+powers of two, up to a maximum of 2^5 (32).
+
+If this function returns 0, it has succeeded in allocating at least as many
+interrupts as the driver requested (it may have allocated more in order
+to satisfy the power-of-two requirement). In this case, the function
+enables MSI on this device and updates dev->irq to be the lowest of
+the new interrupts assigned to it. The other interrupts assigned to
+the device are in the range dev->irq to dev->irq + count - 1.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to request any more MSI interrupts for
+this device. If this function returns a positive number, it will be
+less than 'count' and indicate the number of interrupts that could have
+been allocated. In neither case will the irq value have been
+updated, nor will the device have been switched into MSI mode.
+
+The device driver must decide what action to take if
+pci_enable_msi_block() returns a value less than the number asked for.
+Some devices can make use of fewer interrupts than the maximum they
+request; in this case the driver should call pci_enable_msi_block()
+again. Note that it is not guaranteed to succeed, even when the
+'count' has been reduced to the value returned from a previous call to
+pci_enable_msi_block(). This is because there are multiple constraints
+on the number of vectors that can be allocated; pci_enable_msi_block()
+will return as soon as it finds any constraint that doesn't allow the
+call to succeed.
+
+4.2.3 pci_disable_msi
+
+void pci_disable_msi(struct pci_dev *dev)
-Argument 'dev' points to the device (pci_dev) structure.
+This function should be used to undo the effect of pci_enable_msi() or
+pci_enable_msi_block(). Calling it restores dev->irq to the pin-based
+interrupt number and frees the previously allocated message signaled
+interrupt(s). The interrupt may subsequently be assigned to another
+device, so drivers should not cache the value of dev->irq.
-Argument 'entries' is a pointer to an array of msix_entry structs.
-The number of entries is indicated in argument 'nvec'.
-struct msix_entry is defined in /driver/pci/msi.h:
+A device driver must always call free_irq() on the interrupt(s)
+for which it has called request_irq() before calling this function.
+Failure to do so will result in a BUG_ON(), the device will be left with
+MSI enabled and will leak its vector.
+
+4.3 Using MSI-X
+
+The MSI-X capability is much more flexible than the MSI capability.
+It supports up to 2048 interrupts, each of which can be controlled
+independently. To support this flexibility, drivers must use an array of
+`struct msix_entry':
struct msix_entry {
u16 vector; /* kernel uses to write alloc vector */
u16 entry; /* driver uses to specify entry */
};
-A device driver is responsible for initializing the field 'entry' of
-each element with a unique entry supported by MSI-X table. Otherwise,
--EINVAL will be returned as a result. A successful return of zero
-indicates the PCI subsystem completed initializing each of the requested
-entries of the MSI-X table with message address and message data.
-Last but not least, the PCI subsystem will write the 1:1
-vector-to-entry mapping into the field 'vector' of each element. A
-device driver is responsible for keeping track of allocated MSI-X
-vectors in its internal data structure.
-
-A return of zero indicates that the number of MSI-X vectors was
-successfully allocated. A return of greater than zero indicates
-MSI-X vector shortage. Or a return of less than zero indicates
-a failure. This failure may be a result of duplicate entries
-specified in second argument, or a result of no available vector,
-or a result of failing to initialize MSI-X table entries.
-
-5.3.3 API pci_disable_msix
+This allows for the device to use these interrupts in a sparse fashion;
+for example it could use interrupts 3 and 1027 and allocate only a
+two-element array. The driver is expected to fill in the 'entry' value
+in each element of the array to indicate which entries it wants the kernel
+to assign interrupts for. It is invalid to fill in two entries with the
+same number.
+
+4.3.1 pci_enable_msix
+
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+
+Calling this function asks the PCI subsystem to allocate 'nvec' MSIs.
+The 'entries' argument is a pointer to an array of msix_entry structs
+which should be at least 'nvec' entries in size. On success, the
+function will return 0 and the device will have been switched into
+MSI-X interrupt mode. The 'vector' elements in each entry will have
+been filled in with the interrupt number. The driver should then call
+request_irq() for each 'vector' that it decides to use.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to allocate any more MSI-X interrupts for
+this device. If it returns a positive number, it indicates the maximum
+number of interrupt vectors that could have been allocated. See example
+below.
+
+This function, in contrast with pci_enable_msi(), does not adjust
+dev->irq. The device will not generate interrupts for this interrupt
+number once MSI-X is enabled. The device driver is responsible for
+keeping track of the interrupts assigned to the MSI-X vectors so it can
+free them again later.
+
+Device drivers should normally call this function once per device
+during the initialization phase.
+
+It is ideal if drivers can cope with a variable number of MSI-X interrupts,
+there are many reasons why the platform may not be able to provide the
+exact number a driver asks for.
+
+A request loop to achieve that might look like:
+
+static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
+{
+ while (nvec >= FOO_DRIVER_MINIMUM_NVEC) {
+ rc = pci_enable_msix(adapter->pdev,
+ adapter->msix_entries, nvec);
+ if (rc > 0)
+ nvec = rc;
+ else
+ return rc;
+ }
+
+ return -ENOSPC;
+}
+
+4.3.2 pci_disable_msix
void pci_disable_msix(struct pci_dev *dev)
-This API should always be used to undo the effect of pci_enable_msix()
-when a device driver is unloading. Note that a device driver should
-always call free_irq() on all MSI-X vectors it has done request_irq()
-on before calling this API. Failure to do so results in a BUG_ON() and
-a device will be left with MSI-X enabled and leaks its vectors.
-
-5.3.4 MSI-X mode vs. legacy mode diagram
-
-The below diagram shows the events which switch the interrupt
-mode on the MSI-X capable device function between MSI-X mode and
-PIN-IRQ assertion mode (legacy).
-
- ------------ pci_enable_msix(,,n) ------------------------
- | | <=============== | |
- | MSI-X MODE | | PIN-IRQ ASSERTION MODE |
- | | ===============> | |
- ------------ pci_disable_msix ------------------------
-
-Figure 2. MSI-X Mode vs. Legacy Mode
-
-In Figure 2, a device operates by default in legacy mode. A
-successful MSI-X request (using pci_enable_msix()) switches a
-device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector
-stored in dev->irq will be saved by the PCI subsystem; however,
-unlike MSI mode, the PCI subsystem will not replace dev->irq with
-assigned MSI-X vector because the PCI subsystem already writes the 1:1
-vector-to-entry mapping into the field 'vector' of each element
-specified in second argument.
-
-To return back to its default mode, a device driver should always call
-pci_disable_msix() to undo the effect of pci_enable_msix(). Note that
-a device driver should always call free_irq() on all MSI-X vectors it
-has done request_irq() on before calling pci_disable_msix(). Failure
-to do so results in a BUG_ON() and a device will be left with MSI-X
-enabled and leaks its vectors. Otherwise, the PCI subsystem switches a
-device function's interrupt mode from MSI-X mode to legacy mode and
-marks all allocated MSI-X vectors as unused.
-
-Once being marked as unused, there is no guarantee that the PCI
-subsystem will reserve these MSI-X vectors for a device. Depending on
-the availability of current PCI vector resources and the number of
-MSI/MSI-X requests from other drivers, these MSI-X vectors may be
-re-assigned.
-
-For the case where the PCI subsystem re-assigned these MSI-X vectors
-to other drivers, a request to switch back to MSI-X mode may result
-being assigned with another set of MSI-X vectors or a failure if no
-more vectors are available.
-
-5.4 Handling function implementing both MSI and MSI-X capabilities
-
-For the case where a function implements both MSI and MSI-X
-capabilities, the PCI subsystem enables a device to run either in MSI
-mode or MSI-X mode but not both. A device driver determines whether it
-wants MSI or MSI-X enabled on its hardware device. Once a device
-driver requests for MSI, for example, it is prohibited from requesting
-MSI-X; in other words, a device driver is not permitted to ping-pong
-between MSI mod MSI-X mode during a run-time.
-
-5.5 Hardware requirements for MSI/MSI-X support
-
-MSI/MSI-X support requires support from both system hardware and
-individual hardware device functions.
-
-5.5.1 Required x86 hardware support
-
-Since the target of MSI address is the local APIC CPU, enabling
-MSI/MSI-X support in the Linux kernel is dependent on whether existing
-system hardware supports local APIC. Users should verify that their
-system supports local APIC operation by testing that it runs when
-CONFIG_X86_LOCAL_APIC=y.
-
-In SMP environment, CONFIG_X86_LOCAL_APIC is automatically set;
-however, in UP environment, users must manually set
-CONFIG_X86_LOCAL_APIC. Once CONFIG_X86_LOCAL_APIC=y, setting
-CONFIG_PCI_MSI enables the VECTOR based scheme and the option for
-MSI-capable device drivers to selectively enable MSI/MSI-X.
-
-Note that CONFIG_X86_IO_APIC setting is irrelevant because MSI/MSI-X
-vector is allocated new during runtime and MSI/MSI-X support does not
-depend on BIOS support. This key independency enables MSI/MSI-X
-support on future IOxAPIC free platforms.
-
-5.5.2 Device hardware support
-
-The hardware device function supports MSI by indicating the
-MSI/MSI-X capability structure on its PCI capability list. By
-default, this capability structure will not be initialized by
-the kernel to enable MSI during the system boot. In other words,
-the device function is running on its default pin assertion mode.
-Note that in many cases the hardware supporting MSI have bugs,
-which may result in system hangs. The software driver of specific
-MSI-capable hardware is responsible for deciding whether to call
-pci_enable_msi or not. A return of zero indicates the kernel
-successfully initialized the MSI/MSI-X capability structure of the
-device function. The device function is now running on MSI/MSI-X mode.
-
-5.6 How to tell whether MSI/MSI-X is enabled on device function
-
-At the driver level, a return of zero from the function call of
-pci_enable_msi()/pci_enable_msix() indicates to a device driver that
-its device function is initialized successfully and ready to run in
-MSI/MSI-X mode.
-
-At the user level, users can use the command 'cat /proc/interrupts'
-to display the vectors allocated for devices and their interrupt
-MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is
-enabled on a SCSI Adaptec 39320D Ultra320 controller.
-
- CPU0 CPU1
- 0: 324639 0 IO-APIC-edge timer
- 1: 1186 0 IO-APIC-edge i8042
- 2: 0 0 XT-PIC cascade
- 12: 2797 0 IO-APIC-edge i8042
- 14: 6543 0 IO-APIC-edge ide0
- 15: 1 0 IO-APIC-edge ide1
-169: 0 0 IO-APIC-level uhci-hcd
-185: 0 0 IO-APIC-level uhci-hcd
-193: 138 10 PCI-MSI aic79xx
-201: 30 0 PCI-MSI aic79xx
-225: 30 0 IO-APIC-level aic7xxx
-233: 30 0 IO-APIC-level aic7xxx
-NMI: 0 0
-LOC: 324553 325068
-ERR: 0
-MIS: 0
-
-6. MSI quirks
-
-Several PCI chipsets or devices are known to not support MSI.
-The PCI stack provides 3 possible levels of MSI disabling:
-* on a single device
-* on all devices behind a specific bridge
-* globally
-
-6.1. Disabling MSI on a single device
-
-Under some circumstances it might be required to disable MSI on a
-single device. This may be achieved by either not calling pci_enable_msi()
-or all, or setting the pci_dev->no_msi flag before (most of the time
-in a quirk).
-
-6.2. Disabling MSI below a bridge
-
-The vast majority of MSI quirks are required by PCI bridges not
-being able to route MSI between busses. In this case, MSI have to be
-disabled on all devices behind this bridge. It is achieves by setting
-the PCI_BUS_FLAGS_NO_MSI flag in the pci_bus->bus_flags of the bridge
-subordinate bus. There is no need to set the same flag on bridges that
-are below the broken bridge. When pci_enable_msi() is called to enable
-MSI on a device, pci_msi_supported() takes care of checking the NO_MSI
-flag in all parent busses of the device.
-
-Some bridges actually support dynamic MSI support enabling/disabling
-by changing some bits in their PCI configuration space (especially
-the Hypertransport chipsets such as the nVidia nForce and Serverworks
-HT2000). It may then be required to update the NO_MSI flag on the
-corresponding devices in the sysfs hierarchy. To enable MSI support
-on device "0000:00:0e", do:
-
- echo 1 > /sys/bus/pci/devices/0000:00:0e/msi_bus
-
-To disable MSI support, echo 0 instead of 1. Note that it should be
-used with caution since changing this value might break interrupts.
-
-6.3. Disabling MSI globally
-
-Some extreme cases may require to disable MSI globally on the system.
-For now, the only known case is a Serverworks PCI-X chipsets (MSI are
-not supported on several busses that are not all connected to the
-chipset in the Linux PCI hierarchy). In the vast majority of other
-cases, disabling only behind a specific bridge is enough.
-
-For debugging purpose, the user may also pass pci=nomsi on the kernel
-command-line to explicitly disable MSI globally. But, once the appro-
-priate quirks are added to the kernel, this option should not be
-required anymore.
-
-6.4. Finding why MSI cannot be enabled on a device
-
-Assuming that MSI are not enabled on a device, you should look at
-dmesg to find messages that quirks may output when disabling MSI
-on some devices, some bridges or even globally.
-Then, lspci -t gives the list of bridges above a device. Reading
-/sys/bus/pci/devices/0000:00:0e/msi_bus will tell you whether MSI
-are enabled (1) or disabled (0). In 0 is found in a single bridge
-msi_bus file above the device, MSI cannot be enabled.
-
-7. FAQ
-
-Q1. Are there any limitations on using the MSI?
-
-A1. If the PCI device supports MSI and conforms to the
-specification and the platform supports the APIC local bus,
-then using MSI should work.
-
-Q2. Will it work on all the Pentium processors (P3, P4, Xeon,
-AMD processors)? In P3 IPI's are transmitted on the APIC local
-bus and in P4 and Xeon they are transmitted on the system
-bus. Are there any implications with this?
-
-A2. MSI support enables a PCI device sending an inbound
-memory write (0xfeexxxxx as target address) on its PCI bus
-directly to the FSB. Since the message address has a
-redirection hint bit cleared, it should work.
-
-Q3. The target address 0xfeexxxxx will be translated by the
-Host Bridge into an interrupt message. Are there any
-limitations on the chipsets such as Intel 8xx, Intel e7xxx,
-or VIA?
-
-A3. If these chipsets support an inbound memory write with
-target address set as 0xfeexxxxx, as conformed to PCI
-specification 2.3 or latest, then it should work.
-
-Q4. From the driver point of view, if the MSI is lost because
-of errors occurring during inbound memory write, then it may
-wait forever. Is there a mechanism for it to recover?
-
-A4. Since the target of the transaction is an inbound memory
-write, all transaction termination conditions (Retry,
-Master-Abort, Target-Abort, or normal completion) are
-supported. A device sending an MSI must abide by all the PCI
-rules and conditions regarding that inbound memory write. So,
-if a retry is signaled it must retry, etc... We believe that
-the recommendation for Abort is also a retry (refer to PCI
-specification 2.3 or latest).
+This API should be used to undo the effect of pci_enable_msix(). It frees
+the previously allocated message signaled interrupts. The interrupts may
+subsequently be assigned to another device, so drivers should not cache
+the value of the 'vector' elements over a call to pci_disable_msix().
+
+A device driver must always call free_irq() on the interrupt(s)
+for which it has called request_irq() before calling this function.
+Failure to do so will result in a BUG_ON(), the device will be left with
+MSI enabled and will leak its vector.
+
+4.3.3 The MSI-X Table
+
+The MSI-X capability specifies a BAR and offset within that BAR for the
+MSI-X Table. This address is mapped by the PCI subsystem, and should not
+be accessed directly by the device driver. If the driver wishes to
+mask or unmask an interrupt, it should call disable_irq() / enable_irq().
+
+4.4 Handling devices implementing both MSI and MSI-X capabilities
+
+If a device implements both MSI and MSI-X capabilities, it can
+run in either MSI mode or MSI-X mode but not both simultaneously.
+This is a requirement of the PCI spec, and it is enforced by the
+PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or
+pci_enable_msix() when MSI is already enabled will result in an error.
+If a device driver wishes to switch between MSI and MSI-X at runtime,
+it must first quiesce the device, then switch it back to pin-interrupt
+mode, before calling pci_enable_msi() or pci_enable_msix() and resuming
+operation. This is not expected to be a common operation but may be
+useful for debugging or testing during development.
+
+4.5 Considerations when using MSIs
+
+4.5.1 Choosing between MSI-X and MSI
+
+If your device supports both MSI-X and MSI capabilities, you should use
+the MSI-X facilities in preference to the MSI facilities. As mentioned
+above, MSI-X supports any number of interrupts between 1 and 2048.
+In constrast, MSI is restricted to a maximum of 32 interrupts (and
+must be a power of two). In addition, the MSI interrupt vectors must
+be allocated consecutively, so the system may not be able to allocate
+as many vectors for MSI as it could for MSI-X. On some platforms, MSI
+interrupts must all be targetted at the same set of CPUs whereas MSI-X
+interrupts can all be targetted at different CPUs.
+
+4.5.2 Spinlocks
+
+Most device drivers have a per-device spinlock which is taken in the
+interrupt handler. With pin-based interrupts or a single MSI, it is not
+necessary to disable interrupts (Linux guarantees the same interrupt will
+not be re-entered). If a device uses multiple interrupts, the driver
+must disable interrupts while the lock is held. If the device sends
+a different interrupt, the driver will deadlock trying to recursively
+acquire the spinlock.
+
+There are two solutions. The first is to take the lock with
+spin_lock_irqsave() or spin_lock_irq() (see
+Documentation/DocBook/kernel-locking). The second is to specify
+IRQF_DISABLED to request_irq() so that the kernel runs the entire
+interrupt routine with interrupts disabled.
+
+If your MSI interrupt routine does not hold the lock for the whole time
+it is running, the first solution may be best. The second solution is
+normally preferred as it avoids making two transitions from interrupt
+disabled to enabled and back again.
+
+4.6 How to tell whether MSI/MSI-X is enabled on a device
+
+Using 'lspci -v' (as root) may show some devices with "MSI", "Message
+Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities
+has an 'Enable' flag which will be followed with either "+" (enabled)
+or "-" (disabled).
+
+
+5. MSI quirks
+
+Several PCI chipsets or devices are known not to support MSIs.
+The PCI stack provides three ways to disable MSIs:
+
+1. globally
+2. on all devices behind a specific bridge
+3. on a single device
+
+5.1. Disabling MSIs globally
+
+Some host chipsets simply don't support MSIs properly. If we're
+lucky, the manufacturer knows this and has indicated it in the ACPI
+FADT table. In this case, Linux will automatically disable MSIs.
+Some boards don't include this information in the table and so we have
+to detect them ourselves. The complete list of these is found near the
+quirk_disable_all_msi() function in drivers/pci/quirks.c.
+
+If you have a board which has problems with MSIs, you can pass pci=nomsi
+on the kernel command line to disable MSIs on all devices. It would be
+in your best interests to report the problem to linux-pci@vger.kernel.org
+including a full 'lspci -v' so we can add the quirks to the kernel.
+
+5.2. Disabling MSIs below a bridge
+
+Some PCI bridges are not able to route MSIs between busses properly.
+In this case, MSIs must be disabled on all devices behind the bridge.
+
+Some bridges allow you to enable MSIs by changing some bits in their
+PCI configuration space (especially the Hypertransport chipsets such
+as the nVidia nForce and Serverworks HT2000). As with host chipsets,
+Linux mostly knows about them and automatically enables MSIs if it can.
+If you have a bridge which Linux doesn't yet know about, you can enable
+MSIs in configuration space using whatever method you know works, then
+enable MSIs on that bridge by doing:
+
+ echo 1 > /sys/bus/pci/devices/$bridge/msi_bus
+
+where $bridge is the PCI address of the bridge you've enabled (eg
+0000:00:0e.0).
+
+To disable MSIs, echo 0 instead of 1. Changing this value should be
+done with caution as it can break interrupt handling for all devices
+below this bridge.
+
+Again, please notify linux-pci@vger.kernel.org of any bridges that need
+special handling.
+
+5.3. Disabling MSIs on a single device
+
+Some devices are known to have faulty MSI implementations. Usually this
+is handled in the individual device driver but occasionally it's necessary
+to handle this with a quirk. Some drivers have an option to disable use
+of MSI. While this is a convenient workaround for the driver author,
+it is not good practise, and should not be emulated.
+
+5.4. Finding why MSIs are disabled on a device
+
+From the above three sections, you can see that there are many reasons
+why MSIs may not be enabled for a given device. Your first step should
+be to examine your dmesg carefully to determine whether MSIs are enabled
+for your machine. You should also check your .config to be sure you
+have enabled CONFIG_PCI_MSI.
+
+Then, 'lspci -t' gives the list of bridges above a device. Reading
+/sys/bus/pci/devices/*/msi_bus will tell you whether MSI are enabled (1)
+or disabled (0). If 0 is found in any of the msi_bus files belonging
+to bridges between the PCI root and the device, MSIs are disabled.
+
+It is also worth checking the device driver to see whether it supports MSIs.
+For example, it may contain calls to pci_enable_msi(), pci_enable_msix() or
+pci_enable_msi_block().
diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt
new file mode 100644
index 000000000000..fc73ef5d65b8
--- /dev/null
+++ b/Documentation/PCI/pci-iov-howto.txt
@@ -0,0 +1,99 @@
+ PCI Express I/O Virtualization Howto
+ Copyright (C) 2009 Intel Corporation
+ Yu Zhao <yu.zhao@intel.com>
+
+
+1. Overview
+
+1.1 What is SR-IOV
+
+Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
+capability which makes one physical device appear as multiple virtual
+devices. The physical device is referred to as Physical Function (PF)
+while the virtual devices are referred to as Virtual Functions (VF).
+Allocation of the VF can be dynamically controlled by the PF via
+registers encapsulated in the capability. By default, this feature is
+not enabled and the PF behaves as traditional PCIe device. Once it's
+turned on, each VF's PCI configuration space can be accessed by its own
+Bus, Device and Function Number (Routing ID). And each VF also has PCI
+Memory Space, which is used to map its register set. VF device driver
+operates on the register set so it can be functional and appear as a
+real existing PCI device.
+
+2. User Guide
+
+2.1 How can I enable SR-IOV capability
+
+The device driver (PF driver) will control the enabling and disabling
+of the capability via API provided by SR-IOV core. If the hardware
+has SR-IOV capability, loading its PF driver would enable it and all
+VFs associated with the PF.
+
+2.2 How can I use the Virtual Functions
+
+The VF is treated as hot-plugged PCI devices in the kernel, so they
+should be able to work in the same way as real PCI devices. The VF
+requires device driver that is same as a normal PCI device's.
+
+3. Developer Guide
+
+3.1 SR-IOV API
+
+To enable SR-IOV capability:
+ int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+ 'nr_virtfn' is number of VFs to be enabled.
+
+To disable SR-IOV capability:
+ void pci_disable_sriov(struct pci_dev *dev);
+
+To notify SR-IOV core of Virtual Function Migration:
+ irqreturn_t pci_sriov_migration(struct pci_dev *dev);
+
+3.2 Usage example
+
+Following piece of code illustrates the usage of the SR-IOV API.
+
+static int __devinit dev_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ pci_enable_sriov(dev, NR_VIRTFN);
+
+ ...
+
+ return 0;
+}
+
+static void __devexit dev_remove(struct pci_dev *dev)
+{
+ pci_disable_sriov(dev);
+
+ ...
+}
+
+static int dev_suspend(struct pci_dev *dev, pm_message_t state)
+{
+ ...
+
+ return 0;
+}
+
+static int dev_resume(struct pci_dev *dev)
+{
+ ...
+
+ return 0;
+}
+
+static void dev_shutdown(struct pci_dev *dev)
+{
+ ...
+}
+
+static struct pci_driver dev_driver = {
+ .name = "SR-IOV Physical Function driver",
+ .id_table = dev_id_table,
+ .probe = dev_probe,
+ .remove = __devexit_p(dev_remove),
+ .suspend = dev_suspend,
+ .resume = dev_resume,
+ .shutdown = dev_shutdown,
+};
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index ea7d1bdad34d..d0f354670646 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -392,3 +392,35 @@ Why: The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t)
have been kept around for migration reasons. After more than two years
it's time to remove them finally
Who: Thomas Gleixner <tglx@linutronix.de>
+
+---------------------------
+
+What: fakephp and associated sysfs files in /sys/bus/pci/slots/
+When: 2011
+Why: In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to
+ represent a machine's physical PCI slots. The change in semantics
+ had userspace implications, as the hotplug core no longer allowed
+ drivers to create multiple sysfs files per physical slot (required
+ for multi-function devices, e.g.). fakephp was seen as a developer's
+ tool only, and its interface changed. Too late, we learned that
+ there were some users of the fakephp interface.
+
+ In 2.6.30, the original fakephp interface was restored. At the same
+ time, the PCI core gained the ability that fakephp provided, namely
+ function-level hot-remove and hot-add.
+
+ Since the PCI core now provides the same functionality, exposed in:
+
+ /sys/bus/pci/rescan
+ /sys/bus/pci/devices/.../remove
+ /sys/bus/pci/devices/.../rescan
+
+ there is no functional reason to maintain fakephp as well.
+
+ We will keep the existing module so that 'modprobe fakephp' will
+ present the old /sys/bus/pci/slots/... interface for compatibility,
+ but users are urged to migrate their applications to the API above.
+
+ After a reasonable transition period, we will remove the legacy
+ fakephp interface.
+Who: Alex Chiang <achiang@hp.com>
diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt
index 9f8740ca3f3b..26e4b8bc53ee 100644
--- a/Documentation/filesystems/sysfs-pci.txt
+++ b/Documentation/filesystems/sysfs-pci.txt
@@ -12,6 +12,7 @@ that support it. For example, a given bus might look like this:
| |-- enable
| |-- irq
| |-- local_cpus
+ | |-- remove
| |-- resource
| |-- resource0
| |-- resource1
@@ -36,6 +37,7 @@ files, each with their own function.
enable Whether the device is enabled (ascii, rw)
irq IRQ number (ascii, ro)
local_cpus nearby CPU mask (cpumask, ro)
+ remove remove device from kernel's list (ascii, wo)
resource PCI resource host addresses (ascii, ro)
resource0..N PCI resource N, if present (binary, mmap)
resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap)
@@ -46,6 +48,7 @@ files, each with their own function.
ro - read only file
rw - file is readable and writable
+ wo - write only file
mmap - file is mmapable
ascii - file contains ascii text
binary - file contains binary data
@@ -73,6 +76,13 @@ that the device must be enabled for a rom read to return data succesfully.
In the event a driver is not bound to the device, it can be enabled using the
'enable' file, documented above.
+The 'remove' file is used to remove the PCI device, by writing a non-zero
+integer to the file. This does not involve any kind of hot-plug functionality,
+e.g. powering off the device. The device is removed from the kernel's list of
+PCI devices, the sysfs directory for it is removed, and the device will be
+removed from any drivers attached to it. Removal of PCI root buses is
+disallowed.
+
Accessing legacy resources through sysfs
----------------------------------------
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index aeedb89a307a..240257dd4238 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1695,6 +1695,8 @@ and is between 256 and 4096 characters. It is defined in the file
See also Documentation/blockdev/paride.txt.
pci=option[,option...] [PCI] various PCI subsystem options:
+ earlydump [X86] dump PCI config space before the kernel
+ changes anything
off [X86] don't probe for the PCI bus
bios [X86-32] force use of PCI BIOS, don't access
the hardware directly. Use this if your machine
@@ -1794,6 +1796,15 @@ and is between 256 and 4096 characters. It is defined in the file
cbmemsize=nn[KMG] The fixed amount of bus space which is
reserved for the CardBus bridge's memory
window. The default value is 64 megabytes.
+ resource_alignment=
+ Format:
+ [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...]
+ Specifies alignment and device to reassign
+ aligned memory resources.
+ If <order of align> is not specified,
+ PAGE_SIZE is used as alignment.
+ PCI-PCI bridge can be specified, if resource
+ windows need to be expanded.
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
Management.
diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h
index 2a14302c17a3..cb04eaa6ba33 100644
--- a/arch/alpha/include/asm/pci.h
+++ b/arch/alpha/include/asm/pci.h
@@ -273,4 +273,18 @@ struct pci_dev *alpha_gendev_to_pci(struct device *dev);
extern struct pci_dev *isa_bridge;
+extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
+ size_t count);
+extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
+ size_t count);
+extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
+ struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state);
+extern void pci_adjust_legacy_attr(struct pci_bus *bus,
+ enum pci_mmap_state mmap_type);
+#define HAVE_PCI_LEGACY 1
+
+extern int pci_create_resource_files(struct pci_dev *dev);
+extern void pci_remove_resource_files(struct pci_dev *dev);
+
#endif /* __ALPHA_PCI_H */
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index b4697759a123..a427538252f8 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -12,7 +12,7 @@ obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
obj-$(CONFIG_VGA_HOSE) += console.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_PCI) += pci.o pci_iommu.o
+obj-$(CONFIG_PCI) += pci.o pci_iommu.o pci-sysfs.o
obj-$(CONFIG_SRM_ENV) += srm_env.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
new file mode 100644
index 000000000000..6ea822e7f724
--- /dev/null
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -0,0 +1,366 @@
+/*
+ * arch/alpha/kernel/pci-sysfs.c
+ *
+ * Copyright (C) 2009 Ivan Kokshaysky
+ *
+ * Alpha PCI resource files.
+ *
+ * Loosely based on generic HAVE_PCI_MMAP implementation in
+ * drivers/pci/pci-sysfs.c
+ */
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+
+static int hose_mmap_page_range(struct pci_controller *hose,
+ struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_type, int sparse)
+{
+ unsigned long base;
+
+ if (mmap_type == pci_mmap_mem)
+ base = sparse ? hose->sparse_mem_base : hose->dense_mem_base;
+ else
+ base = sparse ? hose->sparse_io_base : hose->dense_io_base;
+
+ vma->vm_pgoff += base >> PAGE_SHIFT;
+ vma->vm_flags |= (VM_IO | VM_RESERVED);
+
+ return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+}
+
+static int __pci_mmap_fits(struct pci_dev *pdev, int num,
+ struct vm_area_struct *vma, int sparse)
+{
+ unsigned long nr, start, size;
+ int shift = sparse ? 5 : 0;
+
+ nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ start = vma->vm_pgoff;
+ size = ((pci_resource_len(pdev, num) - 1) >> (PAGE_SHIFT - shift)) + 1;
+
+ if (start < size && size - start >= nr)
+ return 1;
+ WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on %s BAR %d "
+ "(size 0x%08lx)\n",
+ current->comm, sparse ? " sparse" : "", start, start + nr,
+ pci_name(pdev), num, size);
+ return 0;
+}
+
+/**
+ * pci_mmap_resource - map a PCI resource into user memory space
+ * @kobj: kobject for mapping
+ * @attr: struct bin_attribute for the file being mapped
+ * @vma: struct vm_area_struct passed into the mmap
+ * @sparse: address space type
+ *
+ * Use the bus mapping routines to map a PCI resource into userspace.
+ */
+static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
+ struct vm_area_struct *vma, int sparse)
+{
+ struct pci_dev *pdev = to_pci_dev(container_of(kobj,
+ struct device, kobj));
+ struct resource *res = (struct resource *)attr->private;
+ enum pci_mmap_state mmap_type;
+ struct pci_bus_region bar;
+ int i;
+
+ for (i = 0; i < PCI_ROM_RESOURCE; i++)
+ if (res == &pdev->resource[i])
+ break;
+ if (i >= PCI_ROM_RESOURCE)
+ return -ENODEV;
+
+ if (!__pci_mmap_fits(pdev, i, vma, sparse))
+ return -EINVAL;
+
+ if (iomem_is_exclusive(res->start))
+ return -EINVAL;
+
+ pcibios_resource_to_bus(pdev, &bar, res);
+ vma->vm_pgoff += bar.start >> (PAGE_SHIFT - (sparse ? 5 : 0));
+ mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
+
+ return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse);
+}
+
+static int pci_mmap_resource_sparse(struct kobject *kobj,
+ struct bin_attribute *attr,
+ struct vm_area_struct *vma)
+{
+ return pci_mmap_resource(kobj, attr, vma, 1);
+}
+
+static int pci_mmap_resource_dense(struct kobject *kobj,
+ struct bin_attribute *attr,
+ struct vm_area_struct *vma)
+{
+ return pci_mmap_resource(kobj, attr, vma, 0);
+}
+
+/**
+ * pci_remove_resource_files - cleanup resource files
+ * @dev: dev to cleanup
+ *
+ * If we created resource files for @dev, remove them from sysfs and
+ * free their resources.
+ */
+void pci_remove_resource_files(struct pci_dev *pdev)
+{
+ int i;
+
+ for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+ struct bin_attribute *res_attr;
+
+ res_attr = pdev->res_attr[i];
+ if (res_attr) {
+ sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
+ kfree(res_attr);
+ }
+
+ res_attr = pdev->res_attr_wc[i];
+ if (res_attr) {
+ sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
+ kfree(res_attr);
+ }
+ }
+}
+
+static int sparse_mem_mmap_fits(struct pci_dev *pdev, int num)
+{
+ struct pci_bus_region bar;
+ struct pci_controller *hose = pdev->sysdata;
+ long dense_offset;
+ unsigned long sparse_size;
+
+ pcibios_resource_to_bus(pdev, &bar, &pdev->resource[num]);
+
+ /* All core logic chips have 4G sparse address space, except
+ CIA which has 16G (see xxx_SPARSE_MEM and xxx_DENSE_MEM
+ definitions in asm/core_xxx.h files). This corresponds
+ to 128M or 512M of the bus space. */
+ dense_offset = (long)(hose->dense_mem_base - hose->sparse_mem_base);
+ sparse_size = dense_offset >= 0x400000000UL ? 0x20000000 : 0x8000000;
+
+ return bar.end < sparse_size;
+}
+
+static int pci_create_one_attr(struct pci_dev *pdev, int num, char *name,
+ char *suffix, struct bin_attribute *res_attr,
+ unsigned long sparse)
+{
+ size_t size = pci_resource_len(pdev, num);
+
+ sprintf(name, "resource%d%s", num, suffix);
+ res_attr->mmap = sparse ? pci_mmap_resource_sparse :
+ pci_mmap_resource_dense;
+ res_attr->attr.name = name;
+ res_attr->attr.mode = S_IRUSR | S_IWUSR;
+ res_attr->size = sparse ? size << 5 : size;
+ res_attr->private = &pdev->resource[num];
+ return sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
+}
+
+static int pci_create_attr(struct pci_dev *pdev, int num)
+{
+ /* allocate attribute structure, piggyback attribute name */
+ int retval, nlen1, nlen2 = 0, res_count = 1;
+ unsigned long sparse_base, dense_base;
+ struct bin_attribute *attr;
+ struct pci_controller *hose = pdev->sysdata;
+ char *suffix, *attr_name;
+
+ suffix = ""; /* Assume bwx machine, normal resourceN files. */
+ nlen1 = 10;
+
+ if (pdev->resource[num].flags & IORESOURCE_MEM) {
+ sparse_base = hose->sparse_mem_base;
+ dense_base = hose->dense_mem_base;
+ if (sparse_base && !sparse_mem_mmap_fits(pdev, num)) {
+ sparse_base = 0;
+ suffix = "_dense";
+ nlen1 = 16; /* resourceN_dense */
+ }
+ } else {
+ sparse_base = hose->sparse_io_base;
+ dense_base = hose->dense_io_base;
+ }
+
+ if (sparse_base) {
+ suffix = "_sparse";
+ nlen1 = 17;
+ if (dense_base) {
+ nlen2 = 16; /* resourceN_dense */
+ res_count = 2;
+ }
+ }
+
+ attr = kzalloc(sizeof(*attr) * res_count + nlen1 + nlen2, GFP_ATOMIC);
+ if (!attr)
+ return -ENOMEM;
+
+ /* Create bwx, sparse or single dense file */
+ attr_name = (char *)(attr + res_count);
+ pdev->res_attr[num] = attr;
+ retval = pci_create_one_attr(pdev, num, attr_name, suffix, attr,
+ sparse_base);
+ if (retval || res_count == 1)
+ return retval;
+
+ /* Create dense file */
+ attr_name += nlen1;
+ attr++;
+ pdev->res_attr_wc[num] = attr;
+ return pci_create_one_attr(pdev, num, attr_name, "_dense", attr, 0);
+}
+
+/**
+ * pci_create_resource_files - create resource files in sysfs for @dev
+ * @dev: dev in question
+ *
+ * Walk the resources in @dev creating files for each resource available.
+ */
+int pci_create_resource_files(struct pci_dev *pdev)
+{
+ int i;
+ int retval;
+
+ /* Expose the PCI resources from this device as files */
+ for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+
+ /* skip empty resources */
+ if (!pci_resource_len(pdev, i))
+ continue;
+
+ retval = pci_create_attr(pdev, i);
+ if (retval) {
+ pci_remove_resource_files(pdev);
+ return retval;
+ }
+ }
+ return 0;
+}
+
+/* Legacy I/O bus mapping stuff. */
+
+static int __legacy_mmap_fits(struct pci_controller *hose,
+ struct vm_area_struct *vma,
+ unsigned long res_size, int sparse)
+{
+ unsigned long nr, start, size;
+
+ nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ start = vma->vm_pgoff;
+ size = ((res_size - 1) >> PAGE_SHIFT) + 1;
+
+ if (start < size && size - start >= nr)
+ return 1;
+ WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on hose %d "
+ "(size 0x%08lx)\n",
+ current->comm, sparse ? " sparse" : "", start, start + nr,
+ hose->index, size);
+ return 0;
+}
+
+static inline int has_sparse(struct pci_controller *hose,
+ enum pci_mmap_state mmap_type)
+{
+ unsigned long base;
+
+ base = (mmap_type == pci_mmap_mem) ? hose->sparse_mem_base :
+ hose->sparse_io_base;
+
+ return base != 0;
+}
+
+int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_type)
+{
+ struct pci_controller *hose = bus->sysdata;
+ int sparse = has_sparse(hose, mmap_type);
+ unsigned long res_size;
+
+ res_size = (mmap_type == pci_mmap_mem) ? bus->legacy_mem->size :
+ bus->legacy_io->size;
+ if (!__legacy_mmap_fits(hose, vma, res_size, sparse))
+ return -EINVAL;
+
+ return hose_mmap_page_range(hose, vma, mmap_type, sparse);
+}
+
+/**
+ * pci_adjust_legacy_attr - adjustment of legacy file attributes
+ * @b: bus to create files under
+ * @mmap_type: I/O port or memory
+ *
+ * Adjust file name and size for sparse mappings.
+ */
+void pci_adjust_legacy_attr(struct pci_bus *bus, enum pci_mmap_state mmap_type)
+{
+ struct pci_controller *hose = bus->sysdata;
+
+ if (!has_sparse(hose, mmap_type))
+ return;
+
+ if (mmap_type == pci_mmap_mem) {
+ bus->legacy_mem->attr.name = "legacy_mem_sparse";
+ bus->legacy_mem->size <<= 5;
+ } else {
+ bus->legacy_io->attr.name = "legacy_io_sparse";
+ bus->legacy_io->size <<= 5;
+ }
+ return;
+}
+
+/* Legacy I/O bus read/write functions */
+int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
+{
+ struct pci_controller *hose = bus->sysdata;
+
+ port += hose->io_space->start;
+
+ switch(size) {
+ case 1:
+ *((u8 *)val) = inb(port);
+ return 1;
+ case 2:
+ if (port & 1)
+ return -EINVAL;
+ *((u16 *)val) = inw(port);
+ return 2;
+ case 4:
+ if (port & 3)
+ return -EINVAL;
+ *((u32 *)val) = inl(port);
+ return 4;
+ }
+ return -EINVAL;
+}
+
+int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
+{
+ struct pci_controller *hose = bus->sysdata;
+
+ port += hose->io_space->start;
+
+ switch(size) {
+ case 1:
+ outb(port, val);
+ return 1;
+ case 2:
+ if (port & 1)
+ return -EINVAL;
+ outw(port, val);
+ return 2;
+ case 4:
+ if (port & 3)
+ return -EINVAL;
+ outl(port, val);
+ return 4;
+ }
+ return -EINVAL;
+}
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 3548159a1beb..ba17d5d90a49 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -114,6 +114,10 @@ extern int pci_domain_nr(struct pci_bus *bus);
/* Decide whether to display the domain number in /proc */
extern int pci_proc_domain(struct pci_bus *bus);
+/* MSI arch hooks */
+#define arch_setup_msi_irqs arch_setup_msi_irqs
+#define arch_teardown_msi_irqs arch_teardown_msi_irqs
+#define arch_msi_check_device arch_msi_check_device
struct vm_area_struct;
/* Map a range of PCI memory or I/O space for a device into user space */
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
index 3bb7d3dd28be..8bbc12d20f5c 100644
--- a/arch/powerpc/kernel/msi.c
+++ b/arch/powerpc/kernel/msi.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/msi.h>
+#include <linux/pci.h>
#include <asm/machdep.h>
@@ -19,6 +20,10 @@ int arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
return -ENOSYS;
}
+ /* PowerPC doesn't support multiple MSI yet */
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+
if (ppc_md.msi_check_device) {
pr_debug("msi: Using platform check routine.\n");
return ppc_md.msi_check_device(dev, nvec, type);
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index a977de23cb4d..a0301bfeb954 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -86,6 +86,9 @@ static inline void early_quirks(void) { }
extern void pci_iommu_alloc(void);
+/* MSI arch hook */
+#define arch_setup_msi_irqs arch_setup_msi_irqs
+
#endif /* __KERNEL__ */
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index da99ffcdfde6..1bb5c6cee3eb 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3468,6 +3468,10 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
struct intel_iommu *iommu = NULL;
int index = 0;
+ /* x86 doesn't support multiple MSI yet */
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+
irq_want = nr_irqs_gsi;
sub_handle = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index c7c4776ff630..90f5b9ef5def 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -300,8 +300,7 @@ fs_initcall(pci_iommu_init);
static __devinit void via_no_dac(struct pci_dev *dev)
{
if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
- printk(KERN_INFO
- "PCI: VIA PCI bridge detected. Disabling DAC.\n");
+ dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
forbid_dac = 1;
}
}
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index f6adf2c6d751..aaf26ae58cd5 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -69,11 +69,12 @@ void early_dump_pci_device(u8 bus, u8 slot, u8 func)
int j;
u32 val;
- printk(KERN_INFO "PCI: %02x:%02x:%02x", bus, slot, func);
+ printk(KERN_INFO "pci 0000:%02x:%02x.%d config space:",
+ bus, slot, func);
for (i = 0; i < 256; i += 4) {
if (!(i & 0x0f))
- printk("\n%04x:",i);
+ printk("\n %02x:",i);
val = read_pci_config(bus, slot, func, i);
for (j = 0; j < 4; j++) {
@@ -96,20 +97,22 @@ void early_dump_pci_devices(void)
for (func = 0; func < 8; func++) {
u32 class;
u8 type;
+
class = read_pci_config(bus, slot, func,
PCI_CLASS_REVISION);
if (class == 0xffffffff)
- break;
+ continue;
early_dump_pci_device(bus, slot, func);
- /* No multi-function device? */
- type = read_pci_config_byte(bus, slot, func,
+ if (func == 0) {
+ type = read_pci_config_byte(bus, slot,
+ func,
PCI_HEADER_TYPE);
- if (!(type & 0x80))
- break;
+ if (!(type & 0x80))
+ break;
+ }
}
}
}
}
-
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 9c49919e4d1c..6dd89555fbfa 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -495,26 +495,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
pci_siemens_interrupt_controller);
/*
- * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have
- * 4096 bytes configuration space for each function of their processor
- * configuration space.
- */
-static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev)
-{
- dev->cfg_size = pci_cfg_space_size_ext(dev);
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size);
-
-/*
* SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
* confusing the PCI engine:
*/
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index f1065b129e9c..4061bb0f267d 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -50,8 +50,6 @@ static int __init pci_legacy_init(void)
if (pci_root_bus)
pci_bus_add_devices(pci_root_bus);
- pcibios_fixup_peer_bridges();
-
return 0;
}
@@ -67,6 +65,7 @@ int __init pci_subsys_init(void)
pci_visws_init();
#endif
pci_legacy_init();
+ pcibios_fixup_peer_bridges();
pcibios_irq_init();
pcibios_init();
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 89bf9242c80a..905bb526b133 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/bitmap.h>
+#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/pci_x86.h>
@@ -24,24 +25,49 @@
/* Indicate if the mmcfg resources have been placed into the resource table. */
static int __initdata pci_mmcfg_resources_inserted;
+static __init int extend_mmcfg(int num)
+{
+ struct acpi_mcfg_allocation *new;
+ int new_num = pci_mmcfg_config_num + num;
+
+ new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL);
+ if (!new)
+ return -1;
+
+ if (pci_mmcfg_config) {
+ memcpy(new, pci_mmcfg_config,
+ sizeof(pci_mmcfg_config[0]) * new_num);
+ kfree(pci_mmcfg_config);
+ }
+ pci_mmcfg_config = new;
+
+ return 0;
+}
+
+static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end)
+{
+ int i = pci_mmcfg_config_num;
+
+ pci_mmcfg_config_num++;
+ pci_mmcfg_config[i].address = addr;
+ pci_mmcfg_config[i].pci_segment = segment;
+ pci_mmcfg_config[i].start_bus_number = start;
+ pci_mmcfg_config[i].end_bus_number = end;
+}
+
static const char __init *pci_mmcfg_e7520(void)
{
u32 win;
raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win);
win = win & 0xf000;
- if(win == 0x0000 || win == 0xf000)
- pci_mmcfg_config_num = 0;
- else {
- pci_mmcfg_config_num = 1;
- pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
- if (!pci_mmcfg_config)
- return NULL;
- pci_mmcfg_config[0].address = win << 16;
- pci_mmcfg_config[0].pci_segment = 0;
- pci_mmcfg_config[0].start_bus_number = 0;
- pci_mmcfg_config[0].end_bus_number = 255;
- }
+ if (win == 0x0000 || win == 0xf000)
+ return NULL;
+
+ if (extend_mmcfg(1) == -1)
+ return NULL;
+
+ fill_one_mmcfg(win << 16, 0, 0, 255);
return "Intel Corporation E7520 Memory Controller Hub";
}
@@ -50,13 +76,11 @@ static const char __init *pci_mmcfg_intel_945(void)
{
u32 pciexbar, mask = 0, len = 0;
- pci_mmcfg_config_num = 1;
-
raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar);
/* Enable bit */
if (!(pciexbar & 1))
- pci_mmcfg_config_num = 0;
+ return NULL;
/* Size bits */
switch ((pciexbar >> 1) & 3) {
@@ -73,28 +97,23 @@ static const char __init *pci_mmcfg_intel_945(void)
len = 0x04000000U;
break;
default:
- pci_mmcfg_config_num = 0;
+ return NULL;
}
/* Errata #2, things break when not aligned on a 256Mb boundary */
/* Can only happen in 64M/128M mode */
if ((pciexbar & mask) & 0x0fffffffU)
- pci_mmcfg_config_num = 0;
+ return NULL;
/* Don't hit the APIC registers and their friends */
if ((pciexbar & mask) >= 0xf0000000U)
- pci_mmcfg_config_num = 0;
-
- if (pci_mmcfg_config_num) {
- pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
- if (!pci_mmcfg_config)
- return NULL;
- pci_mmcfg_config[0].address = pciexbar & mask;
- pci_mmcfg_config[0].pci_segment = 0;
- pci_mmcfg_config[0].start_bus_number = 0;
- pci_mmcfg_config[0].end_bus_number = (len >> 20) - 1;
- }
+ return NULL;
+
+ if (extend_mmcfg(1) == -1)
+ return NULL;
+
+ fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1);
return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub";
}
@@ -138,22 +157,77 @@ static const char __init *pci_mmcfg_amd_fam10h(void)
busnbits = 8;
}
- pci_mmcfg_config_num = (1 << segnbits);
- pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]) *
- pci_mmcfg_config_num, GFP_KERNEL);
- if (!pci_mmcfg_config)
+ if (extend_mmcfg(1 << segnbits) == -1)
return NULL;
- for (i = 0; i < (1 << segnbits); i++) {
- pci_mmcfg_config[i].address = base + (1<<28) * i;
- pci_mmcfg_config[i].pci_segment = i;
- pci_mmcfg_config[i].start_bus_number = 0;
- pci_mmcfg_config[i].end_bus_number = (1 << busnbits) - 1;
- }
+ for (i = 0; i < (1 << segnbits); i++)
+ fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1);
return "AMD Family 10h NB";
}
+static bool __initdata mcp55_checked;
+static const char __init *pci_mmcfg_nvidia_mcp55(void)
+{
+ int bus;
+ int mcp55_mmconf_found = 0;
+
+ static const u32 extcfg_regnum = 0x90;
+ static const u32 extcfg_regsize = 4;
+ static const u32 extcfg_enable_mask = 1<<31;
+ static const u32 extcfg_start_mask = 0xff<<16;
+ static const int extcfg_start_shift = 16;
+ static const u32 extcfg_size_mask = 0x3<<28;
+ static const int extcfg_size_shift = 28;
+ static const int extcfg_sizebus[] = {0x100, 0x80, 0x40, 0x20};
+ static const u32 extcfg_base_mask[] = {0x7ff8, 0x7ffc, 0x7ffe, 0x7fff};
+ static const int extcfg_base_lshift = 25;
+
+ /*
+ * do check if amd fam10h already took over
+ */
+ if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked)
+ return NULL;
+
+ mcp55_checked = true;
+ for (bus = 0; bus < 256; bus++) {
+ u64 base;
+ u32 l, extcfg;
+ u16 vendor, device;
+ int start, size_index, end;
+
+ raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), 0, 4, &l);
+ vendor = l & 0xffff;
+ device = (l >> 16) & 0xffff;
+
+ if (PCI_VENDOR_ID_NVIDIA != vendor || 0x0369 != device)
+ continue;
+
+ raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), extcfg_regnum,
+ extcfg_regsize, &extcfg);
+
+ if (!(extcfg & extcfg_enable_mask))
+ continue;
+
+ if (extend_mmcfg(1) == -1)
+ continue;
+
+ size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift;
+ base = extcfg & extcfg_base_mask[size_index];
+ /* base could > 4G */
+ base <<= extcfg_base_lshift;
+ start = (extcfg & extcfg_start_mask) >> extcfg_start_shift;
+ end = start + extcfg_sizebus[size_index] - 1;
+ fill_one_mmcfg(base, 0, start, end);
+ mcp55_mmconf_found++;
+ }
+
+ if (!mcp55_mmconf_found)
+ return NULL;
+
+ return "nVidia MCP55";
+}
+
struct pci_mmcfg_hostbridge_probe {
u32 bus;
u32 devfn;
@@ -171,8 +245,52 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = {
0x1200, pci_mmcfg_amd_fam10h },
{ 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD,
0x1200, pci_mmcfg_amd_fam10h },
+ { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_NVIDIA,
+ 0x0369, pci_mmcfg_nvidia_mcp55 },
};
+static int __init cmp_mmcfg(const void *x1, const void *x2)
+{
+ const typeof(pci_mmcfg_config[0]) *m1 = x1;
+ const typeof(pci_mmcfg_config[0]) *m2 = x2;
+ int start1, start2;
+
+ start1 = m1->start_bus_number;
+ start2 = m2->start_bus_number;
+
+ return start1 - start2;
+}
+
+static void __init pci_mmcfg_check_end_bus_number(void)
+{
+ int i;
+ typeof(pci_mmcfg_config[0]) *cfg, *cfgx;
+
+ /* sort them at first */
+ sort(pci_mmcfg_config, pci_mmcfg_config_num,
+ sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL);
+
+ /* last one*/
+ if (pci_mmcfg_config_num > 0) {
+ i = pci_mmcfg_config_num - 1;
+ cfg = &pci_mmcfg_config[i];
+ if (cfg->end_bus_number < cfg->start_bus_number)
+ cfg->end_bus_number = 255;
+ }
+
+ /* don't overlap please */
+ for (i = 0; i < pci_mmcfg_config_num - 1; i++) {
+ cfg = &pci_mmcfg_config[i];
+ cfgx = &pci_mmcfg_config[i+1];
+
+ if (cfg->end_bus_number < cfg->start_bus_number)
+ cfg->end_bus_number = 255;
+
+ if (cfg->end_bus_number >= cfgx->start_bus_number)
+ cfg->end_bus_number = cfgx->start_bus_number - 1;
+ }
+}
+
static int __init pci_mmcfg_check_hostbridge(void)
{
u32 l;
@@ -186,31 +304,33 @@ static int __init pci_mmcfg_check_hostbridge(void)
pci_mmcfg_config_num = 0;
pci_mmcfg_config = NULL;
- name = NULL;
- for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
+ for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
bus = pci_mmcfg_probes[i].bus;
devfn = pci_mmcfg_probes[i].devfn;
raw_pci_ops->read(0, bus, devfn, 0, 4, &l);
vendor = l & 0xffff;
device = (l >> 16) & 0xffff;
+ name = NULL;
if (pci_mmcfg_probes[i].vendor == vendor &&
pci_mmcfg_probes[i].device == device)
name = pci_mmcfg_probes[i].probe();
- }
- if (name) {
- printk(KERN_INFO "PCI: Found %s %s MMCONFIG support.\n",
- name, pci_mmcfg_config_num ? "with" : "without");
+ if (name)
+ printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n",
+ name);
}
- return name != NULL;
+ /* some end_bus_number is crazy, fix it */
+ pci_mmcfg_check_end_bus_number();
+
+ return pci_mmcfg_config_num != 0;
}
static void __init pci_mmcfg_insert_resources(void)
{
-#define PCI_MMCFG_RESOURCE_NAME_LEN 19
+#define PCI_MMCFG_RESOURCE_NAME_LEN 24
int i;
struct resource *res;
char *names;
@@ -228,9 +348,10 @@ static void __init pci_mmcfg_insert_resources(void)
struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i];
num_buses = cfg->end_bus_number - cfg->start_bus_number + 1;
res->name = names;
- snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
- cfg->pci_segment);
- res->start = cfg->address;
+ snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN,
+ "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment,
+ cfg->start_bus_number, cfg->end_bus_number);
+ res->start = cfg->address + (cfg->start_bus_number << 20);
res->end = res->start + (num_buses << 20) - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
insert_resource(&iomem_resource, res);
@@ -354,8 +475,6 @@ static void __init pci_mmcfg_reject_broken(int early)
(pci_mmcfg_config[0].address == 0))
return;
- cfg = &pci_mmcfg_config[0];
-
for (i = 0; i < pci_mmcfg_config_num; i++) {
int valid = 0;
u64 addr, size;
@@ -423,10 +542,10 @@ static void __init __pci_mmcfg_init(int early)
known_bridge = 1;
}
- if (!known_bridge) {
+ if (!known_bridge)
acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
- pci_mmcfg_reject_broken(early);
- }
+
+ pci_mmcfg_reject_broken(early);
if ((pci_mmcfg_config_num == 0) ||
(pci_mmcfg_config == NULL) ||
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index 30007ffc8e11..94349f8b2f96 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -112,13 +112,18 @@ static struct pci_raw_ops pci_mmcfg = {
static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
{
void __iomem *addr;
- u32 size;
-
- size = (cfg->end_bus_number + 1) << 20;
- addr = ioremap_nocache(cfg->address, size);
+ u64 start, size;
+
+ start = cfg->start_bus_number;
+ start <<= 20;
+ start += cfg->address;
+ size = cfg->end_bus_number + 1 - cfg->start_bus_number;
+ size <<= 20;
+ addr = ioremap_nocache(start, size);
if (addr) {
printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n",
- cfg->address, cfg->address + size - 1);
+ start, start + size - 1);
+ addr -= cfg->start_bus_number << 20;
}
return addr;
}
@@ -157,7 +162,7 @@ void __init pci_mmcfg_arch_free(void)
for (i = 0; i < pci_mmcfg_config_num; ++i) {
if (pci_mmcfg_virt[i].virt) {
- iounmap(pci_mmcfg_virt[i].virt);
+ iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20));
pci_mmcfg_virt[i].virt = NULL;
pci_mmcfg_virt[i].cfg = NULL;
}
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 5b38a026d122..196f97d00956 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -66,11 +66,18 @@ struct acpi_pci_root {
struct acpi_device * device;
struct acpi_pci_id id;
struct pci_bus *bus;
+
+ u32 osc_support_set; /* _OSC state of support bits */
+ u32 osc_control_set; /* _OSC state of control bits */
+ u32 osc_control_qry; /* the latest _OSC query result */
+
+ u32 osc_queried:1; /* has _OSC control been queried? */
};
static LIST_HEAD(acpi_pci_roots);
static struct acpi_pci_driver *sub_driver;
+static DEFINE_MUTEX(osc_lock);
int acpi_pci_register_driver(struct acpi_pci_driver *driver)
{
@@ -185,6 +192,175 @@ static void acpi_pci_bridge_scan(struct acpi_device *device)
}
}
+static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40,
+ 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
+
+static acpi_status acpi_pci_run_osc(acpi_handle handle,
+ const u32 *capbuf, u32 *retval)
+{
+ acpi_status status;
+ struct acpi_object_list input;
+ union acpi_object in_params[4];
+ struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
+ union acpi_object *out_obj;
+ u32 errors;
+
+ /* Setting up input parameters */
+ input.count = 4;
+ input.pointer = in_params;
+ in_params[0].type = ACPI_TYPE_BUFFER;
+ in_params[0].buffer.length = 16;
+ in_params[0].buffer.pointer = OSC_UUID;
+ in_params[1].type = ACPI_TYPE_INTEGER;
+ in_params[1].integer.value = 1;
+ in_params[2].type = ACPI_TYPE_INTEGER;
+ in_params[2].integer.value = 3;
+ in_params[3].type = ACPI_TYPE_BUFFER;
+ in_params[3].buffer.length = 12;
+ in_params[3].buffer.pointer = (u8 *)capbuf;
+
+ status = acpi_evaluate_object(handle, "_OSC", &input, &output);
+ if (ACPI_FAILURE(status))
+ return status;
+
+ if (!output.length)
+ return AE_NULL_OBJECT;
+
+ out_obj = output.pointer;
+ if (out_obj->type != ACPI_TYPE_BUFFER) {
+ printk(KERN_DEBUG "_OSC evaluation returned wrong type\n");
+ status = AE_TYPE;
+ goto out_kfree;
+ }
+ /* Need to ignore the bit0 in result code */
+ errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
+ if (errors) {
+ if (errors & OSC_REQUEST_ERROR)
+ printk(KERN_DEBUG "_OSC request failed\n");
+ if (errors & OSC_INVALID_UUID_ERROR)
+ printk(KERN_DEBUG "_OSC invalid UUID\n");
+ if (errors & OSC_INVALID_REVISION_ERROR)
+ printk(KERN_DEBUG "_OSC invalid revision\n");
+ if (errors & OSC_CAPABILITIES_MASK_ERROR) {
+ if (capbuf[OSC_QUERY_TYPE] & OSC_QUERY_ENABLE)
+ goto out_success;
+ printk(KERN_DEBUG
+ "Firmware did not grant requested _OSC control\n");
+ status = AE_SUPPORT;
+ goto out_kfree;
+ }
+ status = AE_ERROR;
+ goto out_kfree;
+ }
+out_success:
+ *retval = *((u32 *)(out_obj->buffer.pointer + 8));
+ status = AE_OK;
+
+out_kfree:
+ kfree(output.pointer);
+ return status;
+}
+
+static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 flags)
+{
+ acpi_status status;
+ u32 support_set, result, capbuf[3];
+
+ /* do _OSC query for all possible controls */
+ support_set = root->osc_support_set | (flags & OSC_SUPPORT_MASKS);
+ capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
+ capbuf[OSC_SUPPORT_TYPE] = support_set;
+ capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
+
+ status = acpi_pci_run_osc(root->device->handle, capbuf, &result);
+ if (ACPI_SUCCESS(status)) {
+ root->osc_support_set = support_set;
+ root->osc_control_qry = result;
+ root->osc_queried = 1;
+ }
+ return status;
+}
+
+static acpi_status acpi_pci_osc_support(struct acpi_pci_root *root, u32 flags)
+{
+ acpi_status status;
+ acpi_handle tmp;
+
+ status = acpi_get_handle(root->device->handle, "_OSC", &tmp);
+ if (ACPI_FAILURE(status))
+ return status;
+ mutex_lock(&osc_lock);
+ status = acpi_pci_query_osc(root, flags);
+ mutex_unlock(&osc_lock);
+ return status;
+}
+
+static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle)
+{
+ struct acpi_pci_root *root;
+ list_for_each_entry(root, &acpi_pci_roots, node) {
+ if (root->device->handle == handle)
+ return root;
+ }
+ return NULL;
+}
+
+/**
+ * acpi_pci_osc_control_set - commit requested control to Firmware
+ * @handle: acpi_handle for the target ACPI object
+ * @flags: driver's requested control bits
+ *
+ * Attempt to take control from Firmware on requested control bits.
+ **/
+acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags)
+{
+ acpi_status status;
+ u32 control_req, result, capbuf[3];
+ acpi_handle tmp;
+ struct acpi_pci_root *root;
+
+ status = acpi_get_handle(handle, "_OSC", &tmp);
+ if (ACPI_FAILURE(status))
+ return status;
+
+ control_req = (flags & OSC_CONTROL_MASKS);
+ if (!control_req)
+ return AE_TYPE;
+
+ root = acpi_pci_find_root(handle);
+ if (!root)
+ return AE_NOT_EXIST;
+
+ mutex_lock(&osc_lock);
+ /* No need to evaluate _OSC if the control was already granted. */
+ if ((root->osc_control_set & control_req) == control_req)
+ goto out;
+
+ /* Need to query controls first before requesting them */
+ if (!root->osc_queried) {
+ status = acpi_pci_query_osc(root, root->osc_support_set);
+ if (ACPI_FAILURE(status))
+ goto out;
+ }
+ if ((root->osc_control_qry & control_req) != control_req) {
+ printk(KERN_DEBUG
+ "Firmware did not grant requested _OSC control\n");
+ status = AE_SUPPORT;
+ goto out;
+ }
+
+ capbuf[OSC_QUERY_TYPE] = 0;
+ capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set;
+ capbuf[OSC_CONTROL_TYPE] = root->osc_control_set | control_req;
+ status = acpi_pci_run_osc(handle, capbuf, &result);
+ if (ACPI_SUCCESS(status))
+ root->osc_control_set = result;
+out:
+ mutex_unlock(&osc_lock);
+ return status;
+}
+EXPORT_SYMBOL(acpi_pci_osc_control_set);
+
static int __devinit acpi_pci_root_add(struct acpi_device *device)
{
int result = 0;
@@ -217,7 +393,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
* PCI domains, so we indicate this in _OSC support capabilities.
*/
flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT;
- pci_acpi_osc_support(device->handle, flags);
+ acpi_pci_osc_support(root, flags);
/*
* Segment
@@ -353,7 +529,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
if (pci_msi_enabled())
flags |= OSC_MSI_SUPPORT;
if (flags != base_flags)
- pci_acpi_osc_support(device->handle, flags);
+ acpi_pci_osc_support(root, flags);
end:
if (result) {
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2a4501dd2515..fdc864f9cf23 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -59,3 +59,13 @@ config HT_IRQ
This allows native hypertransport devices to use interrupts.
If unsure say Y.
+
+config PCI_IOV
+ bool "PCI IOV support"
+ depends on PCI
+ help
+ I/O Virtualization is a PCI feature supported by some devices
+ which allows them to create virtual devices which share their
+ physical resources.
+
+ If unsure, say N.
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 3d07ce24f6a8..ba6af162fd39 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -29,6 +29,8 @@ obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
+obj-$(CONFIG_PCI_IOV) += iov.o
+
#
# Some architectures use the generic PCI setup functions
#
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 52b54f053be0..68f91a252595 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -133,7 +133,7 @@ int pci_bus_add_child(struct pci_bus *bus)
*
* Call hotplug for each new devices.
*/
-void pci_bus_add_devices(struct pci_bus *bus)
+void pci_bus_add_devices(const struct pci_bus *bus)
{
struct pci_dev *dev;
struct pci_bus *child;
@@ -184,8 +184,10 @@ void pci_enable_bridges(struct pci_bus *bus)
list_for_each_entry(dev, &bus->devices, bus_list) {
if (dev->subordinate) {
- retval = pci_enable_device(dev);
- pci_set_master(dev);
+ if (atomic_read(&dev->enable_cnt) == 0) {
+ retval = pci_enable_device(dev);
+ pci_set_master(dev);
+ }
pci_enable_bridges(dev->subordinate);
}
}
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 1c1141801060..fbc63d5e459f 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -30,9 +30,8 @@
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
+#include <linux/acpi.h>
#include <linux/pci-acpi.h>
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
#define MY_NAME "acpi_pcihp"
@@ -333,19 +332,14 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
{
acpi_status status = AE_NOT_FOUND;
acpi_handle handle, phandle;
- struct pci_bus *pbus = bus;
- struct pci_dev *pdev;
-
- do {
- pdev = pbus->self;
- if (!pdev) {
- handle = acpi_get_pci_rootbridge_handle(
- pci_domain_nr(pbus), pbus->number);
+ struct pci_bus *pbus;
+
+ handle = NULL;
+ for (pbus = bus; pbus; pbus = pbus->parent) {
+ handle = acpi_pci_get_bridge_handle(pbus);
+ if (handle)
break;
- }
- handle = DEVICE_ACPI_HANDLE(&(pdev->dev));
- pbus = pbus->parent;
- } while (!handle);
+ }
/*
* _HPP settings apply to all child buses, until another _HPP is
@@ -378,12 +372,10 @@ EXPORT_SYMBOL_GPL(acpi_get_hp_params_from_firmware);
*
* Attempt to take hotplug control from firmware.
*/
-int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
+int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
{
acpi_status status;
acpi_handle chandle, handle;
- struct pci_dev *pdev = dev;
- struct pci_bus *parent;
struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |
@@ -408,33 +400,25 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
acpi_get_name(handle, ACPI_FULL_PATHNAME, &string);
dbg("Trying to get hotplug control for %s\n",
(char *)string.pointer);
- status = pci_osc_control_set(handle, flags);
+ status = acpi_pci_osc_control_set(handle, flags);
if (ACPI_SUCCESS(status))
goto got_one;
kfree(string.pointer);
string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL };
}
- pdev = dev;
- handle = DEVICE_ACPI_HANDLE(&dev->dev);
- while (!handle) {
+ handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+ if (!handle) {
/*
* This hotplug controller was not listed in the ACPI name
* space at all. Try to get acpi handle of parent pci bus.
*/
- if (!pdev || !pdev->bus->parent)
- break;
- parent = pdev->bus->parent;
- dbg("Could not find %s in acpi namespace, trying parent\n",
- pci_name(pdev));
- if (!parent->self)
- /* Parent must be a host bridge */
- handle = acpi_get_pci_rootbridge_handle(
- pci_domain_nr(parent),
- parent->number);
- else
- handle = DEVICE_ACPI_HANDLE(&(parent->self->dev));
- pdev = parent->self;
+ struct pci_bus *pbus;
+ for (pbus = pdev->bus; pbus; pbus = pbus->parent) {
+ handle = acpi_pci_get_bridge_handle(pbus);
+ if (handle)
+ break;
+ }
}
while (handle) {
@@ -453,13 +437,13 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
}
dbg("Cannot get control of hotplug hardware for pci %s\n",
- pci_name(dev));
+ pci_name(pdev));
kfree(string.pointer);
return -ENODEV;
got_one:
- dbg("Gained control for hotplug HW for pci %s (%s)\n", pci_name(dev),
- (char *)string.pointer);
+ dbg("Gained control for hotplug HW for pci %s (%s)\n",
+ pci_name(pdev), (char *)string.pointer);
kfree(string.pointer);
return 0;
}
diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c
index d8649e127298..6151389fd903 100644
--- a/drivers/pci/hotplug/fakephp.c
+++ b/drivers/pci/hotplug/fakephp.c
@@ -1,395 +1,163 @@
-/*
- * Fake PCI Hot Plug Controller Driver
+/* Works like the fakephp driver used to, except a little better.
*
- * Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com>
- * Copyright (C) 2003 IBM Corp.
- * Copyright (C) 2003 Rolf Eike Beer <eike-kernel@sf-tec.de>
+ * - It's possible to remove devices with subordinate busses.
+ * - New PCI devices that appear via any method, not just a fakephp triggered
+ * rescan, will be noticed.
+ * - Devices that are removed via any method, not just a fakephp triggered
+ * removal, will also be noticed.
*
- * Based on ideas and code from:
- * Vladimir Kondratiev <vladimir.kondratiev@intel.com>
- * Rolf Eike Beer <eike-kernel@sf-tec.de>
+ * Uses nothing from the pci-hotplug subsystem.
*
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 2 of the License.
- *
- * Send feedback to <greg@kroah.com>
*/
-/*
- *
- * This driver will "emulate" removing PCI devices from the system. If
- * the "power" file is written to with "0" then the specified PCI device
- * will be completely removed from the kernel.
- *
- * WARNING, this does NOT turn off the power to the PCI device. This is
- * a "logical" removal, not a physical or electrical removal.
- *
- * Use this module at your own risk, you have been warned!
- *
- * Enabling PCI devices is left as an exercise for the reader...
- *
- */
-#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/pci_hotplug.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
+#include <linux/pci.h>
+#include <linux/device.h>
#include "../pci.h"
-#if !defined(MODULE)
- #define MY_NAME "fakephp"
-#else
- #define MY_NAME THIS_MODULE->name
-#endif
-
-#define dbg(format, arg...) \
- do { \
- if (debug) \
- printk(KERN_DEBUG "%s: " format, \
- MY_NAME , ## arg); \
- } while (0)
-#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
-#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME , ## arg)
-
-#define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>"
-#define DRIVER_DESC "Fake PCI Hot Plug Controller Driver"
-
-struct dummy_slot {
- struct list_head node;
- struct hotplug_slot *slot;
- struct pci_dev *dev;
- struct work_struct remove_work;
- unsigned long removed;
+struct legacy_slot {
+ struct kobject kobj;
+ struct pci_dev *dev;
+ struct list_head list;
};
-static int debug;
-static int dup_slots;
-static LIST_HEAD(slot_list);
-static struct workqueue_struct *dummyphp_wq;
-
-static void pci_rescan_worker(struct work_struct *work);
-static DECLARE_WORK(pci_rescan_work, pci_rescan_worker);
-
-static int enable_slot (struct hotplug_slot *slot);
-static int disable_slot (struct hotplug_slot *slot);
+static LIST_HEAD(legacy_list);
-static struct hotplug_slot_ops dummy_hotplug_slot_ops = {
- .owner = THIS_MODULE,
- .enable_slot = enable_slot,
- .disable_slot = disable_slot,
-};
-
-static void dummy_release(struct hotplug_slot *slot)
+static ssize_t legacy_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
{
- struct dummy_slot *dslot = slot->private;
-
- list_del(&dslot->node);
- kfree(dslot->slot->info);
- kfree(dslot->slot);
- pci_dev_put(dslot->dev);
- kfree(dslot);
+ struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
+ strcpy(buf, "1\n");
+ return 2;
}
-#define SLOT_NAME_SIZE 8
-
-static int add_slot(struct pci_dev *dev)
+static void remove_callback(void *data)
{
- struct dummy_slot *dslot;
- struct hotplug_slot *slot;
- char name[SLOT_NAME_SIZE];
- int retval = -ENOMEM;
- static int count = 1;
-
- slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
- if (!slot)
- goto error;
-
- slot->info = kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL);
- if (!slot->info)
- goto error_slot;
-
- slot->info->power_status = 1;
- slot->info->max_bus_speed = PCI_SPEED_UNKNOWN;
- slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
-
- dslot = kzalloc(sizeof(struct dummy_slot), GFP_KERNEL);
- if (!dslot)
- goto error_info;
-
- if (dup_slots)
- snprintf(name, SLOT_NAME_SIZE, "fake");
- else
- snprintf(name, SLOT_NAME_SIZE, "fake%d", count++);
- dbg("slot->name = %s\n", name);
- slot->ops = &dummy_hotplug_slot_ops;
- slot->release = &dummy_release;
- slot->private = dslot;
-
- retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn), name);
- if (retval) {
- err("pci_hp_register failed with error %d\n", retval);
- goto error_dslot;
- }
-
- dbg("slot->name = %s\n", hotplug_slot_name(slot));
- dslot->slot = slot;
- dslot->dev = pci_dev_get(dev);
- list_add (&dslot->node, &slot_list);
- return retval;
-
-error_dslot:
- kfree(dslot);
-error_info:
- kfree(slot->info);
-error_slot:
- kfree(slot);
-error:
- return retval;
+ pci_remove_bus_device((struct pci_dev *)data);
}
-static int __init pci_scan_buses(void)
+static ssize_t legacy_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
{
- struct pci_dev *dev = NULL;
- int lastslot = 0;
+ struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
+ unsigned long val;
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
- if (PCI_FUNC(dev->devfn) > 0 &&
- lastslot == PCI_SLOT(dev->devfn))
- continue;
- lastslot = PCI_SLOT(dev->devfn);
- add_slot(dev);
- }
+ if (strict_strtoul(buf, 0, &val) < 0)
+ return -EINVAL;
- return 0;
+ if (val)
+ pci_rescan_bus(slot->dev->bus);
+ else
+ sysfs_schedule_callback(&slot->dev->dev.kobj, remove_callback,
+ slot->dev, THIS_MODULE);
+ return len;
}
-static void remove_slot(struct dummy_slot *dslot)
-{
- int retval;
-
- dbg("removing slot %s\n", hotplug_slot_name(dslot->slot));
- retval = pci_hp_deregister(dslot->slot);
- if (retval)
- err("Problem unregistering a slot %s\n",
- hotplug_slot_name(dslot->slot));
-}
+static struct attribute *legacy_attrs[] = {
+ &(struct attribute){ .name = "power", .mode = 0644 },
+ NULL,
+};
-/* called from the single-threaded workqueue handler to remove a slot */
-static void remove_slot_worker(struct work_struct *work)
+static void legacy_release(struct kobject *kobj)
{
- struct dummy_slot *dslot =
- container_of(work, struct dummy_slot, remove_work);
- remove_slot(dslot);
-}
+ struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
-/**
- * pci_rescan_slot - Rescan slot
- * @temp: Device template. Should be set: bus and devfn.
- *
- * Tries hard not to re-enable already existing devices;
- * also handles scanning of subfunctions.
- */
-static int pci_rescan_slot(struct pci_dev *temp)
-{
- struct pci_bus *bus = temp->bus;
- struct pci_dev *dev;
- int func;
- u8 hdr_type;
- int count = 0;
-
- if (!pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) {
- temp->hdr_type = hdr_type & 0x7f;
- if ((dev = pci_get_slot(bus, temp->devfn)) != NULL)
- pci_dev_put(dev);
- else {
- dev = pci_scan_single_device(bus, temp->devfn);
- if (dev) {
- dbg("New device on %s function %x:%x\n",
- bus->name, temp->devfn >> 3,
- temp->devfn & 7);
- count++;
- }
- }
- /* multifunction device? */
- if (!(hdr_type & 0x80))
- return count;
-
- /* continue scanning for other functions */
- for (func = 1, temp->devfn++; func < 8; func++, temp->devfn++) {
- if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type))
- continue;
- temp->hdr_type = hdr_type & 0x7f;
-
- if ((dev = pci_get_slot(bus, temp->devfn)) != NULL)
- pci_dev_put(dev);
- else {
- dev = pci_scan_single_device(bus, temp->devfn);
- if (dev) {
- dbg("New device on %s function %x:%x\n",
- bus->name, temp->devfn >> 3,
- temp->devfn & 7);
- count++;
- }
- }
- }
- }
-
- return count;
+ pci_dev_put(slot->dev);
+ kfree(slot);
}
+static struct kobj_type legacy_ktype = {
+ .sysfs_ops = &(struct sysfs_ops){
+ .store = legacy_store, .show = legacy_show
+ },
+ .release = &legacy_release,
+ .default_attrs = legacy_attrs,
+};
-/**
- * pci_rescan_bus - Rescan PCI bus
- * @bus: the PCI bus to rescan
- *
- * Call pci_rescan_slot for each possible function of the bus.
- */
-static void pci_rescan_bus(const struct pci_bus *bus)
+static int legacy_add_slot(struct pci_dev *pdev)
{
- unsigned int devfn;
- struct pci_dev *dev;
- int retval;
- int found = 0;
- dev = alloc_pci_dev();
- if (!dev)
- return;
+ struct legacy_slot *slot = kzalloc(sizeof(*slot), GFP_KERNEL);
- dev->bus = (struct pci_bus*)bus;
- dev->sysdata = bus->sysdata;
- for (devfn = 0; devfn < 0x100; devfn += 8) {
- dev->devfn = devfn;
- found += pci_rescan_slot(dev);
- }
-
- if (found) {
- pci_bus_assign_resources(bus);
- list_for_each_entry(dev, &bus->devices, bus_list) {
- /* Skip already-added devices */
- if (dev->is_added)
- continue;
- retval = pci_bus_add_device(dev);
- if (retval)
- dev_err(&dev->dev,
- "Error adding device, continuing\n");
- else
- add_slot(dev);
- }
- pci_bus_add_devices(bus);
- }
- kfree(dev);
-}
+ if (!slot)
+ return -ENOMEM;
-/* recursively scan all buses */
-static void pci_rescan_buses(const struct list_head *list)
-{
- const struct list_head *l;
- list_for_each(l,list) {
- const struct pci_bus *b = pci_bus_b(l);
- pci_rescan_bus(b);
- pci_rescan_buses(&b->children);
+ if (kobject_init_and_add(&slot->kobj, &legacy_ktype,
+ &pci_slots_kset->kobj, "%s",
+ dev_name(&pdev->dev))) {
+ dev_warn(&pdev->dev, "Failed to created legacy fake slot\n");
+ return -EINVAL;
}
-}
+ slot->dev = pci_dev_get(pdev);
-/* initiate rescan of all pci buses */
-static inline void pci_rescan(void) {
- pci_rescan_buses(&pci_root_buses);
-}
-
-/* called from the single-threaded workqueue handler to rescan all pci buses */
-static void pci_rescan_worker(struct work_struct *work)
-{
- pci_rescan();
-}
+ list_add(&slot->list, &legacy_list);
-static int enable_slot(struct hotplug_slot *hotplug_slot)
-{
- /* mis-use enable_slot for rescanning of the pci bus */
- cancel_work_sync(&pci_rescan_work);
- queue_work(dummyphp_wq, &pci_rescan_work);
return 0;
}
-static int disable_slot(struct hotplug_slot *slot)
+static int legacy_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
{
- struct dummy_slot *dslot;
- struct pci_dev *dev;
- int func;
-
- if (!slot)
- return -ENODEV;
- dslot = slot->private;
-
- dbg("%s - physical_slot = %s\n", __func__, hotplug_slot_name(slot));
+ struct pci_dev *pdev = to_pci_dev(data);
- for (func = 7; func >= 0; func--) {
- dev = pci_get_slot(dslot->dev->bus, dslot->dev->devfn + func);
- if (!dev)
- continue;
+ if (action == BUS_NOTIFY_ADD_DEVICE) {
+ legacy_add_slot(pdev);
+ } else if (action == BUS_NOTIFY_DEL_DEVICE) {
+ struct legacy_slot *slot;
- if (test_and_set_bit(0, &dslot->removed)) {
- dbg("Slot already scheduled for removal\n");
- pci_dev_put(dev);
- return -ENODEV;
- }
+ list_for_each_entry(slot, &legacy_list, list)
+ if (slot->dev == pdev)
+ goto found;
- /* remove the device from the pci core */
- pci_remove_bus_device(dev);
-
- /* queue work item to blow away this sysfs entry and other
- * parts.
- */
- INIT_WORK(&dslot->remove_work, remove_slot_worker);
- queue_work(dummyphp_wq, &dslot->remove_work);
-
- pci_dev_put(dev);
+ dev_warn(&pdev->dev, "Missing legacy fake slot?");
+ return -ENODEV;
+found:
+ kobject_del(&slot->kobj);
+ list_del(&slot->list);
+ kobject_put(&slot->kobj);
}
+
return 0;
}
-static void cleanup_slots (void)
-{
- struct list_head *tmp;
- struct list_head *next;
- struct dummy_slot *dslot;
-
- destroy_workqueue(dummyphp_wq);
- list_for_each_safe (tmp, next, &slot_list) {
- dslot = list_entry (tmp, struct dummy_slot, node);
- remove_slot(dslot);
- }
-
-}
+static struct notifier_block legacy_notifier = {
+ .notifier_call = legacy_notify
+};
-static int __init dummyphp_init(void)
+static int __init init_legacy(void)
{
- info(DRIVER_DESC "\n");
+ struct pci_dev *pdev = NULL;
- dummyphp_wq = create_singlethread_workqueue(MY_NAME);
- if (!dummyphp_wq)
- return -ENOMEM;
+ /* Add existing devices */
+ while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev)))
+ legacy_add_slot(pdev);
- return pci_scan_buses();
+ /* Be alerted of any new ones */
+ bus_register_notifier(&pci_bus_type, &legacy_notifier);
+ return 0;
}
+module_init(init_legacy);
-
-static void __exit dummyphp_exit(void)
+static void __exit remove_legacy(void)
{
- cleanup_slots();
+ struct legacy_slot *slot, *tmp;
+
+ bus_unregister_notifier(&pci_bus_type, &legacy_notifier);
+
+ list_for_each_entry_safe(slot, tmp, &legacy_list, list) {
+ list_del(&slot->list);
+ kobject_del(&slot->kobj);
+ kobject_put(&slot->kobj);
+ }
}
+module_exit(remove_legacy);
-module_init(dummyphp_init);
-module_exit(dummyphp_exit);
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_AUTHOR("Trent Piepho <xyzzy@speakeasy.org>");
+MODULE_DESCRIPTION("Legacy version of the fakephp interface");
MODULE_LICENSE("GPL");
-module_param(debug, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
-module_param(dup_slots, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(dup_slots, "Force duplicate slot names for debugging");
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 39ae37589fda..0a368547e633 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -46,10 +46,10 @@ extern int pciehp_force;
extern struct workqueue_struct *pciehp_wq;
#define dbg(format, arg...) \
- do { \
- if (pciehp_debug) \
- printk("%s: " format, MY_NAME , ## arg); \
- } while (0)
+do { \
+ if (pciehp_debug) \
+ printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \
+} while (0)
#define err(format, arg...) \
printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
#define info(format, arg...) \
@@ -60,7 +60,7 @@ extern struct workqueue_struct *pciehp_wq;
#define ctrl_dbg(ctrl, format, arg...) \
do { \
if (pciehp_debug) \
- dev_printk(, &ctrl->pcie->device, \
+ dev_printk(KERN_DEBUG, &ctrl->pcie->device, \
format, ## arg); \
} while (0)
#define ctrl_err(ctrl, format, arg...) \
@@ -108,10 +108,11 @@ struct controller {
u32 slot_cap;
u8 cap_base;
struct timer_list poll_timer;
- int cmd_busy;
+ unsigned int cmd_busy:1;
unsigned int no_cmd_complete:1;
unsigned int link_active_reporting:1;
unsigned int notification_enabled:1;
+ unsigned int power_fault_detected;
};
#define INT_BUTTON_IGNORE 0
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index 438d795f9fe3..96048010e7d9 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -67,37 +67,27 @@ static int __init parse_detect_mode(void)
return PCIEHP_DETECT_DEFAULT;
}
-static struct pcie_port_service_id __initdata port_pci_ids[] = {
- {
- .vendor = PCI_ANY_ID,
- .device = PCI_ANY_ID,
- .port_type = PCIE_ANY_PORT,
- .service_type = PCIE_PORT_SERVICE_HP,
- .driver_data = 0,
- }, { /* end: all zeroes */ }
-};
-
static int __initdata dup_slot_id;
static int __initdata acpi_slot_detected;
static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots);
/* Dummy driver for dumplicate name detection */
-static int __init dummy_probe(struct pcie_device *dev,
- const struct pcie_port_service_id *id)
+static int __init dummy_probe(struct pcie_device *dev)
{
int pos;
u32 slot_cap;
struct slot *slot, *tmp;
struct pci_dev *pdev = dev->port;
struct pci_bus *pbus = pdev->subordinate;
- if (!(slot = kzalloc(sizeof(*slot), GFP_KERNEL)))
- return -ENOMEM;
/* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */
if (pciehp_get_hp_hw_control_from_firmware(pdev))
return -ENODEV;
if (!(pos = pci_find_capability(pdev, PCI_CAP_ID_EXP)))
return -ENODEV;
pci_read_config_dword(pdev, pos + PCI_EXP_SLTCAP, &slot_cap);
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot)
+ return -ENOMEM;
slot->number = slot_cap >> 19;
list_for_each_entry(tmp, &dummy_slots, slot_list) {
if (tmp->number == slot->number)
@@ -111,7 +101,8 @@ static int __init dummy_probe(struct pcie_device *dev,
static struct pcie_port_service_driver __initdata dummy_driver = {
.name = "pciehp_dummy",
- .id_table = port_pci_ids,
+ .port_type = PCIE_ANY_PORT,
+ .service = PCIE_PORT_SERVICE_HP,
.probe = dummy_probe,
};
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 681e3912b821..fb254b2454de 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -401,7 +401,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
return 0;
}
-static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_id *id)
+static int pciehp_probe(struct pcie_device *dev)
{
int rc;
struct controller *ctrl;
@@ -475,7 +475,7 @@ static void pciehp_remove (struct pcie_device *dev)
}
#ifdef CONFIG_PM
-static int pciehp_suspend (struct pcie_device *dev, pm_message_t state)
+static int pciehp_suspend (struct pcie_device *dev)
{
dev_info(&dev->device, "%s ENTRY\n", __func__);
return 0;
@@ -503,20 +503,12 @@ static int pciehp_resume (struct pcie_device *dev)
}
return 0;
}
-#endif
-
-static struct pcie_port_service_id port_pci_ids[] = { {
- .vendor = PCI_ANY_ID,
- .device = PCI_ANY_ID,
- .port_type = PCIE_ANY_PORT,
- .service_type = PCIE_PORT_SERVICE_HP,
- .driver_data = 0,
- }, { /* end: all zeroes */ }
-};
+#endif /* PM */
static struct pcie_port_service_driver hpdriver_portdrv = {
.name = PCIE_MODULE_NAME,
- .id_table = &port_pci_ids[0],
+ .port_type = PCIE_ANY_PORT,
+ .service = PCIE_PORT_SERVICE_HP,
.probe = pciehp_probe,
.remove = pciehp_remove,
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 7a16c6897bb9..07bd32151146 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -548,23 +548,21 @@ static int hpc_power_on_slot(struct slot * slot)
slot_cmd = POWER_ON;
cmd_mask = PCI_EXP_SLTCTL_PCC;
- /* Enable detection that we turned off at slot power-off time */
if (!pciehp_poll_mode) {
- slot_cmd |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
- PCI_EXP_SLTCTL_PDCE);
- cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
- PCI_EXP_SLTCTL_PDCE);
+ /* Enable power fault detection turned off at power off time */
+ slot_cmd |= PCI_EXP_SLTCTL_PFDE;
+ cmd_mask |= PCI_EXP_SLTCTL_PFDE;
}
retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-
if (retval) {
ctrl_err(ctrl, "Write %x command failed!\n", slot_cmd);
- return -1;
+ return retval;
}
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
__func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
+ ctrl->power_fault_detected = 0;
return retval;
}
@@ -621,18 +619,10 @@ static int hpc_power_off_slot(struct slot * slot)
slot_cmd = POWER_OFF;
cmd_mask = PCI_EXP_SLTCTL_PCC;
- /*
- * If we get MRL or presence detect interrupts now, the isr
- * will notice the sticky power-fault bit too and issue power
- * indicator change commands. This will lead to an endless loop
- * of command completions, since the power-fault bit remains on
- * till the slot is powered on again.
- */
if (!pciehp_poll_mode) {
- slot_cmd &= ~(PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
- PCI_EXP_SLTCTL_PDCE);
- cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
- PCI_EXP_SLTCTL_PDCE);
+ /* Disable power fault detection */
+ slot_cmd &= ~PCI_EXP_SLTCTL_PFDE;
+ cmd_mask |= PCI_EXP_SLTCTL_PFDE;
}
retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
@@ -672,10 +662,11 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
detected &= (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC |
PCI_EXP_SLTSTA_CC);
+ detected &= ~intr_loc;
intr_loc |= detected;
if (!intr_loc)
return IRQ_NONE;
- if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, detected)) {
+ if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, intr_loc)) {
ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n",
__func__);
return IRQ_NONE;
@@ -709,9 +700,10 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
pciehp_handle_presence_change(p_slot);
/* Check Power Fault Detected */
- if (intr_loc & PCI_EXP_SLTSTA_PFD)
+ if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
+ ctrl->power_fault_detected = 1;
pciehp_handle_power_fault(p_slot);
-
+ }
return IRQ_HANDLED;
}
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index 6aba0b6cf2e0..974e924ca96d 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -48,10 +48,10 @@ extern int shpchp_debug;
extern struct workqueue_struct *shpchp_wq;
#define dbg(format, arg...) \
- do { \
- if (shpchp_debug) \
- printk("%s: " format, MY_NAME , ## arg); \
- } while (0)
+do { \
+ if (shpchp_debug) \
+ printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \
+} while (0)
#define err(format, arg...) \
printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
#define info(format, arg...) \
@@ -62,7 +62,7 @@ extern struct workqueue_struct *shpchp_wq;
#define ctrl_dbg(ctrl, format, arg...) \
do { \
if (shpchp_debug) \
- dev_printk(, &ctrl->pci_dev->dev, \
+ dev_printk(KERN_DEBUG, &ctrl->pci_dev->dev, \
format, ## arg); \
} while (0)
#define ctrl_err(ctrl, format, arg...) \
diff --git a/drivers/pci/hotplug/shpchp_pci.c b/drivers/pci/hotplug/shpchp_pci.c
index 138f161becc0..aa315e52529b 100644
--- a/drivers/pci/hotplug/shpchp_pci.c
+++ b/drivers/pci/hotplug/shpchp_pci.c
@@ -137,7 +137,7 @@ int __ref shpchp_configure_device(struct slot *p_slot)
busnr))
break;
}
- if (busnr >= end) {
+ if (busnr > end) {
ctrl_err(ctrl,
"No free bus for hot-added bridge\n");
pci_dev_put(dev);
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 49402c399232..9dbd5066acaf 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1782,7 +1782,7 @@ static inline void iommu_prepare_isa(void)
ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
if (ret)
- printk("IOMMU: Failed to create 0-64M identity map, "
+ printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, "
"floppy might not work\n");
}
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
new file mode 100644
index 000000000000..7227efc760db
--- /dev/null
+++ b/drivers/pci/iov.c
@@ -0,0 +1,680 @@
+/*
+ * drivers/pci/iov.c
+ *
+ * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
+ *
+ * PCI Express I/O Virtualization (IOV) support.
+ * Single Root IOV 1.0
+ */
+
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include "pci.h"
+
+#define VIRTFN_ID_LEN 16
+
+static inline u8 virtfn_bus(struct pci_dev *dev, int id)
+{
+ return dev->bus->number + ((dev->devfn + dev->sriov->offset +
+ dev->sriov->stride * id) >> 8);
+}
+
+static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
+{
+ return (dev->devfn + dev->sriov->offset +
+ dev->sriov->stride * id) & 0xff;
+}
+
+static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
+{
+ int rc;
+ struct pci_bus *child;
+
+ if (bus->number == busnr)
+ return bus;
+
+ child = pci_find_bus(pci_domain_nr(bus), busnr);
+ if (child)
+ return child;
+
+ child = pci_add_new_bus(bus, NULL, busnr);
+ if (!child)
+ return NULL;
+
+ child->subordinate = busnr;
+ child->dev.parent = bus->bridge;
+ rc = pci_bus_add_child(child);
+ if (rc) {
+ pci_remove_bus(child);
+ return NULL;
+ }
+
+ return child;
+}
+
+static void virtfn_remove_bus(struct pci_bus *bus, int busnr)
+{
+ struct pci_bus *child;
+
+ if (bus->number == busnr)
+ return;
+
+ child = pci_find_bus(pci_domain_nr(bus), busnr);
+ BUG_ON(!child);
+
+ if (list_empty(&child->devices))
+ pci_remove_bus(child);
+}
+
+static int virtfn_add(struct pci_dev *dev, int id, int reset)
+{
+ int i;
+ int rc;
+ u64 size;
+ char buf[VIRTFN_ID_LEN];
+ struct pci_dev *virtfn;
+ struct resource *res;
+ struct pci_sriov *iov = dev->sriov;
+
+ virtfn = alloc_pci_dev();
+ if (!virtfn)
+ return -ENOMEM;
+
+ mutex_lock(&iov->dev->sriov->lock);
+ virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id));
+ if (!virtfn->bus) {
+ kfree(virtfn);
+ mutex_unlock(&iov->dev->sriov->lock);
+ return -ENOMEM;
+ }
+ virtfn->devfn = virtfn_devfn(dev, id);
+ virtfn->vendor = dev->vendor;
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device);
+ pci_setup_device(virtfn);
+ virtfn->dev.parent = dev->dev.parent;
+
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = dev->resource + PCI_IOV_RESOURCES + i;
+ if (!res->parent)
+ continue;
+ virtfn->resource[i].name = pci_name(virtfn);
+ virtfn->resource[i].flags = res->flags;
+ size = resource_size(res);
+ do_div(size, iov->total);
+ virtfn->resource[i].start = res->start + size * id;
+ virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
+ rc = request_resource(res, &virtfn->resource[i]);
+ BUG_ON(rc);
+ }
+
+ if (reset)
+ pci_execute_reset_function(virtfn);
+
+ pci_device_add(virtfn, virtfn->bus);
+ mutex_unlock(&iov->dev->sriov->lock);
+
+ virtfn->physfn = pci_dev_get(dev);
+ virtfn->is_virtfn = 1;
+
+ rc = pci_bus_add_device(virtfn);
+ if (rc)
+ goto failed1;
+ sprintf(buf, "virtfn%u", id);
+ rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
+ if (rc)
+ goto failed1;
+ rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
+ if (rc)
+ goto failed2;
+
+ kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
+
+ return 0;
+
+failed2:
+ sysfs_remove_link(&dev->dev.kobj, buf);
+failed1:
+ pci_dev_put(dev);
+ mutex_lock(&iov->dev->sriov->lock);
+ pci_remove_bus_device(virtfn);
+ virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
+ mutex_unlock(&iov->dev->sriov->lock);
+
+ return rc;
+}
+
+static void virtfn_remove(struct pci_dev *dev, int id, int reset)
+{
+ char buf[VIRTFN_ID_LEN];
+ struct pci_bus *bus;
+ struct pci_dev *virtfn;
+ struct pci_sriov *iov = dev->sriov;
+
+ bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id));
+ if (!bus)
+ return;
+
+ virtfn = pci_get_slot(bus, virtfn_devfn(dev, id));
+ if (!virtfn)
+ return;
+
+ pci_dev_put(virtfn);
+
+ if (reset) {
+ device_release_driver(&virtfn->dev);
+ pci_execute_reset_function(virtfn);
+ }
+
+ sprintf(buf, "virtfn%u", id);
+ sysfs_remove_link(&dev->dev.kobj, buf);
+ sysfs_remove_link(&virtfn->dev.kobj, "physfn");
+
+ mutex_lock(&iov->dev->sriov->lock);
+ pci_remove_bus_device(virtfn);
+ virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
+ mutex_unlock(&iov->dev->sriov->lock);
+
+ pci_dev_put(dev);
+}
+
+static int sriov_migration(struct pci_dev *dev)
+{
+ u16 status;
+ struct pci_sriov *iov = dev->sriov;
+
+ if (!iov->nr_virtfn)
+ return 0;
+
+ if (!(iov->cap & PCI_SRIOV_CAP_VFM))
+ return 0;
+
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_STATUS, &status);
+ if (!(status & PCI_SRIOV_STATUS_VFM))
+ return 0;
+
+ schedule_work(&iov->mtask);
+
+ return 1;
+}
+
+static void sriov_migration_task(struct work_struct *work)
+{
+ int i;
+ u8 state;
+ u16 status;
+ struct pci_sriov *iov = container_of(work, struct pci_sriov, mtask);
+
+ for (i = iov->initial; i < iov->nr_virtfn; i++) {
+ state = readb(iov->mstate + i);
+ if (state == PCI_SRIOV_VFM_MI) {
+ writeb(PCI_SRIOV_VFM_AV, iov->mstate + i);
+ state = readb(iov->mstate + i);
+ if (state == PCI_SRIOV_VFM_AV)
+ virtfn_add(iov->self, i, 1);
+ } else if (state == PCI_SRIOV_VFM_MO) {
+ virtfn_remove(iov->self, i, 1);
+ writeb(PCI_SRIOV_VFM_UA, iov->mstate + i);
+ state = readb(iov->mstate + i);
+ if (state == PCI_SRIOV_VFM_AV)
+ virtfn_add(iov->self, i, 0);
+ }
+ }
+
+ pci_read_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, &status);
+ status &= ~PCI_SRIOV_STATUS_VFM;
+ pci_write_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, status);
+}
+
+static int sriov_enable_migration(struct pci_dev *dev, int nr_virtfn)
+{
+ int bir;
+ u32 table;
+ resource_size_t pa;
+ struct pci_sriov *iov = dev->sriov;
+
+ if (nr_virtfn <= iov->initial)
+ return 0;
+
+ pci_read_config_dword(dev, iov->pos + PCI_SRIOV_VFM, &table);
+ bir = PCI_SRIOV_VFM_BIR(table);
+ if (bir > PCI_STD_RESOURCE_END)
+ return -EIO;
+
+ table = PCI_SRIOV_VFM_OFFSET(table);
+ if (table + nr_virtfn > pci_resource_len(dev, bir))
+ return -EIO;
+
+ pa = pci_resource_start(dev, bir) + table;
+ iov->mstate = ioremap(pa, nr_virtfn);
+ if (!iov->mstate)
+ return -ENOMEM;
+
+ INIT_WORK(&iov->mtask, sriov_migration_task);
+
+ iov->ctrl |= PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR;
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+
+ return 0;
+}
+
+static void sriov_disable_migration(struct pci_dev *dev)
+{
+ struct pci_sriov *iov = dev->sriov;
+
+ iov->ctrl &= ~(PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+
+ cancel_work_sync(&iov->mtask);
+ iounmap(iov->mstate);
+}
+
+static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
+{
+ int rc;
+ int i, j;
+ int nres;
+ u16 offset, stride, initial;
+ struct resource *res;
+ struct pci_dev *pdev;
+ struct pci_sriov *iov = dev->sriov;
+
+ if (!nr_virtfn)
+ return 0;
+
+ if (iov->nr_virtfn)
+ return -EINVAL;
+
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
+ if (initial > iov->total ||
+ (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total)))
+ return -EIO;
+
+ if (nr_virtfn < 0 || nr_virtfn > iov->total ||
+ (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
+ return -EINVAL;
+
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset);
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride);
+ if (!offset || (nr_virtfn > 1 && !stride))
+ return -EIO;
+
+ nres = 0;
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = dev->resource + PCI_IOV_RESOURCES + i;
+ if (res->parent)
+ nres++;
+ }
+ if (nres != iov->nres) {
+ dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n");
+ return -ENOMEM;
+ }
+
+ iov->offset = offset;
+ iov->stride = stride;
+
+ if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) {
+ dev_err(&dev->dev, "SR-IOV: bus number out of range\n");
+ return -ENOMEM;
+ }
+
+ if (iov->link != dev->devfn) {
+ pdev = pci_get_slot(dev->bus, iov->link);
+ if (!pdev)
+ return -ENODEV;
+
+ pci_dev_put(pdev);
+
+ if (!pdev->is_physfn)
+ return -ENODEV;
+
+ rc = sysfs_create_link(&dev->dev.kobj,
+ &pdev->dev.kobj, "dep_link");
+ if (rc)
+ return rc;
+ }
+
+ iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
+ pci_block_user_cfg_access(dev);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+ msleep(100);
+ pci_unblock_user_cfg_access(dev);
+
+ iov->initial = initial;
+ if (nr_virtfn < initial)
+ initial = nr_virtfn;
+
+ for (i = 0; i < initial; i++) {
+ rc = virtfn_add(dev, i, 0);
+ if (rc)
+ goto failed;
+ }
+
+ if (iov->cap & PCI_SRIOV_CAP_VFM) {
+ rc = sriov_enable_migration(dev, nr_virtfn);
+ if (rc)
+ goto failed;
+ }
+
+ kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
+ iov->nr_virtfn = nr_virtfn;
+
+ return 0;
+
+failed:
+ for (j = 0; j < i; j++)
+ virtfn_remove(dev, j, 0);
+
+ iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+ pci_block_user_cfg_access(dev);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+ ssleep(1);
+ pci_unblock_user_cfg_access(dev);
+
+ if (iov->link != dev->devfn)
+ sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+ return rc;
+}
+
+static void sriov_disable(struct pci_dev *dev)
+{
+ int i;
+ struct pci_sriov *iov = dev->sriov;
+
+ if (!iov->nr_virtfn)
+ return;
+
+ if (iov->cap & PCI_SRIOV_CAP_VFM)
+ sriov_disable_migration(dev);
+
+ for (i = 0; i < iov->nr_virtfn; i++)
+ virtfn_remove(dev, i, 0);
+
+ iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+ pci_block_user_cfg_access(dev);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+ ssleep(1);
+ pci_unblock_user_cfg_access(dev);
+
+ if (iov->link != dev->devfn)
+ sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+ iov->nr_virtfn = 0;
+}
+
+static int sriov_init(struct pci_dev *dev, int pos)
+{
+ int i;
+ int rc;
+ int nres;
+ u32 pgsz;
+ u16 ctrl, total, offset, stride;
+ struct pci_sriov *iov;
+ struct resource *res;
+ struct pci_dev *pdev;
+
+ if (dev->pcie_type != PCI_EXP_TYPE_RC_END &&
+ dev->pcie_type != PCI_EXP_TYPE_ENDPOINT)
+ return -ENODEV;
+
+ pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
+ if (ctrl & PCI_SRIOV_CTRL_VFE) {
+ pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
+ ssleep(1);
+ }
+
+ pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
+ if (!total)
+ return 0;
+
+ ctrl = 0;
+ list_for_each_entry(pdev, &dev->bus->devices, bus_list)
+ if (pdev->is_physfn)
+ goto found;
+
+ pdev = NULL;
+ if (pci_ari_enabled(dev->bus))
+ ctrl |= PCI_SRIOV_CTRL_ARI;
+
+found:
+ pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
+ pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total);
+ pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset);
+ pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride);
+ if (!offset || (total > 1 && !stride))
+ return -EIO;
+
+ pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
+ i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
+ pgsz &= ~((1 << i) - 1);
+ if (!pgsz)
+ return -EIO;
+
+ pgsz &= ~(pgsz - 1);
+ pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
+
+ nres = 0;
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = dev->resource + PCI_IOV_RESOURCES + i;
+ i += __pci_read_base(dev, pci_bar_unknown, res,
+ pos + PCI_SRIOV_BAR + i * 4);
+ if (!res->flags)
+ continue;
+ if (resource_size(res) & (PAGE_SIZE - 1)) {
+ rc = -EIO;
+ goto failed;
+ }
+ res->end = res->start + resource_size(res) * total - 1;
+ nres++;
+ }
+
+ iov = kzalloc(sizeof(*iov), GFP_KERNEL);
+ if (!iov) {
+ rc = -ENOMEM;
+ goto failed;
+ }
+
+ iov->pos = pos;
+ iov->nres = nres;
+ iov->ctrl = ctrl;
+ iov->total = total;
+ iov->offset = offset;
+ iov->stride = stride;
+ iov->pgsz = pgsz;
+ iov->self = dev;
+ pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
+ pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
+
+ if (pdev)
+ iov->dev = pci_dev_get(pdev);
+ else {
+ iov->dev = dev;
+ mutex_init(&iov->lock);
+ }
+
+ dev->sriov = iov;
+ dev->is_physfn = 1;
+
+ return 0;
+
+failed:
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = dev->resource + PCI_IOV_RESOURCES + i;
+ res->flags = 0;
+ }
+
+ return rc;
+}
+
+static void sriov_release(struct pci_dev *dev)
+{
+ BUG_ON(dev->sriov->nr_virtfn);
+
+ if (dev == dev->sriov->dev)
+ mutex_destroy(&dev->sriov->lock);
+ else
+ pci_dev_put(dev->sriov->dev);
+
+ kfree(dev->sriov);
+ dev->sriov = NULL;
+}
+
+static void sriov_restore_state(struct pci_dev *dev)
+{
+ int i;
+ u16 ctrl;
+ struct pci_sriov *iov = dev->sriov;
+
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
+ if (ctrl & PCI_SRIOV_CTRL_VFE)
+ return;
+
+ for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
+ pci_update_resource(dev, i);
+
+ pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+ if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
+ msleep(100);
+}
+
+/**
+ * pci_iov_init - initialize the IOV capability
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_iov_init(struct pci_dev *dev)
+{
+ int pos;
+
+ if (!dev->is_pcie)
+ return -ENODEV;
+
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+ if (pos)
+ return sriov_init(dev, pos);
+
+ return -ENODEV;
+}
+
+/**
+ * pci_iov_release - release resources used by the IOV capability
+ * @dev: the PCI device
+ */
+void pci_iov_release(struct pci_dev *dev)
+{
+ if (dev->is_physfn)
+ sriov_release(dev);
+}
+
+/**
+ * pci_iov_resource_bar - get position of the SR-IOV BAR
+ * @dev: the PCI device
+ * @resno: the resource number
+ * @type: the BAR type to be filled in
+ *
+ * Returns position of the BAR encapsulated in the SR-IOV capability.
+ */
+int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+ enum pci_bar_type *type)
+{
+ if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END)
+ return 0;
+
+ BUG_ON(!dev->is_physfn);
+
+ *type = pci_bar_unknown;
+
+ return dev->sriov->pos + PCI_SRIOV_BAR +
+ 4 * (resno - PCI_IOV_RESOURCES);
+}
+
+/**
+ * pci_restore_iov_state - restore the state of the IOV capability
+ * @dev: the PCI device
+ */
+void pci_restore_iov_state(struct pci_dev *dev)
+{
+ if (dev->is_physfn)
+ sriov_restore_state(dev);
+}
+
+/**
+ * pci_iov_bus_range - find bus range used by Virtual Function
+ * @bus: the PCI bus
+ *
+ * Returns max number of buses (exclude current one) used by Virtual
+ * Functions.
+ */
+int pci_iov_bus_range(struct pci_bus *bus)
+{
+ int max = 0;
+ u8 busnr;
+ struct pci_dev *dev;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ if (!dev->is_physfn)
+ continue;
+ busnr = virtfn_bus(dev, dev->sriov->total - 1);
+ if (busnr > max)
+ max = busnr;
+ }
+
+ return max ? max - bus->number : 0;
+}
+
+/**
+ * pci_enable_sriov - enable the SR-IOV capability
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+ might_sleep();
+
+ if (!dev->is_physfn)
+ return -ENODEV;
+
+ return sriov_enable(dev, nr_virtfn);
+}
+EXPORT_SYMBOL_GPL(pci_enable_sriov);
+
+/**
+ * pci_disable_sriov - disable the SR-IOV capability
+ * @dev: the PCI device
+ */
+void pci_disable_sriov(struct pci_dev *dev)
+{
+ might_sleep();
+
+ if (!dev->is_physfn)
+ return;
+
+ sriov_disable(dev);
+}
+EXPORT_SYMBOL_GPL(pci_disable_sriov);
+
+/**
+ * pci_sriov_migration - notify SR-IOV core of Virtual Function Migration
+ * @dev: the PCI device
+ *
+ * Returns IRQ_HANDLED if the IRQ is handled, or IRQ_NONE if not.
+ *
+ * Physical Function driver is responsible to register IRQ handler using
+ * VF Migration Interrupt Message Number, and call this function when the
+ * interrupt is generated by the hardware.
+ */
+irqreturn_t pci_sriov_migration(struct pci_dev *dev)
+{
+ if (!dev->is_physfn)
+ return IRQ_NONE;
+
+ return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE;
+}
+EXPORT_SYMBOL_GPL(pci_sriov_migration);
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index baba2eb5367d..6f2e6295e773 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -27,48 +27,53 @@ static int pci_msi_enable = 1;
/* Arch hooks */
-int __attribute__ ((weak))
-arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
+#ifndef arch_msi_check_device
+int arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
return 0;
}
+#endif
-int __attribute__ ((weak))
-arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry)
-{
- return 0;
-}
-
-int __attribute__ ((weak))
-arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+#ifndef arch_setup_msi_irqs
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
struct msi_desc *entry;
int ret;
+ /*
+ * If an architecture wants to support multiple MSI, it needs to
+ * override arch_setup_msi_irqs()
+ */
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+
list_for_each_entry(entry, &dev->msi_list, list) {
ret = arch_setup_msi_irq(dev, entry);
- if (ret)
+ if (ret < 0)
return ret;
+ if (ret > 0)
+ return -ENOSPC;
}
return 0;
}
+#endif
-void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq)
-{
- return;
-}
-
-void __attribute__ ((weak))
-arch_teardown_msi_irqs(struct pci_dev *dev)
+#ifndef arch_teardown_msi_irqs
+void arch_teardown_msi_irqs(struct pci_dev *dev)
{
struct msi_desc *entry;
list_for_each_entry(entry, &dev->msi_list, list) {
- if (entry->irq != 0)
- arch_teardown_msi_irq(entry->irq);
+ int i, nvec;
+ if (entry->irq == 0)
+ continue;
+ nvec = 1 << entry->msi_attrib.multiple;
+ for (i = 0; i < nvec; i++)
+ arch_teardown_msi_irq(entry->irq + i);
}
}
+#endif
static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
{
@@ -111,27 +116,14 @@ static inline __attribute_const__ u32 msi_mask(unsigned x)
return (1 << (1 << x)) - 1;
}
-static void msix_flush_writes(struct irq_desc *desc)
+static inline __attribute_const__ u32 msi_capable_mask(u16 control)
{
- struct msi_desc *entry;
+ return msi_mask((control >> 1) & 7);
+}
- entry = get_irq_desc_msi(desc);
- BUG_ON(!entry || !entry->dev);
- switch (entry->msi_attrib.type) {
- case PCI_CAP_ID_MSI:
- /* nothing to do */
- break;
- case PCI_CAP_ID_MSIX:
- {
- int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
- readl(entry->mask_base + offset);
- break;
- }
- default:
- BUG();
- break;
- }
+static inline __attribute_const__ u32 msi_enabled_mask(u16 control)
+{
+ return msi_mask((control >> 4) & 7);
}
/*
@@ -143,49 +135,71 @@ static void msix_flush_writes(struct irq_desc *desc)
* Returns 1 if it succeeded in masking the interrupt and 0 if the device
* doesn't support MSI masking.
*/
-static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag)
+static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
- struct msi_desc *entry;
+ u32 mask_bits = desc->masked;
- entry = get_irq_desc_msi(desc);
- BUG_ON(!entry || !entry->dev);
- switch (entry->msi_attrib.type) {
- case PCI_CAP_ID_MSI:
- if (entry->msi_attrib.maskbit) {
- int pos;
- u32 mask_bits;
-
- pos = (long)entry->mask_base;
- pci_read_config_dword(entry->dev, pos, &mask_bits);
- mask_bits &= ~(mask);
- mask_bits |= flag & mask;
- pci_write_config_dword(entry->dev, pos, mask_bits);
- } else {
- return 0;
- }
- break;
- case PCI_CAP_ID_MSIX:
- {
- int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
- writel(flag, entry->mask_base + offset);
- readl(entry->mask_base + offset);
- break;
- }
- default:
- BUG();
- break;
+ if (!desc->msi_attrib.maskbit)
+ return;
+
+ mask_bits &= ~mask;
+ mask_bits |= flag;
+ pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);
+ desc->masked = mask_bits;
+}
+
+/*
+ * This internal function does not flush PCI writes to the device.
+ * All users must ensure that they read from the device before either
+ * assuming that the device state is up to date, or returning out of this
+ * file. This saves a few milliseconds when initialising devices with lots
+ * of MSI-X interrupts.
+ */
+static void msix_mask_irq(struct msi_desc *desc, u32 flag)
+{
+ u32 mask_bits = desc->masked;
+ unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+ mask_bits &= ~1;
+ mask_bits |= flag;
+ writel(mask_bits, desc->mask_base + offset);
+ desc->masked = mask_bits;
+}
+
+static void msi_set_mask_bit(unsigned irq, u32 flag)
+{
+ struct msi_desc *desc = get_irq_msi(irq);
+
+ if (desc->msi_attrib.is_msix) {
+ msix_mask_irq(desc, flag);
+ readl(desc->mask_base); /* Flush write to device */
+ } else {
+ unsigned offset = irq - desc->dev->irq;
+ msi_mask_irq(desc, 1 << offset, flag << offset);
}
- entry->msi_attrib.masked = !!flag;
- return 1;
+}
+
+void mask_msi_irq(unsigned int irq)
+{
+ msi_set_mask_bit(irq, 1);
+}
+
+void unmask_msi_irq(unsigned int irq)
+{
+ msi_set_mask_bit(irq, 0);
}
void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
struct msi_desc *entry = get_irq_desc_msi(desc);
- switch(entry->msi_attrib.type) {
- case PCI_CAP_ID_MSI:
- {
+ if (entry->msi_attrib.is_msix) {
+ void __iomem *base = entry->mask_base +
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+ msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
+ msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
+ msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
+ } else {
struct pci_dev *dev = entry->dev;
int pos = entry->msi_attrib.pos;
u16 data;
@@ -201,21 +215,6 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
}
msg->data = data;
- break;
- }
- case PCI_CAP_ID_MSIX:
- {
- void __iomem *base;
- base = entry->mask_base +
- entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
-
- msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
- msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
- msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
- break;
- }
- default:
- BUG();
}
}
@@ -229,11 +228,25 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg)
void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
struct msi_desc *entry = get_irq_desc_msi(desc);
- switch (entry->msi_attrib.type) {
- case PCI_CAP_ID_MSI:
- {
+ if (entry->msi_attrib.is_msix) {
+ void __iomem *base;
+ base = entry->mask_base +
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+ writel(msg->address_lo,
+ base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
+ writel(msg->address_hi,
+ base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
+ writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
+ } else {
struct pci_dev *dev = entry->dev;
int pos = entry->msi_attrib.pos;
+ u16 msgctl;
+
+ pci_read_config_word(dev, msi_control_reg(pos), &msgctl);
+ msgctl &= ~PCI_MSI_FLAGS_QSIZE;
+ msgctl |= entry->msi_attrib.multiple << 4;
+ pci_write_config_word(dev, msi_control_reg(pos), msgctl);
pci_write_config_dword(dev, msi_lower_address_reg(pos),
msg->address_lo);
@@ -246,23 +259,6 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
pci_write_config_word(dev, msi_data_reg(pos, 0),
msg->data);
}
- break;
- }
- case PCI_CAP_ID_MSIX:
- {
- void __iomem *base;
- base = entry->mask_base +
- entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
-
- writel(msg->address_lo,
- base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
- writel(msg->address_hi,
- base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
- writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
- break;
- }
- default:
- BUG();
}
entry->msg = *msg;
}
@@ -274,37 +270,18 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg)
write_msi_msg_desc(desc, msg);
}
-void mask_msi_irq(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
-
- msi_set_mask_bits(desc, 1, 1);
- msix_flush_writes(desc);
-}
-
-void unmask_msi_irq(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
-
- msi_set_mask_bits(desc, 1, 0);
- msix_flush_writes(desc);
-}
-
static int msi_free_irqs(struct pci_dev* dev);
-static struct msi_desc* alloc_msi_entry(void)
+static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
{
- struct msi_desc *entry;
-
- entry = kzalloc(sizeof(struct msi_desc), GFP_KERNEL);
- if (!entry)
+ struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
return NULL;
- INIT_LIST_HEAD(&entry->list);
- entry->irq = 0;
- entry->dev = NULL;
+ INIT_LIST_HEAD(&desc->list);
+ desc->dev = dev;
- return entry;
+ return desc;
}
static void pci_intx_for_msi(struct pci_dev *dev, int enable)
@@ -328,15 +305,11 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
pci_intx_for_msi(dev, 0);
msi_set_enable(dev, 0);
write_msi_msg(dev->irq, &entry->msg);
- if (entry->msi_attrib.maskbit) {
- struct irq_desc *desc = irq_to_desc(dev->irq);
- msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask,
- entry->msi_attrib.masked);
- }
pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
+ msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
control &= ~PCI_MSI_FLAGS_QSIZE;
- control |= PCI_MSI_FLAGS_ENABLE;
+ control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
}
@@ -354,9 +327,8 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
msix_set_enable(dev, 0);
list_for_each_entry(entry, &dev->msi_list, list) {
- struct irq_desc *desc = irq_to_desc(entry->irq);
write_msi_msg(entry->irq, &entry->msg);
- msi_set_mask_bits(desc, 1, entry->msi_attrib.masked);
+ msix_mask_irq(entry, entry->masked);
}
BUG_ON(list_empty(&dev->msi_list));
@@ -378,52 +350,48 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state);
/**
* msi_capability_init - configure device's MSI capability structure
* @dev: pointer to the pci_dev data structure of MSI device function
+ * @nvec: number of interrupts to allocate
*
- * Setup the MSI capability structure of device function with a single
- * MSI irq, regardless of device function is capable of handling
- * multiple messages. A return of zero indicates the successful setup
- * of an entry zero with the new MSI irq or non-zero for otherwise.
- **/
-static int msi_capability_init(struct pci_dev *dev)
+ * Setup the MSI capability structure of the device with the requested
+ * number of interrupts. A return value of zero indicates the successful
+ * setup of an entry with the new MSI irq. A negative return value indicates
+ * an error, and a positive return value indicates the number of interrupts
+ * which could have been allocated.
+ */
+static int msi_capability_init(struct pci_dev *dev, int nvec)
{
struct msi_desc *entry;
int pos, ret;
u16 control;
+ unsigned mask;
msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
pci_read_config_word(dev, msi_control_reg(pos), &control);
/* MSI Entry Initialization */
- entry = alloc_msi_entry();
+ entry = alloc_msi_entry(dev);
if (!entry)
return -ENOMEM;
- entry->msi_attrib.type = PCI_CAP_ID_MSI;
+ entry->msi_attrib.is_msix = 0;
entry->msi_attrib.is_64 = is_64bit_address(control);
entry->msi_attrib.entry_nr = 0;
entry->msi_attrib.maskbit = is_mask_bit_support(control);
- entry->msi_attrib.masked = 1;
entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */
entry->msi_attrib.pos = pos;
- entry->dev = dev;
- if (entry->msi_attrib.maskbit) {
- unsigned int base, maskbits, temp;
-
- base = msi_mask_bits_reg(pos, entry->msi_attrib.is_64);
- entry->mask_base = (void __iomem *)(long)base;
-
- /* All MSIs are unmasked by default, Mask them all */
- pci_read_config_dword(dev, base, &maskbits);
- temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1);
- maskbits |= temp;
- pci_write_config_dword(dev, base, maskbits);
- entry->msi_attrib.maskbits_mask = temp;
- }
+
+ entry->mask_pos = msi_mask_bits_reg(pos, entry->msi_attrib.is_64);
+ /* All MSIs are unmasked by default, Mask them all */
+ if (entry->msi_attrib.maskbit)
+ pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
+ mask = msi_capable_mask(control);
+ msi_mask_irq(entry, mask, mask);
+
list_add_tail(&entry->list, &dev->msi_list);
/* Configure MSI capability structure */
- ret = arch_setup_msi_irqs(dev, 1, PCI_CAP_ID_MSI);
+ ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
if (ret) {
msi_free_irqs(dev);
return ret;
@@ -476,26 +444,28 @@ static int msix_capability_init(struct pci_dev *dev,
/* MSI-X Table Initialization */
for (i = 0; i < nvec; i++) {
- entry = alloc_msi_entry();
+ entry = alloc_msi_entry(dev);
if (!entry)
break;
j = entries[i].entry;
- entry->msi_attrib.type = PCI_CAP_ID_MSIX;
+ entry->msi_attrib.is_msix = 1;
entry->msi_attrib.is_64 = 1;
entry->msi_attrib.entry_nr = j;
- entry->msi_attrib.maskbit = 1;
- entry->msi_attrib.masked = 1;
entry->msi_attrib.default_irq = dev->irq;
entry->msi_attrib.pos = pos;
- entry->dev = dev;
entry->mask_base = base;
+ entry->masked = readl(base + j * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+ msix_mask_irq(entry, 1);
list_add_tail(&entry->list, &dev->msi_list);
}
ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
- if (ret) {
+ if (ret < 0) {
+ /* If we had some success report the number of irqs
+ * we succeeded in setting up. */
int avail = 0;
list_for_each_entry(entry, &dev->msi_list, list) {
if (entry->irq != 0) {
@@ -503,14 +473,13 @@ static int msix_capability_init(struct pci_dev *dev,
}
}
- msi_free_irqs(dev);
+ if (avail != 0)
+ ret = avail;
+ }
- /* If we had some success report the number of irqs
- * we succeeded in setting up.
- */
- if (avail == 0)
- avail = ret;
- return avail;
+ if (ret) {
+ msi_free_irqs(dev);
+ return ret;
}
i = 0;
@@ -575,39 +544,54 @@ static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
}
/**
- * pci_enable_msi - configure device's MSI capability structure
- * @dev: pointer to the pci_dev data structure of MSI device function
+ * pci_enable_msi_block - configure device's MSI capability structure
+ * @dev: device to configure
+ * @nvec: number of interrupts to configure
*
- * Setup the MSI capability structure of device function with
- * a single MSI irq upon its software driver call to request for
- * MSI mode enabled on its hardware device function. A return of zero
- * indicates the successful setup of an entry zero with the new MSI
- * irq or non-zero for otherwise.
- **/
-int pci_enable_msi(struct pci_dev* dev)
+ * Allocate IRQs for a device with the MSI capability.
+ * This function returns a negative errno if an error occurs. If it
+ * is unable to allocate the number of interrupts requested, it returns
+ * the number of interrupts it might be able to allocate. If it successfully
+ * allocates at least the number of interrupts requested, it returns 0 and
+ * updates the @dev's irq member to the lowest new interrupt number; the
+ * other interrupt numbers allocated to this device are consecutive.
+ */
+int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
{
- int status;
+ int status, pos, maxvec;
+ u16 msgctl;
- status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI);
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+ if (!pos)
+ return -EINVAL;
+ pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
+ maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
+ if (nvec > maxvec)
+ return maxvec;
+
+ status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
if (status)
return status;
WARN_ON(!!dev->msi_enabled);
- /* Check whether driver already requested for MSI-X irqs */
+ /* Check whether driver already requested MSI-X irqs */
if (dev->msix_enabled) {
dev_info(&dev->dev, "can't enable MSI "
"(MSI-X already enabled)\n");
return -EINVAL;
}
- status = msi_capability_init(dev);
+
+ status = msi_capability_init(dev, nvec);
return status;
}
-EXPORT_SYMBOL(pci_enable_msi);
+EXPORT_SYMBOL(pci_enable_msi_block);
-void pci_msi_shutdown(struct pci_dev* dev)
+void pci_msi_shutdown(struct pci_dev *dev)
{
- struct msi_desc *entry;
+ struct msi_desc *desc;
+ u32 mask;
+ u16 ctrl;
if (!pci_msi_enable || !dev || !dev->msi_enabled)
return;
@@ -617,19 +601,15 @@ void pci_msi_shutdown(struct pci_dev* dev)
dev->msi_enabled = 0;
BUG_ON(list_empty(&dev->msi_list));
- entry = list_entry(dev->msi_list.next, struct msi_desc, list);
- /* Return the the pci reset with msi irqs unmasked */
- if (entry->msi_attrib.maskbit) {
- u32 mask = entry->msi_attrib.maskbits_mask;
- struct irq_desc *desc = irq_to_desc(dev->irq);
- msi_set_mask_bits(desc, mask, ~mask);
- }
- if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
- return;
+ desc = list_first_entry(&dev->msi_list, struct msi_desc, list);
+ pci_read_config_word(dev, desc->msi_attrib.pos + PCI_MSI_FLAGS, &ctrl);
+ mask = msi_capable_mask(ctrl);
+ msi_mask_irq(desc, mask, ~mask);
/* Restore dev->irq to its default pin-assertion irq */
- dev->irq = entry->msi_attrib.default_irq;
+ dev->irq = desc->msi_attrib.default_irq;
}
+
void pci_disable_msi(struct pci_dev* dev)
{
struct msi_desc *entry;
@@ -640,7 +620,7 @@ void pci_disable_msi(struct pci_dev* dev)
pci_msi_shutdown(dev);
entry = list_entry(dev->msi_list.next, struct msi_desc, list);
- if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
+ if (entry->msi_attrib.is_msix)
return;
msi_free_irqs(dev);
@@ -652,14 +632,18 @@ static int msi_free_irqs(struct pci_dev* dev)
struct msi_desc *entry, *tmp;
list_for_each_entry(entry, &dev->msi_list, list) {
- if (entry->irq)
- BUG_ON(irq_has_action(entry->irq));
+ int i, nvec;
+ if (!entry->irq)
+ continue;
+ nvec = 1 << entry->msi_attrib.multiple;
+ for (i = 0; i < nvec; i++)
+ BUG_ON(irq_has_action(entry->irq + i));
}
arch_teardown_msi_irqs(dev);
list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
- if (entry->msi_attrib.type == PCI_CAP_ID_MSIX) {
+ if (entry->msi_attrib.is_msix) {
writel(1, entry->mask_base + entry->msi_attrib.entry_nr
* PCI_MSIX_ENTRY_SIZE
+ PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
@@ -675,6 +659,23 @@ static int msi_free_irqs(struct pci_dev* dev)
}
/**
+ * pci_msix_table_size - return the number of device's MSI-X table entries
+ * @dev: pointer to the pci_dev data structure of MSI-X device function
+ */
+int pci_msix_table_size(struct pci_dev *dev)
+{
+ int pos;
+ u16 control;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+ if (!pos)
+ return 0;
+
+ pci_read_config_word(dev, msi_control_reg(pos), &control);
+ return multi_msix_capable(control);
+}
+
+/**
* pci_enable_msix - configure device's MSI-X capability structure
* @dev: pointer to the pci_dev data structure of MSI-X device function
* @entries: pointer to an array of MSI-X entries
@@ -691,9 +692,8 @@ static int msi_free_irqs(struct pci_dev* dev)
**/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
- int status, pos, nr_entries;
+ int status, nr_entries;
int i, j;
- u16 control;
if (!entries)
return -EINVAL;
@@ -702,9 +702,7 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
if (status)
return status;
- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
- pci_read_config_word(dev, msi_control_reg(pos), &control);
- nr_entries = multi_msix_capable(control);
+ nr_entries = pci_msix_table_size(dev);
if (nvec > nr_entries)
return -EINVAL;
diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h
index 3898f5237144..71f4df2ef654 100644
--- a/drivers/pci/msi.h
+++ b/drivers/pci/msi.h
@@ -20,14 +20,8 @@
#define msi_mask_bits_reg(base, is64bit) \
( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
#define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE
-#define multi_msi_capable(control) \
- (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
-#define multi_msi_enable(control, num) \
- control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
#define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT))
#define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
-#define msi_enable(control, num) multi_msi_enable(control, num); \
- control |= PCI_MSI_FLAGS_ENABLE
#define msix_table_offset_reg(base) (base + 0x04)
#define msix_pba_offset_reg(base) (base + 0x08)
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index deea8a187eb8..fac5eddcefd2 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -18,221 +18,6 @@
#include <linux/pci-acpi.h>
#include "pci.h"
-struct acpi_osc_data {
- acpi_handle handle;
- u32 support_set;
- u32 control_set;
- u32 control_query;
- int is_queried;
- struct list_head sibiling;
-};
-static LIST_HEAD(acpi_osc_data_list);
-
-struct acpi_osc_args {
- u32 capbuf[3];
-};
-
-static DEFINE_MUTEX(pci_acpi_lock);
-
-static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle)
-{
- struct acpi_osc_data *data;
-
- list_for_each_entry(data, &acpi_osc_data_list, sibiling) {
- if (data->handle == handle)
- return data;
- }
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- return NULL;
- INIT_LIST_HEAD(&data->sibiling);
- data->handle = handle;
- list_add_tail(&data->sibiling, &acpi_osc_data_list);
- return data;
-}
-
-static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40,
- 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
-
-static acpi_status acpi_run_osc(acpi_handle handle,
- struct acpi_osc_args *osc_args, u32 *retval)
-{
- acpi_status status;
- struct acpi_object_list input;
- union acpi_object in_params[4];
- struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
- union acpi_object *out_obj;
- u32 errors, flags = osc_args->capbuf[OSC_QUERY_TYPE];
-
- /* Setting up input parameters */
- input.count = 4;
- input.pointer = in_params;
- in_params[0].type = ACPI_TYPE_BUFFER;
- in_params[0].buffer.length = 16;
- in_params[0].buffer.pointer = OSC_UUID;
- in_params[1].type = ACPI_TYPE_INTEGER;
- in_params[1].integer.value = 1;
- in_params[2].type = ACPI_TYPE_INTEGER;
- in_params[2].integer.value = 3;
- in_params[3].type = ACPI_TYPE_BUFFER;
- in_params[3].buffer.length = 12;
- in_params[3].buffer.pointer = (u8 *)osc_args->capbuf;
-
- status = acpi_evaluate_object(handle, "_OSC", &input, &output);
- if (ACPI_FAILURE(status))
- return status;
-
- if (!output.length)
- return AE_NULL_OBJECT;
-
- out_obj = output.pointer;
- if (out_obj->type != ACPI_TYPE_BUFFER) {
- printk(KERN_DEBUG "Evaluate _OSC returns wrong type\n");
- status = AE_TYPE;
- goto out_kfree;
- }
- /* Need to ignore the bit0 in result code */
- errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
- if (errors) {
- if (errors & OSC_REQUEST_ERROR)
- printk(KERN_DEBUG "_OSC request fails\n");
- if (errors & OSC_INVALID_UUID_ERROR)
- printk(KERN_DEBUG "_OSC invalid UUID\n");
- if (errors & OSC_INVALID_REVISION_ERROR)
- printk(KERN_DEBUG "_OSC invalid revision\n");
- if (errors & OSC_CAPABILITIES_MASK_ERROR) {
- if (flags & OSC_QUERY_ENABLE)
- goto out_success;
- printk(KERN_DEBUG "_OSC FW not grant req. control\n");
- status = AE_SUPPORT;
- goto out_kfree;
- }
- status = AE_ERROR;
- goto out_kfree;
- }
-out_success:
- *retval = *((u32 *)(out_obj->buffer.pointer + 8));
- status = AE_OK;
-
-out_kfree:
- kfree(output.pointer);
- return status;
-}
-
-static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data)
-{
- acpi_status status;
- u32 support_set, result;
- struct acpi_osc_args osc_args;
-
- /* do _OSC query for all possible controls */
- support_set = osc_data->support_set | (flags & OSC_SUPPORT_MASKS);
- osc_args.capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
- osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set;
- osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
-
- status = acpi_run_osc(osc_data->handle, &osc_args, &result);
- if (ACPI_SUCCESS(status)) {
- osc_data->support_set = support_set;
- osc_data->control_query = result;
- osc_data->is_queried = 1;
- }
-
- return status;
-}
-
-/*
- * pci_acpi_osc_support: Invoke _OSC indicating support for the given feature
- * @flags: Bitmask of flags to support
- *
- * See the ACPI spec for the definition of the flags
- */
-int pci_acpi_osc_support(acpi_handle handle, u32 flags)
-{
- acpi_status status;
- acpi_handle tmp;
- struct acpi_osc_data *osc_data;
- int rc = 0;
-
- status = acpi_get_handle(handle, "_OSC", &tmp);
- if (ACPI_FAILURE(status))
- return -ENOTTY;
-
- mutex_lock(&pci_acpi_lock);
- osc_data = acpi_get_osc_data(handle);
- if (!osc_data) {
- printk(KERN_ERR "acpi osc data array is full\n");
- rc = -ENOMEM;
- goto out;
- }
-
- __acpi_query_osc(flags, osc_data);
-out:
- mutex_unlock(&pci_acpi_lock);
- return rc;
-}
-
-/**
- * pci_osc_control_set - commit requested control to Firmware
- * @handle: acpi_handle for the target ACPI object
- * @flags: driver's requested control bits
- *
- * Attempt to take control from Firmware on requested control bits.
- **/
-acpi_status pci_osc_control_set(acpi_handle handle, u32 flags)
-{
- acpi_status status;
- u32 control_req, control_set, result;
- acpi_handle tmp;
- struct acpi_osc_data *osc_data;
- struct acpi_osc_args osc_args;
-
- status = acpi_get_handle(handle, "_OSC", &tmp);
- if (ACPI_FAILURE(status))
- return status;
-
- mutex_lock(&pci_acpi_lock);
- osc_data = acpi_get_osc_data(handle);
- if (!osc_data) {
- printk(KERN_ERR "acpi osc data array is full\n");
- status = AE_ERROR;
- goto out;
- }
-
- control_req = (flags & OSC_CONTROL_MASKS);
- if (!control_req) {
- status = AE_TYPE;
- goto out;
- }
-
- /* No need to evaluate _OSC if the control was already granted. */
- if ((osc_data->control_set & control_req) == control_req)
- goto out;
-
- if (!osc_data->is_queried) {
- status = __acpi_query_osc(osc_data->support_set, osc_data);
- if (ACPI_FAILURE(status))
- goto out;
- }
-
- if ((osc_data->control_query & control_req) != control_req) {
- status = AE_SUPPORT;
- goto out;
- }
-
- control_set = osc_data->control_set | control_req;
- osc_args.capbuf[OSC_QUERY_TYPE] = 0;
- osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set;
- osc_args.capbuf[OSC_CONTROL_TYPE] = control_set;
- status = acpi_run_osc(handle, &osc_args, &result);
- if (ACPI_SUCCESS(status))
- osc_data->control_set = result;
-out:
- mutex_unlock(&pci_acpi_lock);
- return status;
-}
-EXPORT_SYMBOL(pci_osc_control_set);
-
/*
* _SxD returns the D-state with the highest power
* (lowest D-state number) supported in the S-state "x".
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 267de88551c9..c0cbbb5a245e 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -99,6 +99,52 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
}
static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
+/**
+ * store_remove_id - remove a PCI device ID from this driver
+ * @driver: target device driver
+ * @buf: buffer for scanning device ID data
+ * @count: input size
+ *
+ * Removes a dynamic pci device ID to this driver.
+ */
+static ssize_t
+store_remove_id(struct device_driver *driver, const char *buf, size_t count)
+{
+ struct pci_dynid *dynid, *n;
+ struct pci_driver *pdrv = to_pci_driver(driver);
+ __u32 vendor, device, subvendor = PCI_ANY_ID,
+ subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
+ int fields = 0;
+ int retval = -ENODEV;
+
+ fields = sscanf(buf, "%x %x %x %x %x %x",
+ &vendor, &device, &subvendor, &subdevice,
+ &class, &class_mask);
+ if (fields < 2)
+ return -EINVAL;
+
+ spin_lock(&pdrv->dynids.lock);
+ list_for_each_entry_safe(dynid, n, &pdrv->dynids.list, node) {
+ struct pci_device_id *id = &dynid->id;
+ if ((id->vendor == vendor) &&
+ (id->device == device) &&
+ (subvendor == PCI_ANY_ID || id->subvendor == subvendor) &&
+ (subdevice == PCI_ANY_ID || id->subdevice == subdevice) &&
+ !((id->class ^ class) & class_mask)) {
+ list_del(&dynid->node);
+ kfree(dynid);
+ retval = 0;
+ break;
+ }
+ }
+ spin_unlock(&pdrv->dynids.lock);
+
+ if (retval)
+ return retval;
+ return count;
+}
+static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id);
+
static void
pci_free_dynids(struct pci_driver *drv)
{
@@ -125,6 +171,20 @@ static void pci_remove_newid_file(struct pci_driver *drv)
{
driver_remove_file(&drv->driver, &driver_attr_new_id);
}
+
+static int
+pci_create_removeid_file(struct pci_driver *drv)
+{
+ int error = 0;
+ if (drv->probe != NULL)
+ error = driver_create_file(&drv->driver,&driver_attr_remove_id);
+ return error;
+}
+
+static void pci_remove_removeid_file(struct pci_driver *drv)
+{
+ driver_remove_file(&drv->driver, &driver_attr_remove_id);
+}
#else /* !CONFIG_HOTPLUG */
static inline void pci_free_dynids(struct pci_driver *drv) {}
static inline int pci_create_newid_file(struct pci_driver *drv)
@@ -132,6 +192,11 @@ static inline int pci_create_newid_file(struct pci_driver *drv)
return 0;
}
static inline void pci_remove_newid_file(struct pci_driver *drv) {}
+static inline int pci_create_removeid_file(struct pci_driver *drv)
+{
+ return 0;
+}
+static inline void pci_remove_removeid_file(struct pci_driver *drv) {}
#endif
/**
@@ -899,13 +964,23 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner,
/* register with core */
error = driver_register(&drv->driver);
if (error)
- return error;
+ goto out;
error = pci_create_newid_file(drv);
if (error)
- driver_unregister(&drv->driver);
+ goto out_newid;
+ error = pci_create_removeid_file(drv);
+ if (error)
+ goto out_removeid;
+out:
return error;
+
+out_removeid:
+ pci_remove_newid_file(drv);
+out_newid:
+ driver_unregister(&drv->driver);
+ goto out;
}
/**
@@ -921,6 +996,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner,
void
pci_unregister_driver(struct pci_driver *drv)
{
+ pci_remove_removeid_file(drv);
pci_remove_newid_file(drv);
driver_unregister(&drv->driver);
pci_free_dynids(drv);
@@ -1020,6 +1096,7 @@ struct bus_type pci_bus_type = {
.remove = pci_device_remove,
.shutdown = pci_device_shutdown,
.dev_attrs = pci_dev_attrs,
+ .bus_attrs = pci_bus_attrs,
.pm = PCI_PM_OPS_PTR,
};
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index dfc4e0ddf241..e9a8706a6401 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -219,6 +219,83 @@ msi_bus_store(struct device *dev, struct device_attribute *attr,
return count;
}
+#ifdef CONFIG_HOTPLUG
+static DEFINE_MUTEX(pci_remove_rescan_mutex);
+static ssize_t bus_rescan_store(struct bus_type *bus, const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ struct pci_bus *b = NULL;
+
+ if (strict_strtoul(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ if (val) {
+ mutex_lock(&pci_remove_rescan_mutex);
+ while ((b = pci_find_next_bus(b)) != NULL)
+ pci_rescan_bus(b);
+ mutex_unlock(&pci_remove_rescan_mutex);
+ }
+ return count;
+}
+
+struct bus_attribute pci_bus_attrs[] = {
+ __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, bus_rescan_store),
+ __ATTR_NULL
+};
+
+static ssize_t
+dev_rescan_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long val;
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (strict_strtoul(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ if (val) {
+ mutex_lock(&pci_remove_rescan_mutex);
+ pci_rescan_bus(pdev->bus);
+ mutex_unlock(&pci_remove_rescan_mutex);
+ }
+ return count;
+}
+
+static void remove_callback(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ mutex_lock(&pci_remove_rescan_mutex);
+ pci_remove_bus_device(pdev);
+ mutex_unlock(&pci_remove_rescan_mutex);
+}
+
+static ssize_t
+remove_store(struct device *dev, struct device_attribute *dummy,
+ const char *buf, size_t count)
+{
+ int ret = 0;
+ unsigned long val;
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (strict_strtoul(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ if (pci_is_root_bus(pdev->bus))
+ return -EBUSY;
+
+ /* An attribute cannot be unregistered by one of its own methods,
+ * so we have to use this roundabout approach.
+ */
+ if (val)
+ ret = device_schedule_callback(dev, remove_callback);
+ if (ret)
+ count = ret;
+ return count;
+}
+#endif
+
struct device_attribute pci_dev_attrs[] = {
__ATTR_RO(resource),
__ATTR_RO(vendor),
@@ -237,10 +314,25 @@ struct device_attribute pci_dev_attrs[] = {
__ATTR(broken_parity_status,(S_IRUGO|S_IWUSR),
broken_parity_status_show,broken_parity_status_store),
__ATTR(msi_bus, 0644, msi_bus_show, msi_bus_store),
+#ifdef CONFIG_HOTPLUG
+ __ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store),
+ __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store),
+#endif
__ATTR_NULL,
};
static ssize_t
+boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ return sprintf(buf, "%u\n",
+ !!(pdev->resource[PCI_ROM_RESOURCE].flags &
+ IORESOURCE_ROM_SHADOW));
+}
+struct device_attribute vga_attr = __ATTR_RO(boot_vga);
+
+static ssize_t
pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
@@ -493,6 +585,19 @@ pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr,
}
/**
+ * pci_adjust_legacy_attr - adjustment of legacy file attributes
+ * @b: bus to create files under
+ * @mmap_type: I/O port or memory
+ *
+ * Stub implementation. Can be overridden by arch if necessary.
+ */
+void __weak
+pci_adjust_legacy_attr(struct pci_bus *b, enum pci_mmap_state mmap_type)
+{
+ return;
+}
+
+/**
* pci_create_legacy_files - create legacy I/O port and memory files
* @b: bus to create files under
*
@@ -518,6 +623,7 @@ void pci_create_legacy_files(struct pci_bus *b)
b->legacy_io->read = pci_read_legacy_io;
b->legacy_io->write = pci_write_legacy_io;
b->legacy_io->mmap = pci_mmap_legacy_io;
+ pci_adjust_legacy_attr(b, pci_mmap_io);
error = device_create_bin_file(&b->dev, b->legacy_io);
if (error)
goto legacy_io_err;
@@ -528,6 +634,7 @@ void pci_create_legacy_files(struct pci_bus *b)
b->legacy_mem->size = 1024*1024;
b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
b->legacy_mem->mmap = pci_mmap_legacy_mem;
+ pci_adjust_legacy_attr(b, pci_mmap_mem);
error = device_create_bin_file(&b->dev, b->legacy_mem);
if (error)
goto legacy_mem_err;
@@ -719,8 +826,8 @@ static int pci_create_resource_files(struct pci_dev *pdev)
return 0;
}
#else /* !HAVE_PCI_MMAP */
-static inline int pci_create_resource_files(struct pci_dev *dev) { return 0; }
-static inline void pci_remove_resource_files(struct pci_dev *dev) { return; }
+int __weak pci_create_resource_files(struct pci_dev *dev) { return 0; }
+void __weak pci_remove_resource_files(struct pci_dev *dev) { return; }
#endif /* HAVE_PCI_MMAP */
/**
@@ -884,18 +991,27 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
pdev->rom_attr = attr;
}
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) {
+ retval = device_create_file(&pdev->dev, &vga_attr);
+ if (retval)
+ goto err_rom_file;
+ }
+
/* add platform-specific attributes */
retval = pcibios_add_platform_entries(pdev);
if (retval)
- goto err_rom_file;
+ goto err_vga_file;
/* add sysfs entries for various capabilities */
retval = pci_create_capabilities_sysfs(pdev);
if (retval)
- goto err_rom_file;
+ goto err_vga_file;
return 0;
+err_vga_file:
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+ device_remove_file(&pdev->dev, &vga_attr);
err_rom_file:
if (rom_size) {
sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0195066251e5..fe7ac2cea7c9 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -20,6 +20,8 @@
#include <linux/pm_wakeup.h>
#include <linux/interrupt.h>
#include <asm/dma.h> /* isa_dma_bridge_buggy */
+#include <linux/device.h>
+#include <asm/setup.h>
#include "pci.h"
unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT;
@@ -677,6 +679,8 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state)
EXPORT_SYMBOL(pci_choose_state);
+#define PCI_EXP_SAVE_REGS 7
+
static int pci_save_pcie_state(struct pci_dev *dev)
{
int pos, i = 0;
@@ -689,7 +693,7 @@ static int pci_save_pcie_state(struct pci_dev *dev)
save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
if (!save_state) {
- dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__);
+ dev_err(&dev->dev, "buffer not found in %s\n", __func__);
return -ENOMEM;
}
cap = (u16 *)&save_state->data[0];
@@ -698,6 +702,9 @@ static int pci_save_pcie_state(struct pci_dev *dev)
pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]);
pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]);
pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]);
+ pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]);
+ pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]);
+ pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]);
return 0;
}
@@ -718,6 +725,9 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]);
pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]);
pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]);
+ pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]);
+ pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]);
+ pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]);
}
@@ -732,7 +742,7 @@ static int pci_save_pcix_state(struct pci_dev *dev)
save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
if (!save_state) {
- dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__);
+ dev_err(&dev->dev, "buffer not found in %s\n", __func__);
return -ENOMEM;
}
@@ -805,6 +815,7 @@ pci_restore_state(struct pci_dev *dev)
}
pci_restore_pcix_state(dev);
pci_restore_msi_state(dev);
+ pci_restore_iov_state(dev);
return 0;
}
@@ -1401,7 +1412,8 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev)
{
int error;
- error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, 4 * sizeof(u16));
+ error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP,
+ PCI_EXP_SAVE_REGS * sizeof(u16));
if (error)
dev_err(&dev->dev,
"unable to preallocate PCI Express save buffer\n");
@@ -1472,7 +1484,7 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
if (!pin)
return -1;
- while (dev->bus->self) {
+ while (dev->bus->parent) {
pin = pci_swizzle_interrupt_pin(dev, pin);
dev = dev->bus->self;
}
@@ -1492,7 +1504,7 @@ u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp)
{
u8 pin = *pinp;
- while (dev->bus->self) {
+ while (dev->bus->parent) {
pin = pci_swizzle_interrupt_pin(dev, pin);
dev = dev->bus->self;
}
@@ -2016,18 +2028,24 @@ static int __pcie_flr(struct pci_dev *dev, int probe)
pci_block_user_cfg_access(dev);
/* Wait for Transaction Pending bit clean */
+ pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
+ if (!(status & PCI_EXP_DEVSTA_TRPND))
+ goto transaction_done;
+
msleep(100);
pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
- if (status & PCI_EXP_DEVSTA_TRPND) {
- dev_info(&dev->dev, "Busy after 100ms while trying to reset; "
+ if (!(status & PCI_EXP_DEVSTA_TRPND))
+ goto transaction_done;
+
+ dev_info(&dev->dev, "Busy after 100ms while trying to reset; "
"sleeping for 1 second\n");
- ssleep(1);
- pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
- if (status & PCI_EXP_DEVSTA_TRPND)
- dev_info(&dev->dev, "Still busy after 1s; "
+ ssleep(1);
+ pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
+ if (status & PCI_EXP_DEVSTA_TRPND)
+ dev_info(&dev->dev, "Still busy after 1s; "
"proceeding with reset anyway\n");
- }
+transaction_done:
pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL,
PCI_EXP_DEVCTL_BCR_FLR);
mdelay(100);
@@ -2054,18 +2072,24 @@ static int __pci_af_flr(struct pci_dev *dev, int probe)
pci_block_user_cfg_access(dev);
/* Wait for Transaction Pending bit clean */
+ pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
+ if (!(status & PCI_AF_STATUS_TP))
+ goto transaction_done;
+
msleep(100);
pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
- if (status & PCI_AF_STATUS_TP) {
- dev_info(&dev->dev, "Busy after 100ms while trying to"
- " reset; sleeping for 1 second\n");
- ssleep(1);
- pci_read_config_byte(dev,
- cappos + PCI_AF_STATUS, &status);
- if (status & PCI_AF_STATUS_TP)
- dev_info(&dev->dev, "Still busy after 1s; "
- "proceeding with reset anyway\n");
- }
+ if (!(status & PCI_AF_STATUS_TP))
+ goto transaction_done;
+
+ dev_info(&dev->dev, "Busy after 100ms while trying to"
+ " reset; sleeping for 1 second\n");
+ ssleep(1);
+ pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
+ if (status & PCI_AF_STATUS_TP)
+ dev_info(&dev->dev, "Still busy after 1s; "
+ "proceeding with reset anyway\n");
+
+transaction_done:
pci_write_config_byte(dev, cappos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
mdelay(100);
@@ -2334,18 +2358,140 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags)
*/
int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type)
{
+ int reg;
+
if (resno < PCI_ROM_RESOURCE) {
*type = pci_bar_unknown;
return PCI_BASE_ADDRESS_0 + 4 * resno;
} else if (resno == PCI_ROM_RESOURCE) {
*type = pci_bar_mem32;
return dev->rom_base_reg;
+ } else if (resno < PCI_BRIDGE_RESOURCES) {
+ /* device specific resource */
+ reg = pci_iov_resource_bar(dev, resno, type);
+ if (reg)
+ return reg;
}
dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno);
return 0;
}
+#define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
+static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
+spinlock_t resource_alignment_lock = SPIN_LOCK_UNLOCKED;
+
+/**
+ * pci_specified_resource_alignment - get resource alignment specified by user.
+ * @dev: the PCI device to get
+ *
+ * RETURNS: Resource alignment if it is specified.
+ * Zero if it is not specified.
+ */
+resource_size_t pci_specified_resource_alignment(struct pci_dev *dev)
+{
+ int seg, bus, slot, func, align_order, count;
+ resource_size_t align = 0;
+ char *p;
+
+ spin_lock(&resource_alignment_lock);
+ p = resource_alignment_param;
+ while (*p) {
+ count = 0;
+ if (sscanf(p, "%d%n", &align_order, &count) == 1 &&
+ p[count] == '@') {
+ p += count + 1;
+ } else {
+ align_order = -1;
+ }
+ if (sscanf(p, "%x:%x:%x.%x%n",
+ &seg, &bus, &slot, &func, &count) != 4) {
+ seg = 0;
+ if (sscanf(p, "%x:%x.%x%n",
+ &bus, &slot, &func, &count) != 3) {
+ /* Invalid format */
+ printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n",
+ p);
+ break;
+ }
+ }
+ p += count;
+ if (seg == pci_domain_nr(dev->bus) &&
+ bus == dev->bus->number &&
+ slot == PCI_SLOT(dev->devfn) &&
+ func == PCI_FUNC(dev->devfn)) {
+ if (align_order == -1) {
+ align = PAGE_SIZE;
+ } else {
+ align = 1 << align_order;
+ }
+ /* Found */
+ break;
+ }
+ if (*p != ';' && *p != ',') {
+ /* End of param or invalid format */
+ break;
+ }
+ p++;
+ }
+ spin_unlock(&resource_alignment_lock);
+ return align;
+}
+
+/**
+ * pci_is_reassigndev - check if specified PCI is target device to reassign
+ * @dev: the PCI device to check
+ *
+ * RETURNS: non-zero for PCI device is a target device to reassign,
+ * or zero is not.
+ */
+int pci_is_reassigndev(struct pci_dev *dev)
+{
+ return (pci_specified_resource_alignment(dev) != 0);
+}
+
+ssize_t pci_set_resource_alignment_param(const char *buf, size_t count)
+{
+ if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1)
+ count = RESOURCE_ALIGNMENT_PARAM_SIZE - 1;
+ spin_lock(&resource_alignment_lock);
+ strncpy(resource_alignment_param, buf, count);
+ resource_alignment_param[count] = '\0';
+ spin_unlock(&resource_alignment_lock);
+ return count;
+}
+
+ssize_t pci_get_resource_alignment_param(char *buf, size_t size)
+{
+ size_t count;
+ spin_lock(&resource_alignment_lock);
+ count = snprintf(buf, size, "%s", resource_alignment_param);
+ spin_unlock(&resource_alignment_lock);
+ return count;
+}
+
+static ssize_t pci_resource_alignment_show(struct bus_type *bus, char *buf)
+{
+ return pci_get_resource_alignment_param(buf, PAGE_SIZE);
+}
+
+static ssize_t pci_resource_alignment_store(struct bus_type *bus,
+ const char *buf, size_t count)
+{
+ return pci_set_resource_alignment_param(buf, count);
+}
+
+BUS_ATTR(resource_alignment, 0644, pci_resource_alignment_show,
+ pci_resource_alignment_store);
+
+static int __init pci_resource_alignment_sysfs_init(void)
+{
+ return bus_create_file(&pci_bus_type,
+ &bus_attr_resource_alignment);
+}
+
+late_initcall(pci_resource_alignment_sysfs_init);
+
static void __devinit pci_no_domains(void)
{
#ifdef CONFIG_PCI_DOMAINS
@@ -2394,6 +2540,9 @@ static int __init pci_setup(char *str)
pci_cardbus_io_size = memparse(str + 9, &str);
} else if (!strncmp(str, "cbmemsize=", 10)) {
pci_cardbus_mem_size = memparse(str + 10, &str);
+ } else if (!strncmp(str, "resource_alignment=", 19)) {
+ pci_set_resource_alignment_param(str + 19,
+ strlen(str + 19));
} else {
printk(KERN_ERR "PCI: Unknown option `%s'\n",
str);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 149fff65891f..d03f6b99f292 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1,6 +1,8 @@
#ifndef DRIVERS_PCI_H
#define DRIVERS_PCI_H
+#include <linux/workqueue.h>
+
#define PCI_CFG_SPACE_SIZE 256
#define PCI_CFG_SPACE_EXP_SIZE 4096
@@ -135,6 +137,12 @@ extern int pcie_mch_quirk;
extern struct device_attribute pci_dev_attrs[];
extern struct device_attribute dev_attr_cpuaffinity;
extern struct device_attribute dev_attr_cpulistaffinity;
+#ifdef CONFIG_HOTPLUG
+extern struct bus_attribute pci_bus_attrs[];
+#else
+#define pci_bus_attrs NULL
+#endif
+
/**
* pci_match_one_device - Tell if a PCI device structure has a matching
@@ -177,6 +185,7 @@ enum pci_bar_type {
pci_bar_mem64, /* A 64-bit memory BAR */
};
+extern int pci_setup_device(struct pci_dev *dev);
extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
struct resource *res, unsigned int reg);
extern int pci_resource_bar(struct pci_dev *dev, int resno,
@@ -194,4 +203,60 @@ static inline int pci_ari_enabled(struct pci_bus *bus)
return bus->self && bus->self->ari_enabled;
}
+#ifdef CONFIG_PCI_QUIRKS
+extern int pci_is_reassigndev(struct pci_dev *dev);
+resource_size_t pci_specified_resource_alignment(struct pci_dev *dev);
+extern void pci_disable_bridge_window(struct pci_dev *dev);
+#endif
+
+/* Single Root I/O Virtualization */
+struct pci_sriov {
+ int pos; /* capability position */
+ int nres; /* number of resources */
+ u32 cap; /* SR-IOV Capabilities */
+ u16 ctrl; /* SR-IOV Control */
+ u16 total; /* total VFs associated with the PF */
+ u16 initial; /* initial VFs associated with the PF */
+ u16 nr_virtfn; /* number of VFs available */
+ u16 offset; /* first VF Routing ID offset */
+ u16 stride; /* following VF stride */
+ u32 pgsz; /* page size for BAR alignment */
+ u8 link; /* Function Dependency Link */
+ struct pci_dev *dev; /* lowest numbered PF */
+ struct pci_dev *self; /* this PF */
+ struct mutex lock; /* lock for VF bus */
+ struct work_struct mtask; /* VF Migration task */
+ u8 __iomem *mstate; /* VF Migration State Array */
+};
+
+#ifdef CONFIG_PCI_IOV
+extern int pci_iov_init(struct pci_dev *dev);
+extern void pci_iov_release(struct pci_dev *dev);
+extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+ enum pci_bar_type *type);
+extern void pci_restore_iov_state(struct pci_dev *dev);
+extern int pci_iov_bus_range(struct pci_bus *bus);
+#else
+static inline int pci_iov_init(struct pci_dev *dev)
+{
+ return -ENODEV;
+}
+static inline void pci_iov_release(struct pci_dev *dev)
+
+{
+}
+static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+ enum pci_bar_type *type)
+{
+ return 0;
+}
+static inline void pci_restore_iov_state(struct pci_dev *dev)
+{
+}
+static inline int pci_iov_bus_range(struct pci_bus *bus)
+{
+ return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
#endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index e390707661dd..32ade5af927e 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -38,30 +38,13 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
-static int __devinit aer_probe (struct pcie_device *dev,
- const struct pcie_port_service_id *id );
+static int __devinit aer_probe (struct pcie_device *dev);
static void aer_remove(struct pcie_device *dev);
-static int aer_suspend(struct pcie_device *dev, pm_message_t state)
-{return 0;}
-static int aer_resume(struct pcie_device *dev) {return 0;}
static pci_ers_result_t aer_error_detected(struct pci_dev *dev,
enum pci_channel_state error);
static void aer_error_resume(struct pci_dev *dev);
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
-/*
- * PCI Express bus's AER Root service driver data structure
- */
-static struct pcie_port_service_id aer_id[] = {
- {
- .vendor = PCI_ANY_ID,
- .device = PCI_ANY_ID,
- .port_type = PCIE_RC_PORT,
- .service_type = PCIE_PORT_SERVICE_AER,
- },
- { /* end: all zeroes */ }
-};
-
static struct pci_error_handlers aer_error_handlers = {
.error_detected = aer_error_detected,
.resume = aer_error_resume,
@@ -69,14 +52,12 @@ static struct pci_error_handlers aer_error_handlers = {
static struct pcie_port_service_driver aerdriver = {
.name = "aer",
- .id_table = &aer_id[0],
+ .port_type = PCIE_ANY_PORT,
+ .service = PCIE_PORT_SERVICE_AER,
.probe = aer_probe,
.remove = aer_remove,
- .suspend = aer_suspend,
- .resume = aer_resume,
-
.err_handler = &aer_error_handlers,
.reset_link = aer_root_reset,
@@ -207,8 +188,7 @@ static void aer_remove(struct pcie_device *dev)
*
* Invoked when PCI Express bus loads AER service driver.
**/
-static int __devinit aer_probe (struct pcie_device *dev,
- const struct pcie_port_service_id *id )
+static int __devinit aer_probe (struct pcie_device *dev)
{
int status;
struct aer_rpc *rpc;
diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c
index ebce26c37049..8edb2f300e8f 100644
--- a/drivers/pci/pcie/aer/aerdrv_acpi.c
+++ b/drivers/pci/pcie/aer/aerdrv_acpi.c
@@ -38,7 +38,7 @@ int aer_osc_setup(struct pcie_device *pciedev)
handle = acpi_find_root_bridge_handle(pdev);
if (handle) {
- status = pci_osc_control_set(handle,
+ status = acpi_pci_osc_control_set(handle,
OSC_PCI_EXPRESS_AER_CONTROL |
OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
}
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 382575007382..307452f30035 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -351,21 +351,21 @@ static int find_aer_service_iter(struct device *device, void *data)
{
struct device_driver *driver;
struct pcie_port_service_driver *service_driver;
- struct pcie_device *pcie_dev;
struct find_aer_service_data *result;
result = (struct find_aer_service_data *) data;
if (device->bus == &pcie_port_bus_type) {
- pcie_dev = to_pcie_device(device);
- if (pcie_dev->id.port_type == PCIE_SW_DOWNSTREAM_PORT)
+ struct pcie_port_data *port_data;
+
+ port_data = pci_get_drvdata(to_pcie_device(device)->port);
+ if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT)
result->is_downstream = 1;
driver = device->driver;
if (driver) {
service_driver = to_service_driver(driver);
- if (service_driver->id_table->service_type ==
- PCIE_PORT_SERVICE_AER) {
+ if (service_driver->service == PCIE_PORT_SERVICE_AER) {
result->aer_driver = service_driver;
return 1;
}
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 2529f3f2ea5a..17ad53868f9f 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -25,19 +25,21 @@
#define PCIE_CAPABILITIES_REG 0x2
#define PCIE_SLOT_CAPABILITIES_REG 0x14
#define PCIE_PORT_DEVICE_MAXSERVICES 4
+#define PCIE_PORT_MSI_VECTOR_MASK 0x1f
+/*
+ * According to the PCI Express Base Specification 2.0, the indices of the MSI-X
+ * table entires used by port services must not exceed 31
+ */
+#define PCIE_PORT_MAX_MSIX_ENTRIES 32
#define get_descriptor_id(type, service) (((type - 4) << 4) | service)
-struct pcie_port_device_ext {
- int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */
-};
-
extern struct bus_type pcie_port_bus_type;
extern int pcie_port_device_probe(struct pci_dev *dev);
extern int pcie_port_device_register(struct pci_dev *dev);
#ifdef CONFIG_PM
-extern int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state);
-extern int pcie_port_device_resume(struct pci_dev *dev);
+extern int pcie_port_device_suspend(struct device *dev);
+extern int pcie_port_device_resume(struct device *dev);
#endif
extern void pcie_port_device_remove(struct pci_dev *dev);
extern int __must_check pcie_port_bus_register(void);
diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c
index eec89b767f9f..ef3a4eeaebb4 100644
--- a/drivers/pci/pcie/portdrv_bus.c
+++ b/drivers/pci/pcie/portdrv_bus.c
@@ -26,20 +26,22 @@ EXPORT_SYMBOL_GPL(pcie_port_bus_type);
static int pcie_port_bus_match(struct device *dev, struct device_driver *drv)
{
struct pcie_device *pciedev;
+ struct pcie_port_data *port_data;
struct pcie_port_service_driver *driver;
if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type)
return 0;
-
+
pciedev = to_pcie_device(dev);
driver = to_service_driver(drv);
- if ( (driver->id_table->vendor != PCI_ANY_ID &&
- driver->id_table->vendor != pciedev->id.vendor) ||
- (driver->id_table->device != PCI_ANY_ID &&
- driver->id_table->device != pciedev->id.device) ||
- (driver->id_table->port_type != PCIE_ANY_PORT &&
- driver->id_table->port_type != pciedev->id.port_type) ||
- driver->id_table->service_type != pciedev->id.service_type )
+
+ if (driver->service != pciedev->service)
+ return 0;
+
+ port_data = pci_get_drvdata(pciedev->port);
+
+ if (driver->port_type != PCIE_ANY_PORT
+ && driver->port_type != port_data->port_type)
return 0;
return 1;
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 8b3f8c18032f..e39982503863 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -15,10 +15,9 @@
#include <linux/slab.h>
#include <linux/pcieport_if.h>
+#include "../pci.h"
#include "portdrv.h"
-extern int pcie_mch_quirk; /* MSI-quirk Indicator */
-
/**
* release_pcie_device - free PCI Express port service device structure
* @dev: Port service device to release
@@ -31,26 +30,150 @@ static void release_pcie_device(struct device *dev)
kfree(to_pcie_device(dev));
}
-static int is_msi_quirked(struct pci_dev *dev)
+/**
+ * pcie_port_msix_add_entry - add entry to given array of MSI-X entries
+ * @entries: Array of MSI-X entries
+ * @new_entry: Index of the entry to add to the array
+ * @nr_entries: Number of entries aleady in the array
+ *
+ * Return value: Position of the added entry in the array
+ */
+static int pcie_port_msix_add_entry(
+ struct msix_entry *entries, int new_entry, int nr_entries)
{
- int port_type, quirk = 0;
+ int j;
+
+ for (j = 0; j < nr_entries; j++)
+ if (entries[j].entry == new_entry)
+ return j;
+
+ entries[j].entry = new_entry;
+ return j;
+}
+
+/**
+ * pcie_port_enable_msix - try to set up MSI-X as interrupt mode for given port
+ * @dev: PCI Express port to handle
+ * @vectors: Array of interrupt vectors to populate
+ * @mask: Bitmask of port capabilities returned by get_port_device_capability()
+ *
+ * Return value: 0 on success, error code on failure
+ */
+static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask)
+{
+ struct msix_entry *msix_entries;
+ int idx[PCIE_PORT_DEVICE_MAXSERVICES];
+ int nr_entries, status, pos, i, nvec;
u16 reg16;
+ u32 reg32;
- pci_read_config_word(dev,
- pci_find_capability(dev, PCI_CAP_ID_EXP) +
- PCIE_CAPABILITIES_REG, &reg16);
- port_type = (reg16 >> 4) & PORT_TYPE_MASK;
- switch(port_type) {
- case PCIE_RC_PORT:
- if (pcie_mch_quirk == 1)
- quirk = 1;
- break;
- case PCIE_SW_UPSTREAM_PORT:
- case PCIE_SW_DOWNSTREAM_PORT:
- default:
- break;
+ nr_entries = pci_msix_table_size(dev);
+ if (!nr_entries)
+ return -EINVAL;
+ if (nr_entries > PCIE_PORT_MAX_MSIX_ENTRIES)
+ nr_entries = PCIE_PORT_MAX_MSIX_ENTRIES;
+
+ msix_entries = kzalloc(sizeof(*msix_entries) * nr_entries, GFP_KERNEL);
+ if (!msix_entries)
+ return -ENOMEM;
+
+ /*
+ * Allocate as many entries as the port wants, so that we can check
+ * which of them will be useful. Moreover, if nr_entries is correctly
+ * equal to the number of entries this port actually uses, we'll happily
+ * go through without any tricks.
+ */
+ for (i = 0; i < nr_entries; i++)
+ msix_entries[i].entry = i;
+
+ status = pci_enable_msix(dev, msix_entries, nr_entries);
+ if (status)
+ goto Exit;
+
+ for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+ idx[i] = -1;
+ status = -EIO;
+ nvec = 0;
+
+ if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) {
+ int entry;
+
+ /*
+ * The code below follows the PCI Express Base Specification 2.0
+ * stating in Section 6.1.6 that "PME and Hot-Plug Event
+ * interrupts (when both are implemented) always share the same
+ * MSI or MSI-X vector, as indicated by the Interrupt Message
+ * Number field in the PCI Express Capabilities register", where
+ * according to Section 7.8.2 of the specification "For MSI-X,
+ * the value in this field indicates which MSI-X Table entry is
+ * used to generate the interrupt message."
+ */
+ pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ pci_read_config_word(dev, pos + PCIE_CAPABILITIES_REG, &reg16);
+ entry = (reg16 >> 9) & PCIE_PORT_MSI_VECTOR_MASK;
+ if (entry >= nr_entries)
+ goto Error;
+
+ i = pcie_port_msix_add_entry(msix_entries, entry, nvec);
+ if (i == nvec)
+ nvec++;
+
+ idx[PCIE_PORT_SERVICE_PME_SHIFT] = i;
+ idx[PCIE_PORT_SERVICE_HP_SHIFT] = i;
+ }
+
+ if (mask & PCIE_PORT_SERVICE_AER) {
+ int entry;
+
+ /*
+ * The code below follows Section 7.10.10 of the PCI Express
+ * Base Specification 2.0 stating that bits 31-27 of the Root
+ * Error Status Register contain a value indicating which of the
+ * MSI/MSI-X vectors assigned to the port is going to be used
+ * for AER, where "For MSI-X, the value in this register
+ * indicates which MSI-X Table entry is used to generate the
+ * interrupt message."
+ */
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+ pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &reg32);
+ entry = reg32 >> 27;
+ if (entry >= nr_entries)
+ goto Error;
+
+ i = pcie_port_msix_add_entry(msix_entries, entry, nvec);
+ if (i == nvec)
+ nvec++;
+
+ idx[PCIE_PORT_SERVICE_AER_SHIFT] = i;
}
- return quirk;
+
+ /*
+ * If nvec is equal to the allocated number of entries, we can just use
+ * what we have. Otherwise, the port has some extra entries not for the
+ * services we know and we need to work around that.
+ */
+ if (nvec == nr_entries) {
+ status = 0;
+ } else {
+ /* Drop the temporary MSI-X setup */
+ pci_disable_msix(dev);
+
+ /* Now allocate the MSI-X vectors for real */
+ status = pci_enable_msix(dev, msix_entries, nvec);
+ if (status)
+ goto Exit;
+ }
+
+ for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+ vectors[i] = idx[i] >= 0 ? msix_entries[idx[i]].vector : -1;
+
+ Exit:
+ kfree(msix_entries);
+ return status;
+
+ Error:
+ pci_disable_msix(dev);
+ goto Exit;
}
/**
@@ -64,47 +187,32 @@ static int is_msi_quirked(struct pci_dev *dev)
*/
static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask)
{
- int i, pos, nvec, status = -EINVAL;
- int interrupt_mode = PCIE_PORT_INTx_MODE;
+ struct pcie_port_data *port_data = pci_get_drvdata(dev);
+ int irq, interrupt_mode = PCIE_PORT_NO_IRQ;
+ int i;
- /* Set INTx as default */
- for (i = 0, nvec = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
- if (mask & (1 << i))
- nvec++;
- vectors[i] = dev->irq;
- }
-
/* Check MSI quirk */
- if (is_msi_quirked(dev))
- return interrupt_mode;
-
- /* Select MSI-X over MSI if supported */
- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
- if (pos) {
- struct msix_entry msix_entries[PCIE_PORT_DEVICE_MAXSERVICES] =
- {{0, 0}, {0, 1}, {0, 2}, {0, 3}};
- status = pci_enable_msix(dev, msix_entries, nvec);
- if (!status) {
- int j = 0;
-
- interrupt_mode = PCIE_PORT_MSIX_MODE;
- for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
- if (mask & (1 << i))
- vectors[i] = msix_entries[j++].vector;
- }
- }
- }
- if (status) {
- pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
- if (pos) {
- status = pci_enable_msi(dev);
- if (!status) {
- interrupt_mode = PCIE_PORT_MSI_MODE;
- for (i = 0;i < PCIE_PORT_DEVICE_MAXSERVICES;i++)
- vectors[i] = dev->irq;
- }
- }
- }
+ if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk)
+ goto Fallback;
+
+ /* Try to use MSI-X if supported */
+ if (!pcie_port_enable_msix(dev, vectors, mask))
+ return PCIE_PORT_MSIX_MODE;
+
+ /* We're not going to use MSI-X, so try MSI and fall back to INTx */
+ if (!pci_enable_msi(dev))
+ interrupt_mode = PCIE_PORT_MSI_MODE;
+
+ Fallback:
+ if (interrupt_mode == PCIE_PORT_NO_IRQ && dev->pin)
+ interrupt_mode = PCIE_PORT_INTx_MODE;
+
+ irq = interrupt_mode != PCIE_PORT_NO_IRQ ? dev->irq : -1;
+ for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+ vectors[i] = irq;
+
+ vectors[PCIE_PORT_SERVICE_VC_SHIFT] = -1;
+
return interrupt_mode;
}
@@ -132,13 +240,11 @@ static int get_port_device_capability(struct pci_dev *dev)
pos + PCIE_SLOT_CAPABILITIES_REG, &reg32);
if (reg32 & SLOT_HP_CAPABLE_MASK)
services |= PCIE_PORT_SERVICE_HP;
- }
- /* PME Capable - root port capability */
- if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT)
- services |= PCIE_PORT_SERVICE_PME;
-
+ }
+ /* AER capable */
if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
services |= PCIE_PORT_SERVICE_AER;
+ /* VC support */
if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC))
services |= PCIE_PORT_SERVICE_VC;
@@ -152,20 +258,17 @@ static int get_port_device_capability(struct pci_dev *dev)
* @port_type: Type of the port
* @service_type: Type of service to associate with the service device
* @irq: Interrupt vector to associate with the service device
- * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI)
*/
static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev,
- int port_type, int service_type, int irq, int irq_mode)
+ int service_type, int irq)
{
+ struct pcie_port_data *port_data = pci_get_drvdata(parent);
struct device *device;
+ int port_type = port_data->port_type;
dev->port = parent;
- dev->interrupt_mode = irq_mode;
dev->irq = irq;
- dev->id.vendor = parent->vendor;
- dev->id.device = parent->device;
- dev->id.port_type = port_type;
- dev->id.service_type = (1 << service_type);
+ dev->service = service_type;
/* Initialize generic device interface */
device = &dev->device;
@@ -185,10 +288,9 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev,
* @port_type: Type of the port
* @service_type: Type of service to associate with the service device
* @irq: Interrupt vector to associate with the service device
- * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI)
*/
static struct pcie_device* alloc_pcie_device(struct pci_dev *parent,
- int port_type, int service_type, int irq, int irq_mode)
+ int service_type, int irq)
{
struct pcie_device *device;
@@ -196,7 +298,7 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent,
if (!device)
return NULL;
- pcie_device_init(parent, device, port_type, service_type, irq,irq_mode);
+ pcie_device_init(parent, device, service_type, irq);
return device;
}
@@ -230,63 +332,90 @@ int pcie_port_device_probe(struct pci_dev *dev)
*/
int pcie_port_device_register(struct pci_dev *dev)
{
- struct pcie_port_device_ext *p_ext;
- int status, type, capabilities, irq_mode, i;
+ struct pcie_port_data *port_data;
+ int status, capabilities, irq_mode, i, nr_serv;
int vectors[PCIE_PORT_DEVICE_MAXSERVICES];
u16 reg16;
- /* Allocate port device extension */
- if (!(p_ext = kmalloc(sizeof(struct pcie_port_device_ext), GFP_KERNEL)))
+ port_data = kzalloc(sizeof(*port_data), GFP_KERNEL);
+ if (!port_data)
return -ENOMEM;
-
- pci_set_drvdata(dev, p_ext);
+ pci_set_drvdata(dev, port_data);
/* Get port type */
pci_read_config_word(dev,
pci_find_capability(dev, PCI_CAP_ID_EXP) +
PCIE_CAPABILITIES_REG, &reg16);
- type = (reg16 >> 4) & PORT_TYPE_MASK;
+ port_data->port_type = (reg16 >> 4) & PORT_TYPE_MASK;
- /* Now get port services */
capabilities = get_port_device_capability(dev);
+ /* Root ports are capable of generating PME too */
+ if (port_data->port_type == PCIE_RC_PORT)
+ capabilities |= PCIE_PORT_SERVICE_PME;
+
irq_mode = assign_interrupt_mode(dev, vectors, capabilities);
- p_ext->interrupt_mode = irq_mode;
+ if (irq_mode == PCIE_PORT_NO_IRQ) {
+ /*
+ * Don't use service devices that require interrupts if there is
+ * no way to generate them.
+ */
+ if (!(capabilities & PCIE_PORT_SERVICE_VC)) {
+ status = -ENODEV;
+ goto Error;
+ }
+ capabilities = PCIE_PORT_SERVICE_VC;
+ }
+ port_data->port_irq_mode = irq_mode;
+
+ status = pci_enable_device(dev);
+ if (status)
+ goto Error;
+ pci_set_master(dev);
/* Allocate child services if any */
- for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
+ for (i = 0, nr_serv = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
struct pcie_device *child;
+ int service = 1 << i;
+
+ if (!(capabilities & service))
+ continue;
- if (capabilities & (1 << i)) {
- child = alloc_pcie_device(
- dev, /* parent */
- type, /* port type */
- i, /* service type */
- vectors[i], /* irq */
- irq_mode /* interrupt mode */);
- if (child) {
- status = device_register(&child->device);
- if (status) {
- kfree(child);
- continue;
- }
- get_device(&child->device);
- }
+ child = alloc_pcie_device(dev, service, vectors[i]);
+ if (!child)
+ continue;
+
+ status = device_register(&child->device);
+ if (status) {
+ kfree(child);
+ continue;
}
+
+ get_device(&child->device);
+ nr_serv++;
+ }
+ if (!nr_serv) {
+ pci_disable_device(dev);
+ status = -ENODEV;
+ goto Error;
}
+
return 0;
+
+ Error:
+ kfree(port_data);
+ return status;
}
#ifdef CONFIG_PM
static int suspend_iter(struct device *dev, void *data)
{
struct pcie_port_service_driver *service_driver;
- pm_message_t state = * (pm_message_t *) data;
if ((dev->bus == &pcie_port_bus_type) &&
(dev->driver)) {
service_driver = to_service_driver(dev->driver);
if (service_driver->suspend)
- service_driver->suspend(to_pcie_device(dev), state);
+ service_driver->suspend(to_pcie_device(dev));
}
return 0;
}
@@ -294,11 +423,10 @@ static int suspend_iter(struct device *dev, void *data)
/**
* pcie_port_device_suspend - suspend port services associated with a PCIe port
* @dev: PCI Express port to handle
- * @state: Representation of system power management transition in progress
*/
-int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state)
+int pcie_port_device_suspend(struct device *dev)
{
- return device_for_each_child(&dev->dev, &state, suspend_iter);
+ return device_for_each_child(dev, NULL, suspend_iter);
}
static int resume_iter(struct device *dev, void *data)
@@ -318,24 +446,17 @@ static int resume_iter(struct device *dev, void *data)
* pcie_port_device_suspend - resume port services associated with a PCIe port
* @dev: PCI Express port to handle
*/
-int pcie_port_device_resume(struct pci_dev *dev)
+int pcie_port_device_resume(struct device *dev)
{
- return device_for_each_child(&dev->dev, NULL, resume_iter);
+ return device_for_each_child(dev, NULL, resume_iter);
}
-#endif
+#endif /* PM */
static int remove_iter(struct device *dev, void *data)
{
- struct pcie_port_service_driver *service_driver;
-
if (dev->bus == &pcie_port_bus_type) {
- if (dev->driver) {
- service_driver = to_service_driver(dev->driver);
- if (service_driver->remove)
- service_driver->remove(to_pcie_device(dev));
- }
- *(unsigned long*)data = (unsigned long)dev;
- return 1;
+ put_device(dev);
+ device_unregister(dev);
}
return 0;
}
@@ -349,25 +470,21 @@ static int remove_iter(struct device *dev, void *data)
*/
void pcie_port_device_remove(struct pci_dev *dev)
{
- struct device *device;
- unsigned long device_addr;
- int interrupt_mode = PCIE_PORT_INTx_MODE;
- int status;
+ struct pcie_port_data *port_data = pci_get_drvdata(dev);
- do {
- status = device_for_each_child(&dev->dev, &device_addr, remove_iter);
- if (status) {
- device = (struct device*)device_addr;
- interrupt_mode = (to_pcie_device(device))->interrupt_mode;
- put_device(device);
- device_unregister(device);
- }
- } while (status);
- /* Switch to INTx by default if MSI enabled */
- if (interrupt_mode == PCIE_PORT_MSIX_MODE)
+ device_for_each_child(&dev->dev, NULL, remove_iter);
+ pci_disable_device(dev);
+
+ switch (port_data->port_irq_mode) {
+ case PCIE_PORT_MSIX_MODE:
pci_disable_msix(dev);
- else if (interrupt_mode == PCIE_PORT_MSI_MODE)
+ break;
+ case PCIE_PORT_MSI_MODE:
pci_disable_msi(dev);
+ break;
+ }
+
+ kfree(port_data);
}
/**
@@ -392,7 +509,7 @@ static int pcie_port_probe_service(struct device *dev)
return -ENODEV;
pciedev = to_pcie_device(dev);
- status = driver->probe(pciedev, driver->id_table);
+ status = driver->probe(pciedev);
if (!status) {
dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n",
driver->name);
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 5ea566e20b37..b924e2463f85 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -32,11 +32,6 @@ MODULE_LICENSE("GPL");
/* global data */
static const char device_name[] = "pcieport-driver";
-static int pcie_portdrv_save_config(struct pci_dev *dev)
-{
- return pci_save_state(dev);
-}
-
static int pcie_portdrv_restore_config(struct pci_dev *dev)
{
int retval;
@@ -49,21 +44,21 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev)
}
#ifdef CONFIG_PM
-static int pcie_portdrv_suspend(struct pci_dev *dev, pm_message_t state)
-{
- return pcie_port_device_suspend(dev, state);
+static struct dev_pm_ops pcie_portdrv_pm_ops = {
+ .suspend = pcie_port_device_suspend,
+ .resume = pcie_port_device_resume,
+ .freeze = pcie_port_device_suspend,
+ .thaw = pcie_port_device_resume,
+ .poweroff = pcie_port_device_suspend,
+ .restore = pcie_port_device_resume,
+};
-}
+#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops)
-static int pcie_portdrv_resume(struct pci_dev *dev)
-{
- pci_set_master(dev);
- return pcie_port_device_resume(dev);
-}
-#else
-#define pcie_portdrv_suspend NULL
-#define pcie_portdrv_resume NULL
-#endif
+#else /* !PM */
+
+#define PCIE_PORTDRV_PM_OPS NULL
+#endif /* !PM */
/*
* pcie_portdrv_probe - Probe PCI-Express port devices
@@ -82,20 +77,15 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev,
if (status)
return status;
- if (pci_enable_device(dev) < 0)
- return -ENODEV;
-
- pci_set_master(dev);
if (!dev->irq && dev->pin) {
dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; "
"check vendor BIOS\n", dev->vendor, dev->device);
}
- if (pcie_port_device_register(dev)) {
- pci_disable_device(dev);
- return -ENOMEM;
- }
+ status = pcie_port_device_register(dev);
+ if (status)
+ return status;
- pcie_portdrv_save_config(dev);
+ pci_save_state(dev);
return 0;
}
@@ -104,7 +94,6 @@ static void pcie_portdrv_remove (struct pci_dev *dev)
{
pcie_port_device_remove(dev);
pci_disable_device(dev);
- kfree(pci_get_drvdata(dev));
}
static int error_detected_iter(struct device *device, void *data)
@@ -278,10 +267,9 @@ static struct pci_driver pcie_portdriver = {
.probe = pcie_portdrv_probe,
.remove = pcie_portdrv_remove,
- .suspend = pcie_portdrv_suspend,
- .resume = pcie_portdrv_resume,
-
.err_handler = &pcie_portdrv_err_handler,
+
+ .driver.pm = PCIE_PORTDRV_PM_OPS,
};
static int __init pcie_portdrv_init(void)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 55ec44a27e89..e2f3dd098cfa 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -287,7 +287,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
struct resource *res;
int i;
- if (!dev) /* It's a host bus, nothing to read */
+ if (!child->parent) /* It's a host bus, nothing to read */
return;
if (dev->transparent) {
@@ -511,21 +511,21 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
/*
* If we already got to this bus through a different bridge,
- * ignore it. This can happen with the i450NX chipset.
+ * don't re-add it. This can happen with the i450NX chipset.
+ *
+ * However, we continue to descend down the hierarchy and
+ * scan remaining child buses.
*/
- if (pci_find_bus(pci_domain_nr(bus), busnr)) {
- dev_info(&dev->dev, "bus %04x:%02x already known\n",
- pci_domain_nr(bus), busnr);
- goto out;
+ child = pci_find_bus(pci_domain_nr(bus), busnr);
+ if (!child) {
+ child = pci_add_new_bus(bus, dev, busnr);
+ if (!child)
+ goto out;
+ child->primary = buses & 0xFF;
+ child->subordinate = (buses >> 16) & 0xFF;
+ child->bridge_ctl = bctl;
}
- child = pci_add_new_bus(bus, dev, busnr);
- if (!child)
- goto out;
- child->primary = buses & 0xFF;
- child->subordinate = (buses >> 16) & 0xFF;
- child->bridge_ctl = bctl;
-
cmax = pci_scan_child_bus(child);
if (cmax > max)
max = cmax;
@@ -674,6 +674,19 @@ static void pci_read_irq(struct pci_dev *dev)
dev->irq = irq;
}
+static void set_pcie_port_type(struct pci_dev *pdev)
+{
+ int pos;
+ u16 reg16;
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+ if (!pos)
+ return;
+ pdev->is_pcie = 1;
+ pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
+ pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
+}
+
#define LEGACY_IO_RESOURCE (IORESOURCE_IO | IORESOURCE_PCI_FIXED)
/**
@@ -683,12 +696,33 @@ static void pci_read_irq(struct pci_dev *dev)
* Initialize the device structure with information about the device's
* vendor,class,memory and IO-space addresses,IRQ lines etc.
* Called at initialisation of the PCI subsystem and by CardBus services.
- * Returns 0 on success and -1 if unknown type of device (not normal, bridge
- * or CardBus).
+ * Returns 0 on success and negative if unknown type of device (not normal,
+ * bridge or CardBus).
*/
-static int pci_setup_device(struct pci_dev * dev)
+int pci_setup_device(struct pci_dev *dev)
{
u32 class;
+ u8 hdr_type;
+ struct pci_slot *slot;
+
+ if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
+ return -EIO;
+
+ dev->sysdata = dev->bus->sysdata;
+ dev->dev.parent = dev->bus->bridge;
+ dev->dev.bus = &pci_bus_type;
+ dev->hdr_type = hdr_type & 0x7f;
+ dev->multifunction = !!(hdr_type & 0x80);
+ dev->error_state = pci_channel_io_normal;
+ set_pcie_port_type(dev);
+
+ list_for_each_entry(slot, &dev->bus->slots, list)
+ if (PCI_SLOT(dev->devfn) == slot->number)
+ dev->slot = slot;
+
+ /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
+ set this higher, assuming the system even supports it. */
+ dev->dma_mask = 0xffffffff;
dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
dev->bus->number, PCI_SLOT(dev->devfn),
@@ -703,12 +737,14 @@ static int pci_setup_device(struct pci_dev * dev)
dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n",
dev->vendor, dev->device, class, dev->hdr_type);
+ /* need to have dev->class ready */
+ dev->cfg_size = pci_cfg_space_size(dev);
+
/* "Unknown power state" */
dev->current_state = PCI_UNKNOWN;
/* Early fixups, before probing the BARs */
pci_fixup_device(pci_fixup_early, dev);
- class = dev->class >> 8;
switch (dev->hdr_type) { /* header type */
case PCI_HEADER_TYPE_NORMAL: /* standard header */
@@ -770,7 +806,7 @@ static int pci_setup_device(struct pci_dev * dev)
default: /* unknown header */
dev_err(&dev->dev, "unknown header type %02x, "
"ignoring device\n", dev->hdr_type);
- return -1;
+ return -EIO;
bad:
dev_err(&dev->dev, "ignoring class %02x (doesn't match header "
@@ -785,6 +821,7 @@ static int pci_setup_device(struct pci_dev * dev)
static void pci_release_capabilities(struct pci_dev *dev)
{
pci_vpd_release(dev);
+ pci_iov_release(dev);
}
/**
@@ -803,19 +840,6 @@ static void pci_release_dev(struct device *dev)
kfree(pci_dev);
}
-static void set_pcie_port_type(struct pci_dev *pdev)
-{
- int pos;
- u16 reg16;
-
- pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
- if (!pos)
- return;
- pdev->is_pcie = 1;
- pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
- pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
-}
-
/**
* pci_cfg_space_size - get the configuration space size of the PCI device.
* @dev: PCI device
@@ -847,6 +871,11 @@ int pci_cfg_space_size(struct pci_dev *dev)
{
int pos;
u32 status;
+ u16 class;
+
+ class = dev->class >> 8;
+ if (class == PCI_CLASS_BRIDGE_HOST)
+ return pci_cfg_space_size_ext(dev);
pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
if (!pos) {
@@ -891,9 +920,7 @@ EXPORT_SYMBOL(alloc_pci_dev);
static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
{
struct pci_dev *dev;
- struct pci_slot *slot;
u32 l;
- u8 hdr_type;
int delay = 1;
if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l))
@@ -920,34 +947,16 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
}
}
- if (pci_bus_read_config_byte(bus, devfn, PCI_HEADER_TYPE, &hdr_type))
- return NULL;
-
dev = alloc_pci_dev();
if (!dev)
return NULL;
dev->bus = bus;
- dev->sysdata = bus->sysdata;
- dev->dev.parent = bus->bridge;
- dev->dev.bus = &pci_bus_type;
dev->devfn = devfn;
- dev->hdr_type = hdr_type & 0x7f;
- dev->multifunction = !!(hdr_type & 0x80);
dev->vendor = l & 0xffff;
dev->device = (l >> 16) & 0xffff;
- dev->cfg_size = pci_cfg_space_size(dev);
- dev->error_state = pci_channel_io_normal;
- set_pcie_port_type(dev);
-
- list_for_each_entry(slot, &bus->slots, list)
- if (PCI_SLOT(devfn) == slot->number)
- dev->slot = slot;
- /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
- set this higher, assuming the system even supports it. */
- dev->dma_mask = 0xffffffff;
- if (pci_setup_device(dev) < 0) {
+ if (pci_setup_device(dev)) {
kfree(dev);
return NULL;
}
@@ -972,6 +981,9 @@ static void pci_init_capabilities(struct pci_dev *dev)
/* Alternative Routing-ID Forwarding */
pci_enable_ari(dev);
+
+ /* Single Root I/O Virtualization */
+ pci_iov_init(dev);
}
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
@@ -1006,6 +1018,12 @@ struct pci_dev *__ref pci_scan_single_device(struct pci_bus *bus, int devfn)
{
struct pci_dev *dev;
+ dev = pci_get_slot(bus, devfn);
+ if (dev) {
+ pci_dev_put(dev);
+ return dev;
+ }
+
dev = pci_scan_device(bus, devfn);
if (!dev)
return NULL;
@@ -1024,35 +1042,27 @@ EXPORT_SYMBOL(pci_scan_single_device);
* Scan a PCI slot on the specified PCI bus for devices, adding
* discovered devices to the @bus->devices list. New devices
* will not have is_added set.
+ *
+ * Returns the number of new devices found.
*/
int pci_scan_slot(struct pci_bus *bus, int devfn)
{
- int func, nr = 0;
- int scan_all_fns;
-
- scan_all_fns = pcibios_scan_all_fns(bus, devfn);
-
- for (func = 0; func < 8; func++, devfn++) {
- struct pci_dev *dev;
-
- dev = pci_scan_single_device(bus, devfn);
- if (dev) {
- nr++;
+ int fn, nr = 0;
+ struct pci_dev *dev;
- /*
- * If this is a single function device,
- * don't scan past the first function.
- */
- if (!dev->multifunction) {
- if (func > 0) {
- dev->multifunction = 1;
- } else {
- break;
- }
+ dev = pci_scan_single_device(bus, devfn);
+ if (dev && !dev->is_added) /* new device? */
+ nr++;
+
+ if ((dev && dev->multifunction) ||
+ (!dev && pcibios_scan_all_fns(bus, devfn))) {
+ for (fn = 1; fn < 8; fn++) {
+ dev = pci_scan_single_device(bus, devfn + fn);
+ if (dev) {
+ if (!dev->is_added)
+ nr++;
+ dev->multifunction = 1;
}
- } else {
- if (func == 0 && !scan_all_fns)
- break;
}
}
@@ -1074,12 +1084,21 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
for (devfn = 0; devfn < 0x100; devfn += 8)
pci_scan_slot(bus, devfn);
+ /* Reserve buses for SR-IOV capability. */
+ max += pci_iov_bus_range(bus);
+
/*
* After performing arch-dependent fixup of the bus, look behind
* all PCI-to-PCI bridges on this bus.
*/
- pr_debug("PCI: Fixups for bus %04x:%02x\n", pci_domain_nr(bus), bus->number);
- pcibios_fixup_bus(bus);
+ if (!bus->is_added) {
+ pr_debug("PCI: Fixups for bus %04x:%02x\n",
+ pci_domain_nr(bus), bus->number);
+ pcibios_fixup_bus(bus);
+ if (pci_is_root_bus(bus))
+ bus->is_added = 1;
+ }
+
for (pass=0; pass < 2; pass++)
list_for_each_entry(dev, &bus->devices, bus_list) {
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
@@ -1114,7 +1133,7 @@ struct pci_bus * pci_create_bus(struct device *parent,
if (!b)
return NULL;
- dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev){
kfree(b);
return NULL;
@@ -1133,7 +1152,6 @@ struct pci_bus * pci_create_bus(struct device *parent,
list_add_tail(&b->node, &pci_root_buses);
up_write(&pci_bus_sem);
- memset(dev, 0, sizeof(*dev));
dev->parent = parent;
dev->release = pci_release_bus_bridge_dev;
dev_set_name(dev, "pci%04x:%02x", pci_domain_nr(b), bus);
@@ -1193,6 +1211,38 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
EXPORT_SYMBOL(pci_scan_bus_parented);
#ifdef CONFIG_HOTPLUG
+/**
+ * pci_rescan_bus - scan a PCI bus for devices.
+ * @bus: PCI bus to scan
+ *
+ * Scan a PCI bus and child buses for new devices, adds them,
+ * and enables them.
+ *
+ * Returns the max number of subordinate bus discovered.
+ */
+unsigned int __devinit pci_rescan_bus(struct pci_bus *bus)
+{
+ unsigned int max;
+ struct pci_dev *dev;
+
+ max = pci_scan_child_bus(bus);
+
+ down_read(&pci_bus_sem);
+ list_for_each_entry(dev, &bus->devices, bus_list)
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+ dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ if (dev->subordinate)
+ pci_bus_size_bridges(dev->subordinate);
+ up_read(&pci_bus_sem);
+
+ pci_bus_assign_resources(bus);
+ pci_enable_bridges(bus);
+ pci_bus_add_devices(bus);
+
+ return max;
+}
+EXPORT_SYMBOL_GPL(pci_rescan_bus);
+
EXPORT_SYMBOL(pci_add_new_bus);
EXPORT_SYMBOL(pci_scan_slot);
EXPORT_SYMBOL(pci_scan_bridge);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 92b9efe9bcaf..9b2f0d96900d 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -24,6 +24,7 @@
#include <linux/kallsyms.h>
#include <linux/dmi.h>
#include <linux/pci-aspm.h>
+#include <linux/ioport.h>
#include "pci.h"
int isa_dma_bridge_buggy;
@@ -34,6 +35,65 @@ int pcie_mch_quirk;
EXPORT_SYMBOL(pcie_mch_quirk);
#ifdef CONFIG_PCI_QUIRKS
+/*
+ * This quirk function disables the device and releases resources
+ * which is specified by kernel's boot parameter 'pci=resource_alignment='.
+ * It also rounds up size to specified alignment.
+ * Later on, the kernel will assign page-aligned memory resource back
+ * to that device.
+ */
+static void __devinit quirk_resource_alignment(struct pci_dev *dev)
+{
+ int i;
+ struct resource *r;
+ resource_size_t align, size;
+
+ if (!pci_is_reassigndev(dev))
+ return;
+
+ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+ (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
+ dev_warn(&dev->dev,
+ "Can't reassign resources to host bridge.\n");
+ return;
+ }
+
+ dev_info(&dev->dev, "Disabling device and release resources.\n");
+ pci_disable_device(dev);
+
+ align = pci_specified_resource_alignment(dev);
+ for (i=0; i < PCI_BRIDGE_RESOURCES; i++) {
+ r = &dev->resource[i];
+ if (!(r->flags & IORESOURCE_MEM))
+ continue;
+ size = resource_size(r);
+ if (size < align) {
+ size = align;
+ dev_info(&dev->dev,
+ "Rounding up size of resource #%d to %#llx.\n",
+ i, (unsigned long long)size);
+ }
+ r->end = size - 1;
+ r->start = 0;
+ }
+ /* Need to disable bridge's resource window,
+ * to enable the kernel to reassign new resource
+ * window later on.
+ */
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
+ (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+ for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
+ r = &dev->resource[i];
+ if (!(r->flags & IORESOURCE_MEM))
+ continue;
+ r->end = resource_size(r) - 1;
+ r->start = 0;
+ }
+ pci_disable_bridge_window(dev);
+ }
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment);
+
/* The Mellanox Tavor device gives false positive parity errors
* Mark this device with a broken_parity_status, to allow
* PCI scanning code to "skip" this now blacklisted device.
@@ -1126,10 +1186,15 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev)
* its on-board VGA controller */
asus_hides_smbus = 1;
}
- else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_IG)
+ else if (dev->device == PCI_DEVICE_ID_INTEL_82801DB_2)
switch(dev->subsystem_device) {
case 0x00b8: /* Compaq Evo D510 CMT */
case 0x00b9: /* Compaq Evo D510 SFF */
+ /* Motherboard doesn't have Host bridge
+ * subvendor/subdevice IDs and on-board VGA
+ * controller is disabled if an AGP card is
+ * inserted, therefore checking USB UHCI
+ * Controller #1 */
asus_hides_smbus = 1;
}
else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC)
@@ -1154,7 +1219,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82855GM_HB, as
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3, asus_hides_smbus_hostbridge);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG, asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_2, asus_hides_smbus_hostbridge);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, asus_hides_smbus_hostbridge);
static void asus_hides_smbus_lpc(struct pci_dev *dev)
@@ -1664,9 +1729,13 @@ static void __devinit quirk_netmos(struct pci_dev *dev)
* of parallel ports and <S> is the number of serial ports.
*/
switch (dev->device) {
+ case PCI_DEVICE_ID_NETMOS_9835:
+ /* Well, this rule doesn't hold for the following 9835 device */
+ if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM &&
+ dev->subsystem_device == 0x0299)
+ return;
case PCI_DEVICE_ID_NETMOS_9735:
case PCI_DEVICE_ID_NETMOS_9745:
- case PCI_DEVICE_ID_NETMOS_9835:
case PCI_DEVICE_ID_NETMOS_9845:
case PCI_DEVICE_ID_NETMOS_9855:
if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_SERIAL &&
@@ -2078,6 +2147,92 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
PCI_DEVICE_ID_NVIDIA_NVENET_15,
nvenet_msi_disable);
+static int __devinit ht_check_msi_mapping(struct pci_dev *dev)
+{
+ int pos, ttl = 48;
+ int found = 0;
+
+ /* check if there is HT MSI cap or enabled on this device */
+ pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING);
+ while (pos && ttl--) {
+ u8 flags;
+
+ if (found < 1)
+ found = 1;
+ if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
+ &flags) == 0) {
+ if (flags & HT_MSI_FLAGS_ENABLE) {
+ if (found < 2) {
+ found = 2;
+ break;
+ }
+ }
+ }
+ pos = pci_find_next_ht_capability(dev, pos,
+ HT_CAPTYPE_MSI_MAPPING);
+ }
+
+ return found;
+}
+
+static int __devinit host_bridge_with_leaf(struct pci_dev *host_bridge)
+{
+ struct pci_dev *dev;
+ int pos;
+ int i, dev_no;
+ int found = 0;
+
+ dev_no = host_bridge->devfn >> 3;
+ for (i = dev_no + 1; i < 0x20; i++) {
+ dev = pci_get_slot(host_bridge->bus, PCI_DEVFN(i, 0));
+ if (!dev)
+ continue;
+
+ /* found next host bridge ?*/
+ pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE);
+ if (pos != 0) {
+ pci_dev_put(dev);
+ break;
+ }
+
+ if (ht_check_msi_mapping(dev)) {
+ found = 1;
+ pci_dev_put(dev);
+ break;
+ }
+ pci_dev_put(dev);
+ }
+
+ return found;
+}
+
+#define PCI_HT_CAP_SLAVE_CTRL0 4 /* link control */
+#define PCI_HT_CAP_SLAVE_CTRL1 8 /* link control to */
+
+static int __devinit is_end_of_ht_chain(struct pci_dev *dev)
+{
+ int pos, ctrl_off;
+ int end = 0;
+ u16 flags, ctrl;
+
+ pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE);
+
+ if (!pos)
+ goto out;
+
+ pci_read_config_word(dev, pos + PCI_CAP_FLAGS, &flags);
+
+ ctrl_off = ((flags >> 10) & 1) ?
+ PCI_HT_CAP_SLAVE_CTRL0 : PCI_HT_CAP_SLAVE_CTRL1;
+ pci_read_config_word(dev, pos + ctrl_off, &ctrl);
+
+ if (ctrl & (1 << 6))
+ end = 1;
+
+out:
+ return end;
+}
+
static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev)
{
struct pci_dev *host_bridge;
@@ -2102,6 +2257,11 @@ static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev)
if (!found)
return;
+ /* don't enable end_device/host_bridge with leaf directly here */
+ if (host_bridge == dev && is_end_of_ht_chain(host_bridge) &&
+ host_bridge_with_leaf(host_bridge))
+ goto out;
+
/* root did that ! */
if (msi_ht_cap_enabled(host_bridge))
goto out;
@@ -2132,44 +2292,12 @@ static void __devinit ht_disable_msi_mapping(struct pci_dev *dev)
}
}
-static int __devinit ht_check_msi_mapping(struct pci_dev *dev)
-{
- int pos, ttl = 48;
- int found = 0;
-
- /* check if there is HT MSI cap or enabled on this device */
- pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING);
- while (pos && ttl--) {
- u8 flags;
-
- if (found < 1)
- found = 1;
- if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
- &flags) == 0) {
- if (flags & HT_MSI_FLAGS_ENABLE) {
- if (found < 2) {
- found = 2;
- break;
- }
- }
- }
- pos = pci_find_next_ht_capability(dev, pos,
- HT_CAPTYPE_MSI_MAPPING);
- }
-
- return found;
-}
-
-static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
+static void __devinit __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all)
{
struct pci_dev *host_bridge;
int pos;
int found;
- /* Enabling HT MSI mapping on this device breaks MCP51 */
- if (dev->device == 0x270)
- return;
-
/* check if there is HT MSI cap or enabled on this device */
found = ht_check_msi_mapping(dev);
@@ -2193,7 +2321,10 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
/* Host bridge is to HT */
if (found == 1) {
/* it is not enabled, try to enable it */
- nv_ht_enable_msi_mapping(dev);
+ if (all)
+ ht_enable_msi_mapping(dev);
+ else
+ nv_ht_enable_msi_mapping(dev);
}
return;
}
@@ -2205,8 +2336,20 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
/* Host bridge is not to HT, disable HT MSI mapping on this device */
ht_disable_msi_mapping(dev);
}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk);
+
+static void __devinit nv_msi_ht_cap_quirk_all(struct pci_dev *dev)
+{
+ return __nv_msi_ht_cap_quirk(dev, 1);
+}
+
+static void __devinit nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev)
+{
+ return __nv_msi_ht_cap_quirk(dev, 0);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf);
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all);
static void __devinit quirk_msi_intx_disable_bug(struct pci_dev *dev)
{
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 042e08924421..86503c14ce7e 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -71,6 +71,9 @@ void pci_remove_bus(struct pci_bus *pci_bus)
down_write(&pci_bus_sem);
list_del(&pci_bus->node);
up_write(&pci_bus_sem);
+ if (!pci_bus->is_added)
+ return;
+
pci_remove_legacy_files(pci_bus);
device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
@@ -92,6 +95,7 @@ EXPORT_SYMBOL(pci_remove_bus);
*/
void pci_remove_bus_device(struct pci_dev *dev)
{
+ pci_stop_bus_device(dev);
if (dev->subordinate) {
struct pci_bus *b = dev->subordinate;
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 5af8bd538149..710d4ea69568 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -29,7 +29,7 @@ pci_find_upstream_pcie_bridge(struct pci_dev *pdev)
if (pdev->is_pcie)
return NULL;
while (1) {
- if (!pdev->bus->self)
+ if (!pdev->bus->parent)
break;
pdev = pdev->bus->self;
/* a p2p bridge */
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 704608945780..334285a8e237 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -27,7 +27,7 @@
#include <linux/slab.h>
-static void pbus_assign_resources_sorted(struct pci_bus *bus)
+static void pbus_assign_resources_sorted(const struct pci_bus *bus)
{
struct pci_dev *dev;
struct resource *res;
@@ -144,6 +144,9 @@ static void pci_setup_bridge(struct pci_bus *bus)
struct pci_bus_region region;
u32 l, bu, lu, io_upper16;
+ if (!pci_is_root_bus(bus) && bus->is_added)
+ return;
+
dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
@@ -495,7 +498,7 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
}
EXPORT_SYMBOL(pci_bus_size_bridges);
-void __ref pci_bus_assign_resources(struct pci_bus *bus)
+void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;
struct pci_dev *dev;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 32e8d88a4619..3039fcb86afc 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -120,6 +120,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
return err;
}
+#ifdef CONFIG_PCI_QUIRKS
+void pci_disable_bridge_window(struct pci_dev *dev)
+{
+ dev_dbg(&dev->dev, "Disabling bridge window.\n");
+
+ /* MMIO Base/Limit */
+ pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0);
+
+ /* Prefetchable MMIO Base/Limit */
+ pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
+ pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0);
+ pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff);
+}
+#endif /* CONFIG_PCI_QUIRKS */
+
int pci_assign_resource(struct pci_dev *dev, int resno)
{
struct pci_bus *bus = dev->bus;
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 5a8ccb4f604d..21189447e545 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -1,8 +1,8 @@
/*
* drivers/pci/slot.c
* Copyright (C) 2006 Matthew Wilcox <matthew@wil.cx>
- * Copyright (C) 2006-2008 Hewlett-Packard Development Company, L.P.
- * Alex Chiang <achiang@hp.com>
+ * Copyright (C) 2006-2009 Hewlett-Packard Development Company, L.P.
+ * Alex Chiang <achiang@hp.com>
*/
#include <linux/kobject.h>
@@ -52,8 +52,8 @@ static void pci_slot_release(struct kobject *kobj)
struct pci_dev *dev;
struct pci_slot *slot = to_pci_slot(kobj);
- pr_debug("%s: releasing pci_slot on %x:%d\n", __func__,
- slot->bus->number, slot->number);
+ dev_dbg(&slot->bus->dev, "dev %02x, released physical slot %s\n",
+ slot->number, pci_slot_name(slot));
list_for_each_entry(dev, &slot->bus->devices, bus_list)
if (PCI_SLOT(dev->devfn) == slot->number)
@@ -248,9 +248,8 @@ placeholder:
if (PCI_SLOT(dev->devfn) == slot_nr)
dev->slot = slot;
- /* Don't care if debug printk has a -1 for slot_nr */
- pr_debug("%s: created pci_slot on %04x:%02x:%02x\n",
- __func__, pci_domain_nr(parent), parent->number, slot_nr);
+ dev_dbg(&parent->dev, "dev %02x, created physical slot %s\n",
+ slot_nr, pci_slot_name(slot));
out:
kfree(slot_name);
@@ -299,9 +298,8 @@ EXPORT_SYMBOL_GPL(pci_renumber_slot);
*/
void pci_destroy_slot(struct pci_slot *slot)
{
- pr_debug("%s: dec refcount to %d on %04x:%02x:%02x\n", __func__,
- atomic_read(&slot->kobj.kref.refcount) - 1,
- pci_domain_nr(slot->bus), slot->bus->number, slot->number);
+ dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n",
+ slot->number, atomic_read(&slot->kobj.kref.refcount) - 1);
down_write(&pci_bus_sem);
kobject_put(&slot->kobj);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 78199151c00b..d047f846c3ed 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -257,6 +257,40 @@ void __init acpi_no_s4_hw_signature(void);
void __init acpi_old_suspend_ordering(void);
void __init acpi_s4_no_nvs(void);
#endif /* CONFIG_PM_SLEEP */
+
+#define OSC_QUERY_TYPE 0
+#define OSC_SUPPORT_TYPE 1
+#define OSC_CONTROL_TYPE 2
+#define OSC_SUPPORT_MASKS 0x1f
+
+/* _OSC DW0 Definition */
+#define OSC_QUERY_ENABLE 1
+#define OSC_REQUEST_ERROR 2
+#define OSC_INVALID_UUID_ERROR 4
+#define OSC_INVALID_REVISION_ERROR 8
+#define OSC_CAPABILITIES_MASK_ERROR 16
+
+/* _OSC DW1 Definition (OS Support Fields) */
+#define OSC_EXT_PCI_CONFIG_SUPPORT 1
+#define OSC_ACTIVE_STATE_PWR_SUPPORT 2
+#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4
+#define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8
+#define OSC_MSI_SUPPORT 16
+
+/* _OSC DW1 Definition (OS Control Fields) */
+#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1
+#define OSC_SHPC_NATIVE_HP_CONTROL 2
+#define OSC_PCI_EXPRESS_PME_CONTROL 4
+#define OSC_PCI_EXPRESS_AER_CONTROL 8
+#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16
+
+#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \
+ OSC_SHPC_NATIVE_HP_CONTROL | \
+ OSC_PCI_EXPRESS_PME_CONTROL | \
+ OSC_PCI_EXPRESS_AER_CONTROL | \
+ OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
+
+extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags);
#else /* CONFIG_ACPI */
static inline int early_acpi_boot_init(void)
diff --git a/include/linux/msi.h b/include/linux/msi.h
index d2b8a1e8ca11..6991ab5b24d1 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -20,20 +20,23 @@ extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
struct msi_desc {
struct {
- __u8 type : 5; /* {0: unused, 5h:MSI, 11h:MSI-X} */
+ __u8 is_msix : 1;
+ __u8 multiple: 3; /* log2 number of messages */
__u8 maskbit : 1; /* mask-pending bit supported ? */
- __u8 masked : 1;
__u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */
__u8 pos; /* Location of the msi capability */
- __u32 maskbits_mask; /* mask bits mask */
__u16 entry_nr; /* specific enabled entry */
unsigned default_irq; /* default pre-assigned irq */
- }msi_attrib;
+ } msi_attrib;
+ u32 masked; /* mask bits */
unsigned int irq;
struct list_head list;
- void __iomem *mask_base;
+ union {
+ void __iomem *mask_base;
+ u8 mask_pos;
+ };
struct pci_dev *dev;
/* Last set MSI message */
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 042c166f65d5..092e82e0048c 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -10,72 +10,25 @@
#include <linux/acpi.h>
-#define OSC_QUERY_TYPE 0
-#define OSC_SUPPORT_TYPE 1
-#define OSC_CONTROL_TYPE 2
-#define OSC_SUPPORT_MASKS 0x1f
-
-/*
- * _OSC DW0 Definition
- */
-#define OSC_QUERY_ENABLE 1
-#define OSC_REQUEST_ERROR 2
-#define OSC_INVALID_UUID_ERROR 4
-#define OSC_INVALID_REVISION_ERROR 8
-#define OSC_CAPABILITIES_MASK_ERROR 16
-
-/*
- * _OSC DW1 Definition (OS Support Fields)
- */
-#define OSC_EXT_PCI_CONFIG_SUPPORT 1
-#define OSC_ACTIVE_STATE_PWR_SUPPORT 2
-#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4
-#define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8
-#define OSC_MSI_SUPPORT 16
-
-/*
- * _OSC DW1 Definition (OS Control Fields)
- */
-#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1
-#define OSC_SHPC_NATIVE_HP_CONTROL 2
-#define OSC_PCI_EXPRESS_PME_CONTROL 4
-#define OSC_PCI_EXPRESS_AER_CONTROL 8
-#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16
-
-#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \
- OSC_SHPC_NATIVE_HP_CONTROL | \
- OSC_PCI_EXPRESS_PME_CONTROL | \
- OSC_PCI_EXPRESS_AER_CONTROL | \
- OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
-
#ifdef CONFIG_ACPI
-extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags);
-int pci_acpi_osc_support(acpi_handle handle, u32 flags);
static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
{
- /* Find root host bridge */
- while (pdev->bus->self)
- pdev = pdev->bus->self;
-
- return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus),
- pdev->bus->number);
+ struct pci_bus *pbus = pdev->bus;
+ /* Find a PCI root bus */
+ while (pbus->parent)
+ pbus = pbus->parent;
+ return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus),
+ pbus->number);
}
static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
{
- int seg = pci_domain_nr(pbus), busnr = pbus->number;
- struct pci_dev *bridge = pbus->self;
- if (bridge)
- return DEVICE_ACPI_HANDLE(&(bridge->dev));
- return acpi_get_pci_rootbridge_handle(seg, busnr);
+ if (pbus->parent)
+ return DEVICE_ACPI_HANDLE(&(pbus->self->dev));
+ return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus),
+ pbus->number);
}
#else
-#if !defined(AE_ERROR)
-typedef u32 acpi_status;
-#define AE_ERROR (acpi_status) (0x0001)
-#endif
-static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags)
-{return AE_ERROR;}
static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
{ return NULL; }
#endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index df3644132617..a7fe4bbd7ff1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -52,6 +52,7 @@
#include <asm/atomic.h>
#include <linux/device.h>
#include <linux/io.h>
+#include <linux/irqreturn.h>
/* Include the ID list */
#include <linux/pci_ids.h>
@@ -93,6 +94,12 @@ enum {
/* #6: expansion ROM resource */
PCI_ROM_RESOURCE,
+ /* device specific resources */
+#ifdef CONFIG_PCI_IOV
+ PCI_IOV_RESOURCES,
+ PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1,
+#endif
+
/* resources assigned to buses behind the bridge */
#define PCI_BRIDGE_RESOURCE_NUM 4
@@ -180,6 +187,7 @@ struct pci_cap_saved_state {
struct pcie_link_state;
struct pci_vpd;
+struct pci_sriov;
/*
* The pci_dev structure is used to describe PCI devices.
@@ -257,6 +265,8 @@ struct pci_dev {
unsigned int is_managed:1;
unsigned int is_pcie:1;
unsigned int state_saved:1;
+ unsigned int is_physfn:1;
+ unsigned int is_virtfn:1;
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
@@ -270,6 +280,12 @@ struct pci_dev {
struct list_head msi_list;
#endif
struct pci_vpd *vpd;
+#ifdef CONFIG_PCI_IOV
+ union {
+ struct pci_sriov *sriov; /* SR-IOV capability related */
+ struct pci_dev *physfn; /* the PF this VF is associated with */
+ };
+#endif
};
extern struct pci_dev *alloc_pci_dev(void);
@@ -341,6 +357,15 @@ struct pci_bus {
#define pci_bus_b(n) list_entry(n, struct pci_bus, node)
#define to_pci_bus(n) container_of(n, struct pci_bus, dev)
+/*
+ * Returns true if the pci bus is root (behind host-pci bridge),
+ * false otherwise
+ */
+static inline bool pci_is_root_bus(struct pci_bus *pbus)
+{
+ return !(pbus->parent);
+}
+
#ifdef CONFIG_PCI_MSI
static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev)
{
@@ -528,7 +553,7 @@ void pcibios_update_irq(struct pci_dev *, int irq);
/* Generic PCI functions used internally */
extern struct pci_bus *pci_find_bus(int domain, int busnr);
-void pci_bus_add_devices(struct pci_bus *bus);
+void pci_bus_add_devices(const struct pci_bus *bus);
struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus,
struct pci_ops *ops, void *sysdata);
static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
@@ -702,6 +727,9 @@ int pci_back_from_sleep(struct pci_dev *dev);
/* Functions for PCI Hotplug drivers to use */
int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
+#ifdef CONFIG_HOTPLUG
+unsigned int pci_rescan_bus(struct pci_bus *bus);
+#endif
/* Vital product data routines */
ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
@@ -709,7 +737,7 @@ ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void
int pci_vpd_truncate(struct pci_dev *dev, size_t size);
/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
-void pci_bus_assign_resources(struct pci_bus *bus);
+void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
void pci_assign_unassigned_resources(void);
@@ -790,7 +818,7 @@ struct msix_entry {
#ifndef CONFIG_PCI_MSI
-static inline int pci_enable_msi(struct pci_dev *dev)
+static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
{
return -1;
}
@@ -800,6 +828,10 @@ static inline void pci_msi_shutdown(struct pci_dev *dev)
static inline void pci_disable_msi(struct pci_dev *dev)
{ }
+static inline int pci_msix_table_size(struct pci_dev *dev)
+{
+ return 0;
+}
static inline int pci_enable_msix(struct pci_dev *dev,
struct msix_entry *entries, int nvec)
{
@@ -821,9 +853,10 @@ static inline int pci_msi_enabled(void)
return 0;
}
#else
-extern int pci_enable_msi(struct pci_dev *dev);
+extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
extern void pci_msi_shutdown(struct pci_dev *dev);
extern void pci_disable_msi(struct pci_dev *dev);
+extern int pci_msix_table_size(struct pci_dev *dev);
extern int pci_enable_msix(struct pci_dev *dev,
struct msix_entry *entries, int nvec);
extern void pci_msix_shutdown(struct pci_dev *dev);
@@ -842,6 +875,8 @@ static inline int pcie_aspm_enabled(void)
extern int pcie_aspm_enabled(void);
#endif
+#define pci_enable_msi(pdev) pci_enable_msi_block(pdev, 1)
+
#ifdef CONFIG_HT_IRQ
/* The functions a driver should call */
int ht_create_irq(struct pci_dev *dev, int idx);
@@ -1195,5 +1230,23 @@ int pci_ext_cfg_avail(struct pci_dev *dev);
void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
+#ifdef CONFIG_PCI_IOV
+extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+extern void pci_disable_sriov(struct pci_dev *dev);
+extern irqreturn_t pci_sriov_migration(struct pci_dev *dev);
+#else
+static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+ return -ENODEV;
+}
+static inline void pci_disable_sriov(struct pci_dev *dev)
+{
+}
+static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev)
+{
+ return IRQ_NONE;
+}
+#endif
+
#endif /* __KERNEL__ */
#endif /* LINUX_PCI_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index e5816dd33371..cb14fd260837 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2396,6 +2396,7 @@
#define PCI_DEVICE_ID_INTEL_82801CA_12 0x248c
#define PCI_DEVICE_ID_INTEL_82801DB_0 0x24c0
#define PCI_DEVICE_ID_INTEL_82801DB_1 0x24c1
+#define PCI_DEVICE_ID_INTEL_82801DB_2 0x24c2
#define PCI_DEVICE_ID_INTEL_82801DB_3 0x24c3
#define PCI_DEVICE_ID_INTEL_82801DB_5 0x24c5
#define PCI_DEVICE_ID_INTEL_82801DB_6 0x24c6
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 027815b4635e..e4d08c1b2e0b 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -235,7 +235,7 @@
#define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */
#define PCI_PM_CTRL 4 /* PM control and status register */
#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */
-#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */
+#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */
#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */
#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */
#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */
@@ -375,6 +375,7 @@
#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */
#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */
#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */
+#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */
#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */
#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */
#define PCI_EXP_DEVCAP 4 /* Device capabilities */
@@ -487,6 +488,8 @@
#define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */
#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */
#define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */
+#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */
+#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */
/* Extended Capabilities (PCI-X 2.0 and Express) */
#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff)
@@ -498,6 +501,7 @@
#define PCI_EXT_CAP_ID_DSN 3
#define PCI_EXT_CAP_ID_PWR 4
#define PCI_EXT_CAP_ID_ARI 14
+#define PCI_EXT_CAP_ID_SRIOV 16
/* Advanced Error Reporting */
#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */
@@ -615,4 +619,35 @@
#define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */
#define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */
+/* Single Root I/O Virtualization */
+#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */
+#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */
+#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */
+#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */
+#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */
+#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */
+#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */
+#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */
+#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */
+#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */
+#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */
+#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */
+#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */
+#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */
+#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */
+#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */
+#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */
+#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */
+#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */
+#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */
+#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */
+#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */
+#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/
+#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */
+#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */
+#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */
+#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */
+#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */
+#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */
+
#endif /* LINUX_PCI_REGS_H */
diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h
index 6cd91e3f9820..b4c79545330b 100644
--- a/include/linux/pcieport_if.h
+++ b/include/linux/pcieport_if.h
@@ -16,29 +16,30 @@
#define PCIE_ANY_PORT 7
/* Service Type */
-#define PCIE_PORT_SERVICE_PME 1 /* Power Management Event */
-#define PCIE_PORT_SERVICE_AER 2 /* Advanced Error Reporting */
-#define PCIE_PORT_SERVICE_HP 4 /* Native Hotplug */
-#define PCIE_PORT_SERVICE_VC 8 /* Virtual Channel */
+#define PCIE_PORT_SERVICE_PME_SHIFT 0 /* Power Management Event */
+#define PCIE_PORT_SERVICE_PME (1 << PCIE_PORT_SERVICE_PME_SHIFT)
+#define PCIE_PORT_SERVICE_AER_SHIFT 1 /* Advanced Error Reporting */
+#define PCIE_PORT_SERVICE_AER (1 << PCIE_PORT_SERVICE_AER_SHIFT)
+#define PCIE_PORT_SERVICE_HP_SHIFT 2 /* Native Hotplug */
+#define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT)
+#define PCIE_PORT_SERVICE_VC_SHIFT 3 /* Virtual Channel */
+#define PCIE_PORT_SERVICE_VC (1 << PCIE_PORT_SERVICE_VC_SHIFT)
/* Root/Upstream/Downstream Port's Interrupt Mode */
+#define PCIE_PORT_NO_IRQ (-1)
#define PCIE_PORT_INTx_MODE 0
#define PCIE_PORT_MSI_MODE 1
#define PCIE_PORT_MSIX_MODE 2
-struct pcie_port_service_id {
- __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/
- __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */
- __u32 class, class_mask; /* (class,subclass,prog-if) triplet */
- __u32 port_type, service_type; /* Port Entity */
- kernel_ulong_t driver_data;
+struct pcie_port_data {
+ int port_type; /* Type of the port */
+ int port_irq_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */
};
struct pcie_device {
int irq; /* Service IRQ/MSI/MSI-X Vector */
- int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */
- struct pcie_port_service_id id; /* Service ID */
- struct pci_dev *port; /* Root/Upstream/Downstream Port */
+ struct pci_dev *port; /* Root/Upstream/Downstream Port */
+ u32 service; /* Port service this device represents */
void *priv_data; /* Service Private Data */
struct device device; /* Generic Device Interface */
};
@@ -56,10 +57,9 @@ static inline void* get_service_data(struct pcie_device *dev)
struct pcie_port_service_driver {
const char *name;
- int (*probe) (struct pcie_device *dev,
- const struct pcie_port_service_id *id);
+ int (*probe) (struct pcie_device *dev);
void (*remove) (struct pcie_device *dev);
- int (*suspend) (struct pcie_device *dev, pm_message_t state);
+ int (*suspend) (struct pcie_device *dev);
int (*resume) (struct pcie_device *dev);
/* Service Error Recovery Handler */
@@ -68,7 +68,9 @@ struct pcie_port_service_driver {
/* Link Reset Capability - AER service driver specific */
pci_ers_result_t (*reset_link) (struct pci_dev *dev);
- const struct pcie_port_service_id *id_table;
+ int port_type; /* Type of the port this driver can handle */
+ u32 service; /* Port service this device represents */
+
struct device_driver driver;
};
#define to_service_driver(d) \