diff options
author | Linus Torvalds | 2009-04-01 09:47:12 -0700 |
---|---|---|
committer | Linus Torvalds | 2009-04-01 09:47:12 -0700 |
commit | e76e5b2c663ac74ae6a542ac20795c625e36a5cd (patch) | |
tree | 2e7271be1f3a26832f4b121839fc4044fbbf27a6 | |
parent | 32527bc0e4b4fa7711ad1c923cf64ae72a7ffd9d (diff) | |
parent | eeafda70bf2807544e96fa4e52b2433cd470ff46 (diff) |
Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6
* 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6: (88 commits)
PCI: fix HT MSI mapping fix
PCI: don't enable too much HT MSI mapping
x86/PCI: make pci=lastbus=255 work when acpi is on
PCI: save and restore PCIe 2.0 registers
PCI: update fakephp for bus_id removal
PCI: fix kernel oops on bridge removal
PCI: fix conflict between SR-IOV and config space sizing
powerpc/PCI: include pci.h in powerpc MSI implementation
PCI Hotplug: schedule fakephp for feature removal
PCI Hotplug: rename legacy_fakephp to fakephp
PCI Hotplug: restore fakephp interface with complete reimplementation
PCI: Introduce /sys/bus/pci/devices/.../rescan
PCI: Introduce /sys/bus/pci/devices/.../remove
PCI: Introduce /sys/bus/pci/rescan
PCI: Introduce pci_rescan_bus()
PCI: do not enable bridges more than once
PCI: do not initialize bridges more than once
PCI: always scan child buses
PCI: pci_scan_slot() returns newly found devices
PCI: don't scan existing devices
...
Fix trivial append-only conflict in Documentation/feature-removal-schedule.txt
62 files changed, 3641 insertions, 1902 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index e638e15a8895..97ad190e13af 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -41,6 +41,49 @@ Description: for the device and attempt to bind to it. For example: # echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id +What: /sys/bus/pci/drivers/.../remove_id +Date: February 2009 +Contact: Chris Wright <chrisw@sous-sol.org> +Description: + Writing a device ID to this file will remove an ID + that was dynamically added via the new_id sysfs entry. + The format for the device ID is: + VVVV DDDD SVVV SDDD CCCC MMMM. That is Vendor ID, Device + ID, Subsystem Vendor ID, Subsystem Device ID, Class, + and Class Mask. The Vendor ID and Device ID fields are + required, the rest are optional. After successfully + removing an ID, the driver will no longer support the + device. This is useful to ensure auto probing won't + match the driver to the device. For example: + # echo "8086 10f5" > /sys/bus/pci/drivers/foo/remove_id + +What: /sys/bus/pci/rescan +Date: January 2009 +Contact: Linux PCI developers <linux-pci@vger.kernel.org> +Description: + Writing a non-zero value to this attribute will + force a rescan of all PCI buses in the system, and + re-discover previously removed devices. + Depends on CONFIG_HOTPLUG. + +What: /sys/bus/pci/devices/.../remove +Date: January 2009 +Contact: Linux PCI developers <linux-pci@vger.kernel.org> +Description: + Writing a non-zero value to this attribute will + hot-remove the PCI device and any of its children. + Depends on CONFIG_HOTPLUG. + +What: /sys/bus/pci/devices/.../rescan +Date: January 2009 +Contact: Linux PCI developers <linux-pci@vger.kernel.org> +Description: + Writing a non-zero value to this attribute will + force a rescan of the device's parent bus and all + child buses, and re-discover devices removed earlier + from this part of the device tree. + Depends on CONFIG_HOTPLUG. + What: /sys/bus/pci/devices/.../vpd Date: February 2008 Contact: Ben Hutchings <bhutchings@solarflare.com> @@ -52,3 +95,30 @@ Description: that some devices may have malformatted data. If the underlying VPD has a writable section then the corresponding section of this file will be writable. + +What: /sys/bus/pci/devices/.../virtfnN +Date: March 2009 +Contact: Yu Zhao <yu.zhao@intel.com> +Description: + This symbolic link appears when hardware supports the SR-IOV + capability and the Physical Function driver has enabled it. + The symbolic link points to the PCI device sysfs entry of the + Virtual Function whose index is N (0...MaxVFs-1). + +What: /sys/bus/pci/devices/.../dep_link +Date: March 2009 +Contact: Yu Zhao <yu.zhao@intel.com> +Description: + This symbolic link appears when hardware supports the SR-IOV + capability and the Physical Function driver has enabled it, + and this device has vendor specific dependencies with others. + The symbolic link points to the PCI device sysfs entry of + Physical Function this device depends on. + +What: /sys/bus/pci/devices/.../physfn +Date: March 2009 +Contact: Yu Zhao <yu.zhao@intel.com> +Description: + This symbolic link appears when a device is a Virtual Function. + The symbolic link points to the PCI device sysfs entry of the + Physical Function this device associates with. diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index bc962cda6504..58c194572c76 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -199,6 +199,7 @@ X!Edrivers/pci/hotplug.c --> !Edrivers/pci/probe.c !Edrivers/pci/rom.c +!Edrivers/pci/iov.c </sect1> <sect1><title>PCI Hotplug Support Library</title> !Edrivers/pci/hotplug/pci_hotplug_core.c diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 256defd7e174..dcf7acc720e1 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -4,506 +4,356 @@ Revised Feb 12, 2004 by Martine Silbermann email: Martine.Silbermann@hp.com Revised Jun 25, 2004 by Tom L Nguyen + Revised Jul 9, 2008 by Matthew Wilcox <willy@linux.intel.com> + Copyright 2003, 2008 Intel Corporation 1. About this guide -This guide describes the basics of Message Signaled Interrupts (MSI), -the advantages of using MSI over traditional interrupt mechanisms, -and how to enable your driver to use MSI or MSI-X. Also included is -a Frequently Asked Questions (FAQ) section. - -1.1 Terminology - -PCI devices can be single-function or multi-function. In either case, -when this text talks about enabling or disabling MSI on a "device -function," it is referring to one specific PCI device and function and -not to all functions on a PCI device (unless the PCI device has only -one function). - -2. Copyright 2003 Intel Corporation - -3. What is MSI/MSI-X? - -Message Signaled Interrupt (MSI), as described in the PCI Local Bus -Specification Revision 2.3 or later, is an optional feature, and a -required feature for PCI Express devices. MSI enables a device function -to request service by sending an Inbound Memory Write on its PCI bus to -the FSB as a Message Signal Interrupt transaction. Because MSI is -generated in the form of a Memory Write, all transaction conditions, -such as a Retry, Master-Abort, Target-Abort or normal completion, are -supported. - -A PCI device that supports MSI must also support pin IRQ assertion -interrupt mechanism to provide backward compatibility for systems that -do not support MSI. In systems which support MSI, the bus driver is -responsible for initializing the message address and message data of -the device function's MSI/MSI-X capability structure during device -initial configuration. - -An MSI capable device function indicates MSI support by implementing -the MSI/MSI-X capability structure in its PCI capability list. The -device function may implement both the MSI capability structure and -the MSI-X capability structure; however, the bus driver should not -enable both. - -The MSI capability structure contains Message Control register, -Message Address register and Message Data register. These registers -provide the bus driver control over MSI. The Message Control register -indicates the MSI capability supported by the device. The Message -Address register specifies the target address and the Message Data -register specifies the characteristics of the message. To request -service, the device function writes the content of the Message Data -register to the target address. The device and its software driver -are prohibited from writing to these registers. - -The MSI-X capability structure is an optional extension to MSI. It -uses an independent and separate capability structure. There are -some key advantages to implementing the MSI-X capability structure -over the MSI capability structure as described below. - - - Support a larger maximum number of vectors per function. - - - Provide the ability for system software to configure - each vector with an independent message address and message - data, specified by a table that resides in Memory Space. - - - MSI and MSI-X both support per-vector masking. Per-vector - masking is an optional extension of MSI but a required - feature for MSI-X. Per-vector masking provides the kernel the - ability to mask/unmask a single MSI while running its - interrupt service routine. If per-vector masking is - not supported, then the device driver should provide the - hardware/software synchronization to ensure that the device - generates MSI when the driver wants it to do so. - -4. Why use MSI? - -As a benefit to the simplification of board design, MSI allows board -designers to remove out-of-band interrupt routing. MSI is another -step towards a legacy-free environment. - -Due to increasing pressure on chipset and processor packages to -reduce pin count, the need for interrupt pins is expected to -diminish over time. Devices, due to pin constraints, may implement -messages to increase performance. - -PCI Express endpoints uses INTx emulation (in-band messages) instead -of IRQ pin assertion. Using INTx emulation requires interrupt -sharing among devices connected to the same node (PCI bridge) while -MSI is unique (non-shared) and does not require BIOS configuration -support. As a result, the PCI Express technology requires MSI -support for better interrupt performance. - -Using MSI enables the device functions to support two or more -vectors, which can be configured to target different CPUs to -increase scalability. - -5. Configuring a driver to use MSI/MSI-X - -By default, the kernel will not enable MSI/MSI-X on all devices that -support this capability. The CONFIG_PCI_MSI kernel option -must be selected to enable MSI/MSI-X support. - -5.1 Including MSI/MSI-X support into the kernel - -To allow MSI/MSI-X capable device drivers to selectively enable -MSI/MSI-X (using pci_enable_msi()/pci_enable_msix() as described -below), the VECTOR based scheme needs to be enabled by setting -CONFIG_PCI_MSI during kernel config. - -Since the target of the inbound message is the local APIC, providing -CONFIG_X86_LOCAL_APIC must be enabled as well as CONFIG_PCI_MSI. - -5.2 Configuring for MSI support - -Due to the non-contiguous fashion in vector assignment of the -existing Linux kernel, this version does not support multiple -messages regardless of a device function is capable of supporting -more than one vector. To enable MSI on a device function's MSI -capability structure requires a device driver to call the function -pci_enable_msi() explicitly. - -5.2.1 API pci_enable_msi +This guide describes the basics of Message Signaled Interrupts (MSIs), +the advantages of using MSI over traditional interrupt mechanisms, how +to change your driver to use MSI or MSI-X and some basic diagnostics to +try if a device doesn't support MSIs. -int pci_enable_msi(struct pci_dev *dev) -With this new API, a device driver that wants to have MSI -enabled on its device function must call this API to enable MSI. -A successful call will initialize the MSI capability structure -with ONE vector, regardless of whether a device function is -capable of supporting multiple messages. This vector replaces the -pre-assigned dev->irq with a new MSI vector. To avoid a conflict -of the new assigned vector with existing pre-assigned vector requires -a device driver to call this API before calling request_irq(). +2. What are MSIs? -5.2.2 API pci_disable_msi +A Message Signaled Interrupt is a write from the device to a special +address which causes an interrupt to be received by the CPU. -void pci_disable_msi(struct pci_dev *dev) +The MSI capability was first specified in PCI 2.2 and was later enhanced +in PCI 3.0 to allow each interrupt to be masked individually. The MSI-X +capability was also introduced with PCI 3.0. It supports more interrupts +per device than MSI and allows interrupts to be independently configured. -This API should always be used to undo the effect of pci_enable_msi() -when a device driver is unloading. This API restores dev->irq with -the pre-assigned IOAPIC vector and switches a device's interrupt -mode to PCI pin-irq assertion/INTx emulation mode. - -Note that a device driver should always call free_irq() on the MSI vector -that it has done request_irq() on before calling this API. Failure to do -so results in a BUG_ON() and a device will be left with MSI enabled and -leaks its vector. - -5.2.3 MSI mode vs. legacy mode diagram - -The below diagram shows the events which switch the interrupt -mode on the MSI-capable device function between MSI mode and -PIN-IRQ assertion mode. - - ------------ pci_enable_msi ------------------------ - | | <=============== | | - | MSI MODE | | PIN-IRQ ASSERTION MODE | - | | ===============> | | - ------------ pci_disable_msi ------------------------ - - -Figure 1. MSI Mode vs. Legacy Mode - -In Figure 1, a device operates by default in legacy mode. Legacy -in this context means PCI pin-irq assertion or PCI-Express INTx -emulation. A successful MSI request (using pci_enable_msi()) switches -a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector -stored in dev->irq will be saved by the PCI subsystem and a new -assigned MSI vector will replace dev->irq. - -To return back to its default mode, a device driver should always call -pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a -device driver should always call free_irq() on the MSI vector it has -done request_irq() on before calling pci_disable_msi(). Failure to do -so results in a BUG_ON() and a device will be left with MSI enabled and -leaks its vector. Otherwise, the PCI subsystem restores a device's -dev->irq with a pre-assigned IOAPIC vector and marks the released -MSI vector as unused. - -Once being marked as unused, there is no guarantee that the PCI -subsystem will reserve this MSI vector for a device. Depending on -the availability of current PCI vector resources and the number of -MSI/MSI-X requests from other drivers, this MSI may be re-assigned. - -For the case where the PCI subsystem re-assigns this MSI vector to -another driver, a request to switch back to MSI mode may result -in being assigned a different MSI vector or a failure if no more -vectors are available. - -5.3 Configuring for MSI-X support - -Due to the ability of the system software to configure each vector of -the MSI-X capability structure with an independent message address -and message data, the non-contiguous fashion in vector assignment of -the existing Linux kernel has no impact on supporting multiple -messages on an MSI-X capable device functions. To enable MSI-X on -a device function's MSI-X capability structure requires its device -driver to call the function pci_enable_msix() explicitly. - -The function pci_enable_msix(), once invoked, enables either -all or nothing, depending on the current availability of PCI vector -resources. If the PCI vector resources are available for the number -of vectors requested by a device driver, this function will configure -the MSI-X table of the MSI-X capability structure of a device with -requested messages. To emphasize this reason, for example, a device -may be capable for supporting the maximum of 32 vectors while its -software driver usually may request 4 vectors. It is recommended -that the device driver should call this function once during the -initialization phase of the device driver. - -Unlike the function pci_enable_msi(), the function pci_enable_msix() -does not replace the pre-assigned IOAPIC dev->irq with a new MSI -vector because the PCI subsystem writes the 1:1 vector-to-entry mapping -into the field vector of each element contained in a second argument. -Note that the pre-assigned IOAPIC dev->irq is valid only if the device -operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt at -using dev->irq by the device driver to request for interrupt service -may result in unpredictable behavior. - -For each MSI-X vector granted, a device driver is responsible for calling -other functions like request_irq(), enable_irq(), etc. to enable -this vector with its corresponding interrupt service handler. It is -a device driver's choice to assign all vectors with the same -interrupt service handler or each vector with a unique interrupt -service handler. - -5.3.1 Handling MMIO address space of MSI-X Table - -The PCI 3.0 specification has implementation notes that MMIO address -space for a device's MSI-X structure should be isolated so that the -software system can set different pages for controlling accesses to the -MSI-X structure. The implementation of MSI support requires the PCI -subsystem, not a device driver, to maintain full control of the MSI-X -table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X -table/MSI-X PBA. A device driver should not access the MMIO address -space of the MSI-X table/MSI-X PBA. - -5.3.2 API pci_enable_msix +Devices may support both MSI and MSI-X, but only one can be enabled at +a time. -int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) -This API enables a device driver to request the PCI subsystem -to enable MSI-X messages on its hardware device. Depending on -the availability of PCI vectors resources, the PCI subsystem enables -either all or none of the requested vectors. +3. Why use MSIs? + +There are three reasons why using MSIs can give an advantage over +traditional pin-based interrupts. + +Pin-based PCI interrupts are often shared amongst several devices. +To support this, the kernel must call each interrupt handler associated +with an interrupt, which leads to reduced performance for the system as +a whole. MSIs are never shared, so this problem cannot arise. + +When a device writes data to memory, then raises a pin-based interrupt, +it is possible that the interrupt may arrive before all the data has +arrived in memory (this becomes more likely with devices behind PCI-PCI +bridges). In order to ensure that all the data has arrived in memory, +the interrupt handler must read a register on the device which raised +the interrupt. PCI transaction ordering rules require that all the data +arrives in memory before the value can be returned from the register. +Using MSIs avoids this problem as the interrupt-generating write cannot +pass the data writes, so by the time the interrupt is raised, the driver +knows that all the data has arrived in memory. + +PCI devices can only support a single pin-based interrupt per function. +Often drivers have to query the device to find out what event has +occurred, slowing down interrupt handling for the common case. With +MSIs, a device can support more interrupts, allowing each interrupt +to be specialised to a different purpose. One possible design gives +infrequent conditions (such as errors) their own interrupt which allows +the driver to handle the normal interrupt handling path more efficiently. +Other possible designs include giving one interrupt to each packet queue +in a network card or each port in a storage controller. + + +4. How to use MSIs + +PCI devices are initialised to use pin-based interrupts. The device +driver has to set up the device to use MSI or MSI-X. Not all machines +support MSIs correctly, and for those machines, the APIs described below +will simply fail and the device will continue to use pin-based interrupts. + +4.1 Include kernel support for MSIs + +To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI +option enabled. This option is only available on some architectures, +and it may depend on some other options also being set. For example, +on x86, you must also enable X86_UP_APIC or SMP in order to see the +CONFIG_PCI_MSI option. + +4.2 Using MSI + +Most of the hard work is done for the driver in the PCI layer. It simply +has to request that the PCI layer set up the MSI capability for this +device. + +4.2.1 pci_enable_msi + +int pci_enable_msi(struct pci_dev *dev) + +A successful call will allocate ONE interrupt to the device, regardless +of how many MSIs the device supports. The device will be switched from +pin-based interrupt mode to MSI mode. The dev->irq number is changed +to a new number which represents the message signaled interrupt. +This function should be called before the driver calls request_irq() +since enabling MSIs disables the pin-based IRQ and the driver will not +receive interrupts on the old interrupt. + +4.2.2 pci_enable_msi_block + +int pci_enable_msi_block(struct pci_dev *dev, int count) + +This variation on the above call allows a device driver to request multiple +MSIs. The MSI specification only allows interrupts to be allocated in +powers of two, up to a maximum of 2^5 (32). + +If this function returns 0, it has succeeded in allocating at least as many +interrupts as the driver requested (it may have allocated more in order +to satisfy the power-of-two requirement). In this case, the function +enables MSI on this device and updates dev->irq to be the lowest of +the new interrupts assigned to it. The other interrupts assigned to +the device are in the range dev->irq to dev->irq + count - 1. + +If this function returns a negative number, it indicates an error and +the driver should not attempt to request any more MSI interrupts for +this device. If this function returns a positive number, it will be +less than 'count' and indicate the number of interrupts that could have +been allocated. In neither case will the irq value have been +updated, nor will the device have been switched into MSI mode. + +The device driver must decide what action to take if +pci_enable_msi_block() returns a value less than the number asked for. +Some devices can make use of fewer interrupts than the maximum they +request; in this case the driver should call pci_enable_msi_block() +again. Note that it is not guaranteed to succeed, even when the +'count' has been reduced to the value returned from a previous call to +pci_enable_msi_block(). This is because there are multiple constraints +on the number of vectors that can be allocated; pci_enable_msi_block() +will return as soon as it finds any constraint that doesn't allow the +call to succeed. + +4.2.3 pci_disable_msi + +void pci_disable_msi(struct pci_dev *dev) -Argument 'dev' points to the device (pci_dev) structure. +This function should be used to undo the effect of pci_enable_msi() or +pci_enable_msi_block(). Calling it restores dev->irq to the pin-based +interrupt number and frees the previously allocated message signaled +interrupt(s). The interrupt may subsequently be assigned to another +device, so drivers should not cache the value of dev->irq. -Argument 'entries' is a pointer to an array of msix_entry structs. -The number of entries is indicated in argument 'nvec'. -struct msix_entry is defined in /driver/pci/msi.h: +A device driver must always call free_irq() on the interrupt(s) +for which it has called request_irq() before calling this function. +Failure to do so will result in a BUG_ON(), the device will be left with +MSI enabled and will leak its vector. + +4.3 Using MSI-X + +The MSI-X capability is much more flexible than the MSI capability. +It supports up to 2048 interrupts, each of which can be controlled +independently. To support this flexibility, drivers must use an array of +`struct msix_entry': struct msix_entry { u16 vector; /* kernel uses to write alloc vector */ u16 entry; /* driver uses to specify entry */ }; -A device driver is responsible for initializing the field 'entry' of -each element with a unique entry supported by MSI-X table. Otherwise, --EINVAL will be returned as a result. A successful return of zero -indicates the PCI subsystem completed initializing each of the requested -entries of the MSI-X table with message address and message data. -Last but not least, the PCI subsystem will write the 1:1 -vector-to-entry mapping into the field 'vector' of each element. A -device driver is responsible for keeping track of allocated MSI-X -vectors in its internal data structure. - -A return of zero indicates that the number of MSI-X vectors was -successfully allocated. A return of greater than zero indicates -MSI-X vector shortage. Or a return of less than zero indicates -a failure. This failure may be a result of duplicate entries -specified in second argument, or a result of no available vector, -or a result of failing to initialize MSI-X table entries. - -5.3.3 API pci_disable_msix +This allows for the device to use these interrupts in a sparse fashion; +for example it could use interrupts 3 and 1027 and allocate only a +two-element array. The driver is expected to fill in the 'entry' value +in each element of the array to indicate which entries it wants the kernel +to assign interrupts for. It is invalid to fill in two entries with the +same number. + +4.3.1 pci_enable_msix + +int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) + +Calling this function asks the PCI subsystem to allocate 'nvec' MSIs. +The 'entries' argument is a pointer to an array of msix_entry structs +which should be at least 'nvec' entries in size. On success, the +function will return 0 and the device will have been switched into +MSI-X interrupt mode. The 'vector' elements in each entry will have +been filled in with the interrupt number. The driver should then call +request_irq() for each 'vector' that it decides to use. + +If this function returns a negative number, it indicates an error and +the driver should not attempt to allocate any more MSI-X interrupts for +this device. If it returns a positive number, it indicates the maximum +number of interrupt vectors that could have been allocated. See example +below. + +This function, in contrast with pci_enable_msi(), does not adjust +dev->irq. The device will not generate interrupts for this interrupt +number once MSI-X is enabled. The device driver is responsible for +keeping track of the interrupts assigned to the MSI-X vectors so it can +free them again later. + +Device drivers should normally call this function once per device +during the initialization phase. + +It is ideal if drivers can cope with a variable number of MSI-X interrupts, +there are many reasons why the platform may not be able to provide the +exact number a driver asks for. + +A request loop to achieve that might look like: + +static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec) +{ + while (nvec >= FOO_DRIVER_MINIMUM_NVEC) { + rc = pci_enable_msix(adapter->pdev, + adapter->msix_entries, nvec); + if (rc > 0) + nvec = rc; + else + return rc; + } + + return -ENOSPC; +} + +4.3.2 pci_disable_msix void pci_disable_msix(struct pci_dev *dev) -This API should always be used to undo the effect of pci_enable_msix() -when a device driver is unloading. Note that a device driver should -always call free_irq() on all MSI-X vectors it has done request_irq() -on before calling this API. Failure to do so results in a BUG_ON() and -a device will be left with MSI-X enabled and leaks its vectors. - -5.3.4 MSI-X mode vs. legacy mode diagram - -The below diagram shows the events which switch the interrupt -mode on the MSI-X capable device function between MSI-X mode and -PIN-IRQ assertion mode (legacy). - - ------------ pci_enable_msix(,,n) ------------------------ - | | <=============== | | - | MSI-X MODE | | PIN-IRQ ASSERTION MODE | - | | ===============> | | - ------------ pci_disable_msix ------------------------ - -Figure 2. MSI-X Mode vs. Legacy Mode - -In Figure 2, a device operates by default in legacy mode. A -successful MSI-X request (using pci_enable_msix()) switches a -device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector -stored in dev->irq will be saved by the PCI subsystem; however, -unlike MSI mode, the PCI subsystem will not replace dev->irq with -assigned MSI-X vector because the PCI subsystem already writes the 1:1 -vector-to-entry mapping into the field 'vector' of each element -specified in second argument. - -To return back to its default mode, a device driver should always call -pci_disable_msix() to undo the effect of pci_enable_msix(). Note that -a device driver should always call free_irq() on all MSI-X vectors it -has done request_irq() on before calling pci_disable_msix(). Failure -to do so results in a BUG_ON() and a device will be left with MSI-X -enabled and leaks its vectors. Otherwise, the PCI subsystem switches a -device function's interrupt mode from MSI-X mode to legacy mode and -marks all allocated MSI-X vectors as unused. - -Once being marked as unused, there is no guarantee that the PCI -subsystem will reserve these MSI-X vectors for a device. Depending on -the availability of current PCI vector resources and the number of -MSI/MSI-X requests from other drivers, these MSI-X vectors may be -re-assigned. - -For the case where the PCI subsystem re-assigned these MSI-X vectors -to other drivers, a request to switch back to MSI-X mode may result -being assigned with another set of MSI-X vectors or a failure if no -more vectors are available. - -5.4 Handling function implementing both MSI and MSI-X capabilities - -For the case where a function implements both MSI and MSI-X -capabilities, the PCI subsystem enables a device to run either in MSI -mode or MSI-X mode but not both. A device driver determines whether it -wants MSI or MSI-X enabled on its hardware device. Once a device -driver requests for MSI, for example, it is prohibited from requesting -MSI-X; in other words, a device driver is not permitted to ping-pong -between MSI mod MSI-X mode during a run-time. - -5.5 Hardware requirements for MSI/MSI-X support - -MSI/MSI-X support requires support from both system hardware and -individual hardware device functions. - -5.5.1 Required x86 hardware support - -Since the target of MSI address is the local APIC CPU, enabling -MSI/MSI-X support in the Linux kernel is dependent on whether existing -system hardware supports local APIC. Users should verify that their -system supports local APIC operation by testing that it runs when -CONFIG_X86_LOCAL_APIC=y. - -In SMP environment, CONFIG_X86_LOCAL_APIC is automatically set; -however, in UP environment, users must manually set -CONFIG_X86_LOCAL_APIC. Once CONFIG_X86_LOCAL_APIC=y, setting -CONFIG_PCI_MSI enables the VECTOR based scheme and the option for -MSI-capable device drivers to selectively enable MSI/MSI-X. - -Note that CONFIG_X86_IO_APIC setting is irrelevant because MSI/MSI-X -vector is allocated new during runtime and MSI/MSI-X support does not -depend on BIOS support. This key independency enables MSI/MSI-X -support on future IOxAPIC free platforms. - -5.5.2 Device hardware support - -The hardware device function supports MSI by indicating the -MSI/MSI-X capability structure on its PCI capability list. By -default, this capability structure will not be initialized by -the kernel to enable MSI during the system boot. In other words, -the device function is running on its default pin assertion mode. -Note that in many cases the hardware supporting MSI have bugs, -which may result in system hangs. The software driver of specific -MSI-capable hardware is responsible for deciding whether to call -pci_enable_msi or not. A return of zero indicates the kernel -successfully initialized the MSI/MSI-X capability structure of the -device function. The device function is now running on MSI/MSI-X mode. - -5.6 How to tell whether MSI/MSI-X is enabled on device function - -At the driver level, a return of zero from the function call of -pci_enable_msi()/pci_enable_msix() indicates to a device driver that -its device function is initialized successfully and ready to run in -MSI/MSI-X mode. - -At the user level, users can use the command 'cat /proc/interrupts' -to display the vectors allocated for devices and their interrupt -MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is -enabled on a SCSI Adaptec 39320D Ultra320 controller. - - CPU0 CPU1 - 0: 324639 0 IO-APIC-edge timer - 1: 1186 0 IO-APIC-edge i8042 - 2: 0 0 XT-PIC cascade - 12: 2797 0 IO-APIC-edge i8042 - 14: 6543 0 IO-APIC-edge ide0 - 15: 1 0 IO-APIC-edge ide1 -169: 0 0 IO-APIC-level uhci-hcd -185: 0 0 IO-APIC-level uhci-hcd -193: 138 10 PCI-MSI aic79xx -201: 30 0 PCI-MSI aic79xx -225: 30 0 IO-APIC-level aic7xxx -233: 30 0 IO-APIC-level aic7xxx -NMI: 0 0 -LOC: 324553 325068 -ERR: 0 -MIS: 0 - -6. MSI quirks - -Several PCI chipsets or devices are known to not support MSI. -The PCI stack provides 3 possible levels of MSI disabling: -* on a single device -* on all devices behind a specific bridge -* globally - -6.1. Disabling MSI on a single device - -Under some circumstances it might be required to disable MSI on a -single device. This may be achieved by either not calling pci_enable_msi() -or all, or setting the pci_dev->no_msi flag before (most of the time -in a quirk). - -6.2. Disabling MSI below a bridge - -The vast majority of MSI quirks are required by PCI bridges not -being able to route MSI between busses. In this case, MSI have to be -disabled on all devices behind this bridge. It is achieves by setting -the PCI_BUS_FLAGS_NO_MSI flag in the pci_bus->bus_flags of the bridge -subordinate bus. There is no need to set the same flag on bridges that -are below the broken bridge. When pci_enable_msi() is called to enable -MSI on a device, pci_msi_supported() takes care of checking the NO_MSI -flag in all parent busses of the device. - -Some bridges actually support dynamic MSI support enabling/disabling -by changing some bits in their PCI configuration space (especially -the Hypertransport chipsets such as the nVidia nForce and Serverworks -HT2000). It may then be required to update the NO_MSI flag on the -corresponding devices in the sysfs hierarchy. To enable MSI support -on device "0000:00:0e", do: - - echo 1 > /sys/bus/pci/devices/0000:00:0e/msi_bus - -To disable MSI support, echo 0 instead of 1. Note that it should be -used with caution since changing this value might break interrupts. - -6.3. Disabling MSI globally - -Some extreme cases may require to disable MSI globally on the system. -For now, the only known case is a Serverworks PCI-X chipsets (MSI are -not supported on several busses that are not all connected to the -chipset in the Linux PCI hierarchy). In the vast majority of other -cases, disabling only behind a specific bridge is enough. - -For debugging purpose, the user may also pass pci=nomsi on the kernel -command-line to explicitly disable MSI globally. But, once the appro- -priate quirks are added to the kernel, this option should not be -required anymore. - -6.4. Finding why MSI cannot be enabled on a device - -Assuming that MSI are not enabled on a device, you should look at -dmesg to find messages that quirks may output when disabling MSI -on some devices, some bridges or even globally. -Then, lspci -t gives the list of bridges above a device. Reading -/sys/bus/pci/devices/0000:00:0e/msi_bus will tell you whether MSI -are enabled (1) or disabled (0). In 0 is found in a single bridge -msi_bus file above the device, MSI cannot be enabled. - -7. FAQ - -Q1. Are there any limitations on using the MSI? - -A1. If the PCI device supports MSI and conforms to the -specification and the platform supports the APIC local bus, -then using MSI should work. - -Q2. Will it work on all the Pentium processors (P3, P4, Xeon, -AMD processors)? In P3 IPI's are transmitted on the APIC local -bus and in P4 and Xeon they are transmitted on the system -bus. Are there any implications with this? - -A2. MSI support enables a PCI device sending an inbound -memory write (0xfeexxxxx as target address) on its PCI bus -directly to the FSB. Since the message address has a -redirection hint bit cleared, it should work. - -Q3. The target address 0xfeexxxxx will be translated by the -Host Bridge into an interrupt message. Are there any -limitations on the chipsets such as Intel 8xx, Intel e7xxx, -or VIA? - -A3. If these chipsets support an inbound memory write with -target address set as 0xfeexxxxx, as conformed to PCI -specification 2.3 or latest, then it should work. - -Q4. From the driver point of view, if the MSI is lost because -of errors occurring during inbound memory write, then it may -wait forever. Is there a mechanism for it to recover? - -A4. Since the target of the transaction is an inbound memory -write, all transaction termination conditions (Retry, -Master-Abort, Target-Abort, or normal completion) are -supported. A device sending an MSI must abide by all the PCI -rules and conditions regarding that inbound memory write. So, -if a retry is signaled it must retry, etc... We believe that -the recommendation for Abort is also a retry (refer to PCI -specification 2.3 or latest). +This API should be used to undo the effect of pci_enable_msix(). It frees +the previously allocated message signaled interrupts. The interrupts may +subsequently be assigned to another device, so drivers should not cache +the value of the 'vector' elements over a call to pci_disable_msix(). + +A device driver must always call free_irq() on the interrupt(s) +for which it has called request_irq() before calling this function. +Failure to do so will result in a BUG_ON(), the device will be left with +MSI enabled and will leak its vector. + +4.3.3 The MSI-X Table + +The MSI-X capability specifies a BAR and offset within that BAR for the +MSI-X Table. This address is mapped by the PCI subsystem, and should not +be accessed directly by the device driver. If the driver wishes to +mask or unmask an interrupt, it should call disable_irq() / enable_irq(). + +4.4 Handling devices implementing both MSI and MSI-X capabilities + +If a device implements both MSI and MSI-X capabilities, it can +run in either MSI mode or MSI-X mode but not both simultaneously. +This is a requirement of the PCI spec, and it is enforced by the +PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or +pci_enable_msix() when MSI is already enabled will result in an error. +If a device driver wishes to switch between MSI and MSI-X at runtime, +it must first quiesce the device, then switch it back to pin-interrupt +mode, before calling pci_enable_msi() or pci_enable_msix() and resuming +operation. This is not expected to be a common operation but may be +useful for debugging or testing during development. + +4.5 Considerations when using MSIs + +4.5.1 Choosing between MSI-X and MSI + +If your device supports both MSI-X and MSI capabilities, you should use +the MSI-X facilities in preference to the MSI facilities. As mentioned +above, MSI-X supports any number of interrupts between 1 and 2048. +In constrast, MSI is restricted to a maximum of 32 interrupts (and +must be a power of two). In addition, the MSI interrupt vectors must +be allocated consecutively, so the system may not be able to allocate +as many vectors for MSI as it could for MSI-X. On some platforms, MSI +interrupts must all be targetted at the same set of CPUs whereas MSI-X +interrupts can all be targetted at different CPUs. + +4.5.2 Spinlocks + +Most device drivers have a per-device spinlock which is taken in the +interrupt handler. With pin-based interrupts or a single MSI, it is not +necessary to disable interrupts (Linux guarantees the same interrupt will +not be re-entered). If a device uses multiple interrupts, the driver +must disable interrupts while the lock is held. If the device sends +a different interrupt, the driver will deadlock trying to recursively +acquire the spinlock. + +There are two solutions. The first is to take the lock with +spin_lock_irqsave() or spin_lock_irq() (see +Documentation/DocBook/kernel-locking). The second is to specify +IRQF_DISABLED to request_irq() so that the kernel runs the entire +interrupt routine with interrupts disabled. + +If your MSI interrupt routine does not hold the lock for the whole time +it is running, the first solution may be best. The second solution is +normally preferred as it avoids making two transitions from interrupt +disabled to enabled and back again. + +4.6 How to tell whether MSI/MSI-X is enabled on a device + +Using 'lspci -v' (as root) may show some devices with "MSI", "Message +Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities +has an 'Enable' flag which will be followed with either "+" (enabled) +or "-" (disabled). + + +5. MSI quirks + +Several PCI chipsets or devices are known not to support MSIs. +The PCI stack provides three ways to disable MSIs: + +1. globally +2. on all devices behind a specific bridge +3. on a single device + +5.1. Disabling MSIs globally + +Some host chipsets simply don't support MSIs properly. If we're +lucky, the manufacturer knows this and has indicated it in the ACPI +FADT table. In this case, Linux will automatically disable MSIs. +Some boards don't include this information in the table and so we have +to detect them ourselves. The complete list of these is found near the +quirk_disable_all_msi() function in drivers/pci/quirks.c. + +If you have a board which has problems with MSIs, you can pass pci=nomsi +on the kernel command line to disable MSIs on all devices. It would be +in your best interests to report the problem to linux-pci@vger.kernel.org +including a full 'lspci -v' so we can add the quirks to the kernel. + +5.2. Disabling MSIs below a bridge + +Some PCI bridges are not able to route MSIs between busses properly. +In this case, MSIs must be disabled on all devices behind the bridge. + +Some bridges allow you to enable MSIs by changing some bits in their +PCI configuration space (especially the Hypertransport chipsets such +as the nVidia nForce and Serverworks HT2000). As with host chipsets, +Linux mostly knows about them and automatically enables MSIs if it can. +If you have a bridge which Linux doesn't yet know about, you can enable +MSIs in configuration space using whatever method you know works, then +enable MSIs on that bridge by doing: + + echo 1 > /sys/bus/pci/devices/$bridge/msi_bus + +where $bridge is the PCI address of the bridge you've enabled (eg +0000:00:0e.0). + +To disable MSIs, echo 0 instead of 1. Changing this value should be +done with caution as it can break interrupt handling for all devices +below this bridge. + +Again, please notify linux-pci@vger.kernel.org of any bridges that need +special handling. + +5.3. Disabling MSIs on a single device + +Some devices are known to have faulty MSI implementations. Usually this +is handled in the individual device driver but occasionally it's necessary +to handle this with a quirk. Some drivers have an option to disable use +of MSI. While this is a convenient workaround for the driver author, +it is not good practise, and should not be emulated. + +5.4. Finding why MSIs are disabled on a device + +From the above three sections, you can see that there are many reasons +why MSIs may not be enabled for a given device. Your first step should +be to examine your dmesg carefully to determine whether MSIs are enabled +for your machine. You should also check your .config to be sure you +have enabled CONFIG_PCI_MSI. + +Then, 'lspci -t' gives the list of bridges above a device. Reading +/sys/bus/pci/devices/*/msi_bus will tell you whether MSI are enabled (1) +or disabled (0). If 0 is found in any of the msi_bus files belonging +to bridges between the PCI root and the device, MSIs are disabled. + +It is also worth checking the device driver to see whether it supports MSIs. +For example, it may contain calls to pci_enable_msi(), pci_enable_msix() or +pci_enable_msi_block(). diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt new file mode 100644 index 000000000000..fc73ef5d65b8 --- /dev/null +++ b/Documentation/PCI/pci-iov-howto.txt @@ -0,0 +1,99 @@ + PCI Express I/O Virtualization Howto + Copyright (C) 2009 Intel Corporation + Yu Zhao <yu.zhao@intel.com> + + +1. Overview + +1.1 What is SR-IOV + +Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended +capability which makes one physical device appear as multiple virtual +devices. The physical device is referred to as Physical Function (PF) +while the virtual devices are referred to as Virtual Functions (VF). +Allocation of the VF can be dynamically controlled by the PF via +registers encapsulated in the capability. By default, this feature is +not enabled and the PF behaves as traditional PCIe device. Once it's +turned on, each VF's PCI configuration space can be accessed by its own +Bus, Device and Function Number (Routing ID). And each VF also has PCI +Memory Space, which is used to map its register set. VF device driver +operates on the register set so it can be functional and appear as a +real existing PCI device. + +2. User Guide + +2.1 How can I enable SR-IOV capability + +The device driver (PF driver) will control the enabling and disabling +of the capability via API provided by SR-IOV core. If the hardware +has SR-IOV capability, loading its PF driver would enable it and all +VFs associated with the PF. + +2.2 How can I use the Virtual Functions + +The VF is treated as hot-plugged PCI devices in the kernel, so they +should be able to work in the same way as real PCI devices. The VF +requires device driver that is same as a normal PCI device's. + +3. Developer Guide + +3.1 SR-IOV API + +To enable SR-IOV capability: + int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); + 'nr_virtfn' is number of VFs to be enabled. + +To disable SR-IOV capability: + void pci_disable_sriov(struct pci_dev *dev); + +To notify SR-IOV core of Virtual Function Migration: + irqreturn_t pci_sriov_migration(struct pci_dev *dev); + +3.2 Usage example + +Following piece of code illustrates the usage of the SR-IOV API. + +static int __devinit dev_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + pci_enable_sriov(dev, NR_VIRTFN); + + ... + + return 0; +} + +static void __devexit dev_remove(struct pci_dev *dev) +{ + pci_disable_sriov(dev); + + ... +} + +static int dev_suspend(struct pci_dev *dev, pm_message_t state) +{ + ... + + return 0; +} + +static int dev_resume(struct pci_dev *dev) +{ + ... + + return 0; +} + +static void dev_shutdown(struct pci_dev *dev) +{ + ... +} + +static struct pci_driver dev_driver = { + .name = "SR-IOV Physical Function driver", + .id_table = dev_id_table, + .probe = dev_probe, + .remove = __devexit_p(dev_remove), + .suspend = dev_suspend, + .resume = dev_resume, + .shutdown = dev_shutdown, +}; diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index ea7d1bdad34d..d0f354670646 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -392,3 +392,35 @@ Why: The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t) have been kept around for migration reasons. After more than two years it's time to remove them finally Who: Thomas Gleixner <tglx@linutronix.de> + +--------------------------- + +What: fakephp and associated sysfs files in /sys/bus/pci/slots/ +When: 2011 +Why: In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to + represent a machine's physical PCI slots. The change in semantics + had userspace implications, as the hotplug core no longer allowed + drivers to create multiple sysfs files per physical slot (required + for multi-function devices, e.g.). fakephp was seen as a developer's + tool only, and its interface changed. Too late, we learned that + there were some users of the fakephp interface. + + In 2.6.30, the original fakephp interface was restored. At the same + time, the PCI core gained the ability that fakephp provided, namely + function-level hot-remove and hot-add. + + Since the PCI core now provides the same functionality, exposed in: + + /sys/bus/pci/rescan + /sys/bus/pci/devices/.../remove + /sys/bus/pci/devices/.../rescan + + there is no functional reason to maintain fakephp as well. + + We will keep the existing module so that 'modprobe fakephp' will + present the old /sys/bus/pci/slots/... interface for compatibility, + but users are urged to migrate their applications to the API above. + + After a reasonable transition period, we will remove the legacy + fakephp interface. +Who: Alex Chiang <achiang@hp.com> diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt index 9f8740ca3f3b..26e4b8bc53ee 100644 --- a/Documentation/filesystems/sysfs-pci.txt +++ b/Documentation/filesystems/sysfs-pci.txt @@ -12,6 +12,7 @@ that support it. For example, a given bus might look like this: | |-- enable | |-- irq | |-- local_cpus + | |-- remove | |-- resource | |-- resource0 | |-- resource1 @@ -36,6 +37,7 @@ files, each with their own function. enable Whether the device is enabled (ascii, rw) irq IRQ number (ascii, ro) local_cpus nearby CPU mask (cpumask, ro) + remove remove device from kernel's list (ascii, wo) resource PCI resource host addresses (ascii, ro) resource0..N PCI resource N, if present (binary, mmap) resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) @@ -46,6 +48,7 @@ files, each with their own function. ro - read only file rw - file is readable and writable + wo - write only file mmap - file is mmapable ascii - file contains ascii text binary - file contains binary data @@ -73,6 +76,13 @@ that the device must be enabled for a rom read to return data succesfully. In the event a driver is not bound to the device, it can be enabled using the 'enable' file, documented above. +The 'remove' file is used to remove the PCI device, by writing a non-zero +integer to the file. This does not involve any kind of hot-plug functionality, +e.g. powering off the device. The device is removed from the kernel's list of +PCI devices, the sysfs directory for it is removed, and the device will be +removed from any drivers attached to it. Removal of PCI root buses is +disallowed. + Accessing legacy resources through sysfs ---------------------------------------- diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index aeedb89a307a..240257dd4238 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1695,6 +1695,8 @@ and is between 256 and 4096 characters. It is defined in the file See also Documentation/blockdev/paride.txt. pci=option[,option...] [PCI] various PCI subsystem options: + earlydump [X86] dump PCI config space before the kernel + changes anything off [X86] don't probe for the PCI bus bios [X86-32] force use of PCI BIOS, don't access the hardware directly. Use this if your machine @@ -1794,6 +1796,15 @@ and is between 256 and 4096 characters. It is defined in the file cbmemsize=nn[KMG] The fixed amount of bus space which is reserved for the CardBus bridge's memory window. The default value is 64 megabytes. + resource_alignment= + Format: + [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...] + Specifies alignment and device to reassign + aligned memory resources. + If <order of align> is not specified, + PAGE_SIZE is used as alignment. + PCI-PCI bridge can be specified, if resource + windows need to be expanded. pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h index 2a14302c17a3..cb04eaa6ba33 100644 --- a/arch/alpha/include/asm/pci.h +++ b/arch/alpha/include/asm/pci.h @@ -273,4 +273,18 @@ struct pci_dev *alpha_gendev_to_pci(struct device *dev); extern struct pci_dev *isa_bridge; +extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, + size_t count); +extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, + size_t count); +extern int pci_mmap_legacy_page_range(struct pci_bus *bus, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state); +extern void pci_adjust_legacy_attr(struct pci_bus *bus, + enum pci_mmap_state mmap_type); +#define HAVE_PCI_LEGACY 1 + +extern int pci_create_resource_files(struct pci_dev *dev); +extern void pci_remove_resource_files(struct pci_dev *dev); + #endif /* __ALPHA_PCI_H */ diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index b4697759a123..a427538252f8 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -12,7 +12,7 @@ obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ obj-$(CONFIG_VGA_HOSE) += console.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_PCI) += pci.o pci_iommu.o +obj-$(CONFIG_PCI) += pci.o pci_iommu.o pci-sysfs.o obj-$(CONFIG_SRM_ENV) += srm_env.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c new file mode 100644 index 000000000000..6ea822e7f724 --- /dev/null +++ b/arch/alpha/kernel/pci-sysfs.c @@ -0,0 +1,366 @@ +/* + * arch/alpha/kernel/pci-sysfs.c + * + * Copyright (C) 2009 Ivan Kokshaysky + * + * Alpha PCI resource files. + * + * Loosely based on generic HAVE_PCI_MMAP implementation in + * drivers/pci/pci-sysfs.c + */ + +#include <linux/sched.h> +#include <linux/pci.h> + +static int hose_mmap_page_range(struct pci_controller *hose, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_type, int sparse) +{ + unsigned long base; + + if (mmap_type == pci_mmap_mem) + base = sparse ? hose->sparse_mem_base : hose->dense_mem_base; + else + base = sparse ? hose->sparse_io_base : hose->dense_io_base; + + vma->vm_pgoff += base >> PAGE_SHIFT; + vma->vm_flags |= (VM_IO | VM_RESERVED); + + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static int __pci_mmap_fits(struct pci_dev *pdev, int num, + struct vm_area_struct *vma, int sparse) +{ + unsigned long nr, start, size; + int shift = sparse ? 5 : 0; + + nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + start = vma->vm_pgoff; + size = ((pci_resource_len(pdev, num) - 1) >> (PAGE_SHIFT - shift)) + 1; + + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on %s BAR %d " + "(size 0x%08lx)\n", + current->comm, sparse ? " sparse" : "", start, start + nr, + pci_name(pdev), num, size); + return 0; +} + +/** + * pci_mmap_resource - map a PCI resource into user memory space + * @kobj: kobject for mapping + * @attr: struct bin_attribute for the file being mapped + * @vma: struct vm_area_struct passed into the mmap + * @sparse: address space type + * + * Use the bus mapping routines to map a PCI resource into userspace. + */ +static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, + struct vm_area_struct *vma, int sparse) +{ + struct pci_dev *pdev = to_pci_dev(container_of(kobj, + struct device, kobj)); + struct resource *res = (struct resource *)attr->private; + enum pci_mmap_state mmap_type; + struct pci_bus_region bar; + int i; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) + if (res == &pdev->resource[i]) + break; + if (i >= PCI_ROM_RESOURCE) + return -ENODEV; + + if (!__pci_mmap_fits(pdev, i, vma, sparse)) + return -EINVAL; + + if (iomem_is_exclusive(res->start)) + return -EINVAL; + + pcibios_resource_to_bus(pdev, &bar, res); + vma->vm_pgoff += bar.start >> (PAGE_SHIFT - (sparse ? 5 : 0)); + mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io; + + return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse); +} + +static int pci_mmap_resource_sparse(struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 1); +} + +static int pci_mmap_resource_dense(struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 0); +} + +/** + * pci_remove_resource_files - cleanup resource files + * @dev: dev to cleanup + * + * If we created resource files for @dev, remove them from sysfs and + * free their resources. + */ +void pci_remove_resource_files(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + struct bin_attribute *res_attr; + + res_attr = pdev->res_attr[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } + + res_attr = pdev->res_attr_wc[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } + } +} + +static int sparse_mem_mmap_fits(struct pci_dev *pdev, int num) +{ + struct pci_bus_region bar; + struct pci_controller *hose = pdev->sysdata; + long dense_offset; + unsigned long sparse_size; + + pcibios_resource_to_bus(pdev, &bar, &pdev->resource[num]); + + /* All core logic chips have 4G sparse address space, except + CIA which has 16G (see xxx_SPARSE_MEM and xxx_DENSE_MEM + definitions in asm/core_xxx.h files). This corresponds + to 128M or 512M of the bus space. */ + dense_offset = (long)(hose->dense_mem_base - hose->sparse_mem_base); + sparse_size = dense_offset >= 0x400000000UL ? 0x20000000 : 0x8000000; + + return bar.end < sparse_size; +} + +static int pci_create_one_attr(struct pci_dev *pdev, int num, char *name, + char *suffix, struct bin_attribute *res_attr, + unsigned long sparse) +{ + size_t size = pci_resource_len(pdev, num); + + sprintf(name, "resource%d%s", num, suffix); + res_attr->mmap = sparse ? pci_mmap_resource_sparse : + pci_mmap_resource_dense; + res_attr->attr.name = name; + res_attr->attr.mode = S_IRUSR | S_IWUSR; + res_attr->size = sparse ? size << 5 : size; + res_attr->private = &pdev->resource[num]; + return sysfs_create_bin_file(&pdev->dev.kobj, res_attr); +} + +static int pci_create_attr(struct pci_dev *pdev, int num) +{ + /* allocate attribute structure, piggyback attribute name */ + int retval, nlen1, nlen2 = 0, res_count = 1; + unsigned long sparse_base, dense_base; + struct bin_attribute *attr; + struct pci_controller *hose = pdev->sysdata; + char *suffix, *attr_name; + + suffix = ""; /* Assume bwx machine, normal resourceN files. */ + nlen1 = 10; + + if (pdev->resource[num].flags & IORESOURCE_MEM) { + sparse_base = hose->sparse_mem_base; + dense_base = hose->dense_mem_base; + if (sparse_base && !sparse_mem_mmap_fits(pdev, num)) { + sparse_base = 0; + suffix = "_dense"; + nlen1 = 16; /* resourceN_dense */ + } + } else { + sparse_base = hose->sparse_io_base; + dense_base = hose->dense_io_base; + } + + if (sparse_base) { + suffix = "_sparse"; + nlen1 = 17; + if (dense_base) { + nlen2 = 16; /* resourceN_dense */ + res_count = 2; + } + } + + attr = kzalloc(sizeof(*attr) * res_count + nlen1 + nlen2, GFP_ATOMIC); + if (!attr) + return -ENOMEM; + + /* Create bwx, sparse or single dense file */ + attr_name = (char *)(attr + res_count); + pdev->res_attr[num] = attr; + retval = pci_create_one_attr(pdev, num, attr_name, suffix, attr, + sparse_base); + if (retval || res_count == 1) + return retval; + + /* Create dense file */ + attr_name += nlen1; + attr++; + pdev->res_attr_wc[num] = attr; + return pci_create_one_attr(pdev, num, attr_name, "_dense", attr, 0); +} + +/** + * pci_create_resource_files - create resource files in sysfs for @dev + * @dev: dev in question + * + * Walk the resources in @dev creating files for each resource available. + */ +int pci_create_resource_files(struct pci_dev *pdev) +{ + int i; + int retval; + + /* Expose the PCI resources from this device as files */ + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + + /* skip empty resources */ + if (!pci_resource_len(pdev, i)) + continue; + + retval = pci_create_attr(pdev, i); + if (retval) { + pci_remove_resource_files(pdev); + return retval; + } + } + return 0; +} + +/* Legacy I/O bus mapping stuff. */ + +static int __legacy_mmap_fits(struct pci_controller *hose, + struct vm_area_struct *vma, + unsigned long res_size, int sparse) +{ + unsigned long nr, start, size; + + nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + start = vma->vm_pgoff; + size = ((res_size - 1) >> PAGE_SHIFT) + 1; + + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on hose %d " + "(size 0x%08lx)\n", + current->comm, sparse ? " sparse" : "", start, start + nr, + hose->index, size); + return 0; +} + +static inline int has_sparse(struct pci_controller *hose, + enum pci_mmap_state mmap_type) +{ + unsigned long base; + + base = (mmap_type == pci_mmap_mem) ? hose->sparse_mem_base : + hose->sparse_io_base; + + return base != 0; +} + +int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, + enum pci_mmap_state mmap_type) +{ + struct pci_controller *hose = bus->sysdata; + int sparse = has_sparse(hose, mmap_type); + unsigned long res_size; + + res_size = (mmap_type == pci_mmap_mem) ? bus->legacy_mem->size : + bus->legacy_io->size; + if (!__legacy_mmap_fits(hose, vma, res_size, sparse)) + return -EINVAL; + + return hose_mmap_page_range(hose, vma, mmap_type, sparse); +} + +/** + * pci_adjust_legacy_attr - adjustment of legacy file attributes + * @b: bus to create files under + * @mmap_type: I/O port or memory + * + * Adjust file name and size for sparse mappings. + */ +void pci_adjust_legacy_attr(struct pci_bus *bus, enum pci_mmap_state mmap_type) +{ + struct pci_controller *hose = bus->sysdata; + + if (!has_sparse(hose, mmap_type)) + return; + + if (mmap_type == pci_mmap_mem) { + bus->legacy_mem->attr.name = "legacy_mem_sparse"; + bus->legacy_mem->size <<= 5; + } else { + bus->legacy_io->attr.name = "legacy_io_sparse"; + bus->legacy_io->size <<= 5; + } + return; +} + +/* Legacy I/O bus read/write functions */ +int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size) +{ + struct pci_controller *hose = bus->sysdata; + + port += hose->io_space->start; + + switch(size) { + case 1: + *((u8 *)val) = inb(port); + return 1; + case 2: + if (port & 1) + return -EINVAL; + *((u16 *)val) = inw(port); + return 2; + case 4: + if (port & 3) + return -EINVAL; + *((u32 *)val) = inl(port); + return 4; + } + return -EINVAL; +} + +int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size) +{ + struct pci_controller *hose = bus->sysdata; + + port += hose->io_space->start; + + switch(size) { + case 1: + outb(port, val); + return 1; + case 2: + if (port & 1) + return -EINVAL; + outw(port, val); + return 2; + case 4: + if (port & 3) + return -EINVAL; + outl(port, val); + return 4; + } + return -EINVAL; +} diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 3548159a1beb..ba17d5d90a49 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -114,6 +114,10 @@ extern int pci_domain_nr(struct pci_bus *bus); /* Decide whether to display the domain number in /proc */ extern int pci_proc_domain(struct pci_bus *bus); +/* MSI arch hooks */ +#define arch_setup_msi_irqs arch_setup_msi_irqs +#define arch_teardown_msi_irqs arch_teardown_msi_irqs +#define arch_msi_check_device arch_msi_check_device struct vm_area_struct; /* Map a range of PCI memory or I/O space for a device into user space */ diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index 3bb7d3dd28be..8bbc12d20f5c 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/msi.h> +#include <linux/pci.h> #include <asm/machdep.h> @@ -19,6 +20,10 @@ int arch_msi_check_device(struct pci_dev* dev, int nvec, int type) return -ENOSYS; } + /* PowerPC doesn't support multiple MSI yet */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + if (ppc_md.msi_check_device) { pr_debug("msi: Using platform check routine.\n"); return ppc_md.msi_check_device(dev, nvec, type); diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index a977de23cb4d..a0301bfeb954 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -86,6 +86,9 @@ static inline void early_quirks(void) { } extern void pci_iommu_alloc(void); +/* MSI arch hook */ +#define arch_setup_msi_irqs arch_setup_msi_irqs + #endif /* __KERNEL__ */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index da99ffcdfde6..1bb5c6cee3eb 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3468,6 +3468,10 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct intel_iommu *iommu = NULL; int index = 0; + /* x86 doesn't support multiple MSI yet */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + irq_want = nr_irqs_gsi; sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index c7c4776ff630..90f5b9ef5def 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -300,8 +300,7 @@ fs_initcall(pci_iommu_init); static __devinit void via_no_dac(struct pci_dev *dev) { if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO - "PCI: VIA PCI bridge detected. Disabling DAC.\n"); + dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); forbid_dac = 1; } } diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index f6adf2c6d751..aaf26ae58cd5 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -69,11 +69,12 @@ void early_dump_pci_device(u8 bus, u8 slot, u8 func) int j; u32 val; - printk(KERN_INFO "PCI: %02x:%02x:%02x", bus, slot, func); + printk(KERN_INFO "pci 0000:%02x:%02x.%d config space:", + bus, slot, func); for (i = 0; i < 256; i += 4) { if (!(i & 0x0f)) - printk("\n%04x:",i); + printk("\n %02x:",i); val = read_pci_config(bus, slot, func, i); for (j = 0; j < 4; j++) { @@ -96,20 +97,22 @@ void early_dump_pci_devices(void) for (func = 0; func < 8; func++) { u32 class; u8 type; + class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); if (class == 0xffffffff) - break; + continue; early_dump_pci_device(bus, slot, func); - /* No multi-function device? */ - type = read_pci_config_byte(bus, slot, func, + if (func == 0) { + type = read_pci_config_byte(bus, slot, + func, PCI_HEADER_TYPE); - if (!(type & 0x80)) - break; + if (!(type & 0x80)) + break; + } } } } } - diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 9c49919e4d1c..6dd89555fbfa 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -495,26 +495,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, pci_siemens_interrupt_controller); /* - * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have - * 4096 bytes configuration space for each function of their processor - * configuration space. - */ -static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev) -{ - dev->cfg_size = pci_cfg_space_size_ext(dev); -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size); - -/* * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from * confusing the PCI engine: */ diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index f1065b129e9c..4061bb0f267d 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -50,8 +50,6 @@ static int __init pci_legacy_init(void) if (pci_root_bus) pci_bus_add_devices(pci_root_bus); - pcibios_fixup_peer_bridges(); - return 0; } @@ -67,6 +65,7 @@ int __init pci_subsys_init(void) pci_visws_init(); #endif pci_legacy_init(); + pcibios_fixup_peer_bridges(); pcibios_irq_init(); pcibios_init(); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 89bf9242c80a..905bb526b133 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/acpi.h> #include <linux/bitmap.h> +#include <linux/sort.h> #include <asm/e820.h> #include <asm/pci_x86.h> @@ -24,24 +25,49 @@ /* Indicate if the mmcfg resources have been placed into the resource table. */ static int __initdata pci_mmcfg_resources_inserted; +static __init int extend_mmcfg(int num) +{ + struct acpi_mcfg_allocation *new; + int new_num = pci_mmcfg_config_num + num; + + new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); + if (!new) + return -1; + + if (pci_mmcfg_config) { + memcpy(new, pci_mmcfg_config, + sizeof(pci_mmcfg_config[0]) * new_num); + kfree(pci_mmcfg_config); + } + pci_mmcfg_config = new; + + return 0; +} + +static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end) +{ + int i = pci_mmcfg_config_num; + + pci_mmcfg_config_num++; + pci_mmcfg_config[i].address = addr; + pci_mmcfg_config[i].pci_segment = segment; + pci_mmcfg_config[i].start_bus_number = start; + pci_mmcfg_config[i].end_bus_number = end; +} + static const char __init *pci_mmcfg_e7520(void) { u32 win; raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win); win = win & 0xf000; - if(win == 0x0000 || win == 0xf000) - pci_mmcfg_config_num = 0; - else { - pci_mmcfg_config_num = 1; - pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); - if (!pci_mmcfg_config) - return NULL; - pci_mmcfg_config[0].address = win << 16; - pci_mmcfg_config[0].pci_segment = 0; - pci_mmcfg_config[0].start_bus_number = 0; - pci_mmcfg_config[0].end_bus_number = 255; - } + if (win == 0x0000 || win == 0xf000) + return NULL; + + if (extend_mmcfg(1) == -1) + return NULL; + + fill_one_mmcfg(win << 16, 0, 0, 255); return "Intel Corporation E7520 Memory Controller Hub"; } @@ -50,13 +76,11 @@ static const char __init *pci_mmcfg_intel_945(void) { u32 pciexbar, mask = 0, len = 0; - pci_mmcfg_config_num = 1; - raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar); /* Enable bit */ if (!(pciexbar & 1)) - pci_mmcfg_config_num = 0; + return NULL; /* Size bits */ switch ((pciexbar >> 1) & 3) { @@ -73,28 +97,23 @@ static const char __init *pci_mmcfg_intel_945(void) len = 0x04000000U; break; default: - pci_mmcfg_config_num = 0; + return NULL; } /* Errata #2, things break when not aligned on a 256Mb boundary */ /* Can only happen in 64M/128M mode */ if ((pciexbar & mask) & 0x0fffffffU) - pci_mmcfg_config_num = 0; + return NULL; /* Don't hit the APIC registers and their friends */ if ((pciexbar & mask) >= 0xf0000000U) - pci_mmcfg_config_num = 0; - - if (pci_mmcfg_config_num) { - pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); - if (!pci_mmcfg_config) - return NULL; - pci_mmcfg_config[0].address = pciexbar & mask; - pci_mmcfg_config[0].pci_segment = 0; - pci_mmcfg_config[0].start_bus_number = 0; - pci_mmcfg_config[0].end_bus_number = (len >> 20) - 1; - } + return NULL; + + if (extend_mmcfg(1) == -1) + return NULL; + + fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1); return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; } @@ -138,22 +157,77 @@ static const char __init *pci_mmcfg_amd_fam10h(void) busnbits = 8; } - pci_mmcfg_config_num = (1 << segnbits); - pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]) * - pci_mmcfg_config_num, GFP_KERNEL); - if (!pci_mmcfg_config) + if (extend_mmcfg(1 << segnbits) == -1) return NULL; - for (i = 0; i < (1 << segnbits); i++) { - pci_mmcfg_config[i].address = base + (1<<28) * i; - pci_mmcfg_config[i].pci_segment = i; - pci_mmcfg_config[i].start_bus_number = 0; - pci_mmcfg_config[i].end_bus_number = (1 << busnbits) - 1; - } + for (i = 0; i < (1 << segnbits); i++) + fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1); return "AMD Family 10h NB"; } +static bool __initdata mcp55_checked; +static const char __init *pci_mmcfg_nvidia_mcp55(void) +{ + int bus; + int mcp55_mmconf_found = 0; + + static const u32 extcfg_regnum = 0x90; + static const u32 extcfg_regsize = 4; + static const u32 extcfg_enable_mask = 1<<31; + static const u32 extcfg_start_mask = 0xff<<16; + static const int extcfg_start_shift = 16; + static const u32 extcfg_size_mask = 0x3<<28; + static const int extcfg_size_shift = 28; + static const int extcfg_sizebus[] = {0x100, 0x80, 0x40, 0x20}; + static const u32 extcfg_base_mask[] = {0x7ff8, 0x7ffc, 0x7ffe, 0x7fff}; + static const int extcfg_base_lshift = 25; + + /* + * do check if amd fam10h already took over + */ + if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked) + return NULL; + + mcp55_checked = true; + for (bus = 0; bus < 256; bus++) { + u64 base; + u32 l, extcfg; + u16 vendor, device; + int start, size_index, end; + + raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), 0, 4, &l); + vendor = l & 0xffff; + device = (l >> 16) & 0xffff; + + if (PCI_VENDOR_ID_NVIDIA != vendor || 0x0369 != device) + continue; + + raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), extcfg_regnum, + extcfg_regsize, &extcfg); + + if (!(extcfg & extcfg_enable_mask)) + continue; + + if (extend_mmcfg(1) == -1) + continue; + + size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift; + base = extcfg & extcfg_base_mask[size_index]; + /* base could > 4G */ + base <<= extcfg_base_lshift; + start = (extcfg & extcfg_start_mask) >> extcfg_start_shift; + end = start + extcfg_sizebus[size_index] - 1; + fill_one_mmcfg(base, 0, start, end); + mcp55_mmconf_found++; + } + + if (!mcp55_mmconf_found) + return NULL; + + return "nVidia MCP55"; +} + struct pci_mmcfg_hostbridge_probe { u32 bus; u32 devfn; @@ -171,8 +245,52 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { 0x1200, pci_mmcfg_amd_fam10h }, { 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD, 0x1200, pci_mmcfg_amd_fam10h }, + { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_NVIDIA, + 0x0369, pci_mmcfg_nvidia_mcp55 }, }; +static int __init cmp_mmcfg(const void *x1, const void *x2) +{ + const typeof(pci_mmcfg_config[0]) *m1 = x1; + const typeof(pci_mmcfg_config[0]) *m2 = x2; + int start1, start2; + + start1 = m1->start_bus_number; + start2 = m2->start_bus_number; + + return start1 - start2; +} + +static void __init pci_mmcfg_check_end_bus_number(void) +{ + int i; + typeof(pci_mmcfg_config[0]) *cfg, *cfgx; + + /* sort them at first */ + sort(pci_mmcfg_config, pci_mmcfg_config_num, + sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL); + + /* last one*/ + if (pci_mmcfg_config_num > 0) { + i = pci_mmcfg_config_num - 1; + cfg = &pci_mmcfg_config[i]; + if (cfg->end_bus_number < cfg->start_bus_number) + cfg->end_bus_number = 255; + } + + /* don't overlap please */ + for (i = 0; i < pci_mmcfg_config_num - 1; i++) { + cfg = &pci_mmcfg_config[i]; + cfgx = &pci_mmcfg_config[i+1]; + + if (cfg->end_bus_number < cfg->start_bus_number) + cfg->end_bus_number = 255; + + if (cfg->end_bus_number >= cfgx->start_bus_number) + cfg->end_bus_number = cfgx->start_bus_number - 1; + } +} + static int __init pci_mmcfg_check_hostbridge(void) { u32 l; @@ -186,31 +304,33 @@ static int __init pci_mmcfg_check_hostbridge(void) pci_mmcfg_config_num = 0; pci_mmcfg_config = NULL; - name = NULL; - for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { + for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) { bus = pci_mmcfg_probes[i].bus; devfn = pci_mmcfg_probes[i].devfn; raw_pci_ops->read(0, bus, devfn, 0, 4, &l); vendor = l & 0xffff; device = (l >> 16) & 0xffff; + name = NULL; if (pci_mmcfg_probes[i].vendor == vendor && pci_mmcfg_probes[i].device == device) name = pci_mmcfg_probes[i].probe(); - } - if (name) { - printk(KERN_INFO "PCI: Found %s %s MMCONFIG support.\n", - name, pci_mmcfg_config_num ? "with" : "without"); + if (name) + printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n", + name); } - return name != NULL; + /* some end_bus_number is crazy, fix it */ + pci_mmcfg_check_end_bus_number(); + + return pci_mmcfg_config_num != 0; } static void __init pci_mmcfg_insert_resources(void) { -#define PCI_MMCFG_RESOURCE_NAME_LEN 19 +#define PCI_MMCFG_RESOURCE_NAME_LEN 24 int i; struct resource *res; char *names; @@ -228,9 +348,10 @@ static void __init pci_mmcfg_insert_resources(void) struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i]; num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; res->name = names; - snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u", - cfg->pci_segment); - res->start = cfg->address; + snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment, + cfg->start_bus_number, cfg->end_bus_number); + res->start = cfg->address + (cfg->start_bus_number << 20); res->end = res->start + (num_buses << 20) - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; insert_resource(&iomem_resource, res); @@ -354,8 +475,6 @@ static void __init pci_mmcfg_reject_broken(int early) (pci_mmcfg_config[0].address == 0)) return; - cfg = &pci_mmcfg_config[0]; - for (i = 0; i < pci_mmcfg_config_num; i++) { int valid = 0; u64 addr, size; @@ -423,10 +542,10 @@ static void __init __pci_mmcfg_init(int early) known_bridge = 1; } - if (!known_bridge) { + if (!known_bridge) acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); - pci_mmcfg_reject_broken(early); - } + + pci_mmcfg_reject_broken(early); if ((pci_mmcfg_config_num == 0) || (pci_mmcfg_config == NULL) || diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 30007ffc8e11..94349f8b2f96 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -112,13 +112,18 @@ static struct pci_raw_ops pci_mmcfg = { static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) { void __iomem *addr; - u32 size; - - size = (cfg->end_bus_number + 1) << 20; - addr = ioremap_nocache(cfg->address, size); + u64 start, size; + + start = cfg->start_bus_number; + start <<= 20; + start += cfg->address; + size = cfg->end_bus_number + 1 - cfg->start_bus_number; + size <<= 20; + addr = ioremap_nocache(start, size); if (addr) { printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", - cfg->address, cfg->address + size - 1); + start, start + size - 1); + addr -= cfg->start_bus_number << 20; } return addr; } @@ -157,7 +162,7 @@ void __init pci_mmcfg_arch_free(void) for (i = 0; i < pci_mmcfg_config_num; ++i) { if (pci_mmcfg_virt[i].virt) { - iounmap(pci_mmcfg_virt[i].virt); + iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20)); pci_mmcfg_virt[i].virt = NULL; pci_mmcfg_virt[i].cfg = NULL; } diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 5b38a026d122..196f97d00956 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -66,11 +66,18 @@ struct acpi_pci_root { struct acpi_device * device; struct acpi_pci_id id; struct pci_bus *bus; + + u32 osc_support_set; /* _OSC state of support bits */ + u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_control_qry; /* the latest _OSC query result */ + + u32 osc_queried:1; /* has _OSC control been queried? */ }; static LIST_HEAD(acpi_pci_roots); static struct acpi_pci_driver *sub_driver; +static DEFINE_MUTEX(osc_lock); int acpi_pci_register_driver(struct acpi_pci_driver *driver) { @@ -185,6 +192,175 @@ static void acpi_pci_bridge_scan(struct acpi_device *device) } } +static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, + 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; + +static acpi_status acpi_pci_run_osc(acpi_handle handle, + const u32 *capbuf, u32 *retval) +{ + acpi_status status; + struct acpi_object_list input; + union acpi_object in_params[4]; + struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *out_obj; + u32 errors; + + /* Setting up input parameters */ + input.count = 4; + input.pointer = in_params; + in_params[0].type = ACPI_TYPE_BUFFER; + in_params[0].buffer.length = 16; + in_params[0].buffer.pointer = OSC_UUID; + in_params[1].type = ACPI_TYPE_INTEGER; + in_params[1].integer.value = 1; + in_params[2].type = ACPI_TYPE_INTEGER; + in_params[2].integer.value = 3; + in_params[3].type = ACPI_TYPE_BUFFER; + in_params[3].buffer.length = 12; + in_params[3].buffer.pointer = (u8 *)capbuf; + + status = acpi_evaluate_object(handle, "_OSC", &input, &output); + if (ACPI_FAILURE(status)) + return status; + + if (!output.length) + return AE_NULL_OBJECT; + + out_obj = output.pointer; + if (out_obj->type != ACPI_TYPE_BUFFER) { + printk(KERN_DEBUG "_OSC evaluation returned wrong type\n"); + status = AE_TYPE; + goto out_kfree; + } + /* Need to ignore the bit0 in result code */ + errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); + if (errors) { + if (errors & OSC_REQUEST_ERROR) + printk(KERN_DEBUG "_OSC request failed\n"); + if (errors & OSC_INVALID_UUID_ERROR) + printk(KERN_DEBUG "_OSC invalid UUID\n"); + if (errors & OSC_INVALID_REVISION_ERROR) + printk(KERN_DEBUG "_OSC invalid revision\n"); + if (errors & OSC_CAPABILITIES_MASK_ERROR) { + if (capbuf[OSC_QUERY_TYPE] & OSC_QUERY_ENABLE) + goto out_success; + printk(KERN_DEBUG + "Firmware did not grant requested _OSC control\n"); + status = AE_SUPPORT; + goto out_kfree; + } + status = AE_ERROR; + goto out_kfree; + } +out_success: + *retval = *((u32 *)(out_obj->buffer.pointer + 8)); + status = AE_OK; + +out_kfree: + kfree(output.pointer); + return status; +} + +static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 flags) +{ + acpi_status status; + u32 support_set, result, capbuf[3]; + + /* do _OSC query for all possible controls */ + support_set = root->osc_support_set | (flags & OSC_SUPPORT_MASKS); + capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; + capbuf[OSC_SUPPORT_TYPE] = support_set; + capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; + + status = acpi_pci_run_osc(root->device->handle, capbuf, &result); + if (ACPI_SUCCESS(status)) { + root->osc_support_set = support_set; + root->osc_control_qry = result; + root->osc_queried = 1; + } + return status; +} + +static acpi_status acpi_pci_osc_support(struct acpi_pci_root *root, u32 flags) +{ + acpi_status status; + acpi_handle tmp; + + status = acpi_get_handle(root->device->handle, "_OSC", &tmp); + if (ACPI_FAILURE(status)) + return status; + mutex_lock(&osc_lock); + status = acpi_pci_query_osc(root, flags); + mutex_unlock(&osc_lock); + return status; +} + +static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle) +{ + struct acpi_pci_root *root; + list_for_each_entry(root, &acpi_pci_roots, node) { + if (root->device->handle == handle) + return root; + } + return NULL; +} + +/** + * acpi_pci_osc_control_set - commit requested control to Firmware + * @handle: acpi_handle for the target ACPI object + * @flags: driver's requested control bits + * + * Attempt to take control from Firmware on requested control bits. + **/ +acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) +{ + acpi_status status; + u32 control_req, result, capbuf[3]; + acpi_handle tmp; + struct acpi_pci_root *root; + + status = acpi_get_handle(handle, "_OSC", &tmp); + if (ACPI_FAILURE(status)) + return status; + + control_req = (flags & OSC_CONTROL_MASKS); + if (!control_req) + return AE_TYPE; + + root = acpi_pci_find_root(handle); + if (!root) + return AE_NOT_EXIST; + + mutex_lock(&osc_lock); + /* No need to evaluate _OSC if the control was already granted. */ + if ((root->osc_control_set & control_req) == control_req) + goto out; + + /* Need to query controls first before requesting them */ + if (!root->osc_queried) { + status = acpi_pci_query_osc(root, root->osc_support_set); + if (ACPI_FAILURE(status)) + goto out; + } + if ((root->osc_control_qry & control_req) != control_req) { + printk(KERN_DEBUG + "Firmware did not grant requested _OSC control\n"); + status = AE_SUPPORT; + goto out; + } + + capbuf[OSC_QUERY_TYPE] = 0; + capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set; + capbuf[OSC_CONTROL_TYPE] = root->osc_control_set | control_req; + status = acpi_pci_run_osc(handle, capbuf, &result); + if (ACPI_SUCCESS(status)) + root->osc_control_set = result; +out: + mutex_unlock(&osc_lock); + return status; +} +EXPORT_SYMBOL(acpi_pci_osc_control_set); + static int __devinit acpi_pci_root_add(struct acpi_device *device) { int result = 0; @@ -217,7 +393,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) * PCI domains, so we indicate this in _OSC support capabilities. */ flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT; - pci_acpi_osc_support(device->handle, flags); + acpi_pci_osc_support(root, flags); /* * Segment @@ -353,7 +529,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) if (pci_msi_enabled()) flags |= OSC_MSI_SUPPORT; if (flags != base_flags) - pci_acpi_osc_support(device->handle, flags); + acpi_pci_osc_support(root, flags); end: if (result) { diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 2a4501dd2515..fdc864f9cf23 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -59,3 +59,13 @@ config HT_IRQ This allows native hypertransport devices to use interrupts. If unsure say Y. + +config PCI_IOV + bool "PCI IOV support" + depends on PCI + help + I/O Virtualization is a PCI feature supported by some devices + which allows them to create virtual devices which share their + physical resources. + + If unsure, say N. diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 3d07ce24f6a8..ba6af162fd39 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o +obj-$(CONFIG_PCI_IOV) += iov.o + # # Some architectures use the generic PCI setup functions # diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 52b54f053be0..68f91a252595 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -133,7 +133,7 @@ int pci_bus_add_child(struct pci_bus *bus) * * Call hotplug for each new devices. */ -void pci_bus_add_devices(struct pci_bus *bus) +void pci_bus_add_devices(const struct pci_bus *bus) { struct pci_dev *dev; struct pci_bus *child; @@ -184,8 +184,10 @@ void pci_enable_bridges(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->subordinate) { - retval = pci_enable_device(dev); - pci_set_master(dev); + if (atomic_read(&dev->enable_cnt) == 0) { + retval = pci_enable_device(dev); + pci_set_master(dev); + } pci_enable_bridges(dev->subordinate); } } diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 1c1141801060..fbc63d5e459f 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -30,9 +30,8 @@ #include <linux/types.h> #include <linux/pci.h> #include <linux/pci_hotplug.h> +#include <linux/acpi.h> #include <linux/pci-acpi.h> -#include <acpi/acpi.h> -#include <acpi/acpi_bus.h> #define MY_NAME "acpi_pcihp" @@ -333,19 +332,14 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, { acpi_status status = AE_NOT_FOUND; acpi_handle handle, phandle; - struct pci_bus *pbus = bus; - struct pci_dev *pdev; - - do { - pdev = pbus->self; - if (!pdev) { - handle = acpi_get_pci_rootbridge_handle( - pci_domain_nr(pbus), pbus->number); + struct pci_bus *pbus; + + handle = NULL; + for (pbus = bus; pbus; pbus = pbus->parent) { + handle = acpi_pci_get_bridge_handle(pbus); + if (handle) break; - } - handle = DEVICE_ACPI_HANDLE(&(pdev->dev)); - pbus = pbus->parent; - } while (!handle); + } /* * _HPP settings apply to all child buses, until another _HPP is @@ -378,12 +372,10 @@ EXPORT_SYMBOL_GPL(acpi_get_hp_params_from_firmware); * * Attempt to take hotplug control from firmware. */ -int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) +int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags) { acpi_status status; acpi_handle chandle, handle; - struct pci_dev *pdev = dev; - struct pci_bus *parent; struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL }; flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | @@ -408,33 +400,25 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); dbg("Trying to get hotplug control for %s\n", (char *)string.pointer); - status = pci_osc_control_set(handle, flags); + status = acpi_pci_osc_control_set(handle, flags); if (ACPI_SUCCESS(status)) goto got_one; kfree(string.pointer); string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL }; } - pdev = dev; - handle = DEVICE_ACPI_HANDLE(&dev->dev); - while (!handle) { + handle = DEVICE_ACPI_HANDLE(&pdev->dev); + if (!handle) { /* * This hotplug controller was not listed in the ACPI name * space at all. Try to get acpi handle of parent pci bus. */ - if (!pdev || !pdev->bus->parent) - break; - parent = pdev->bus->parent; - dbg("Could not find %s in acpi namespace, trying parent\n", - pci_name(pdev)); - if (!parent->self) - /* Parent must be a host bridge */ - handle = acpi_get_pci_rootbridge_handle( - pci_domain_nr(parent), - parent->number); - else - handle = DEVICE_ACPI_HANDLE(&(parent->self->dev)); - pdev = parent->self; + struct pci_bus *pbus; + for (pbus = pdev->bus; pbus; pbus = pbus->parent) { + handle = acpi_pci_get_bridge_handle(pbus); + if (handle) + break; + } } while (handle) { @@ -453,13 +437,13 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) } dbg("Cannot get control of hotplug hardware for pci %s\n", - pci_name(dev)); + pci_name(pdev)); kfree(string.pointer); return -ENODEV; got_one: - dbg("Gained control for hotplug HW for pci %s (%s)\n", pci_name(dev), - (char *)string.pointer); + dbg("Gained control for hotplug HW for pci %s (%s)\n", + pci_name(pdev), (char *)string.pointer); kfree(string.pointer); return 0; } diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index d8649e127298..6151389fd903 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -1,395 +1,163 @@ -/* - * Fake PCI Hot Plug Controller Driver +/* Works like the fakephp driver used to, except a little better. * - * Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com> - * Copyright (C) 2003 IBM Corp. - * Copyright (C) 2003 Rolf Eike Beer <eike-kernel@sf-tec.de> + * - It's possible to remove devices with subordinate busses. + * - New PCI devices that appear via any method, not just a fakephp triggered + * rescan, will be noticed. + * - Devices that are removed via any method, not just a fakephp triggered + * removal, will also be noticed. * - * Based on ideas and code from: - * Vladimir Kondratiev <vladimir.kondratiev@intel.com> - * Rolf Eike Beer <eike-kernel@sf-tec.de> + * Uses nothing from the pci-hotplug subsystem. * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 2 of the License. - * - * Send feedback to <greg@kroah.com> */ -/* - * - * This driver will "emulate" removing PCI devices from the system. If - * the "power" file is written to with "0" then the specified PCI device - * will be completely removed from the kernel. - * - * WARNING, this does NOT turn off the power to the PCI device. This is - * a "logical" removal, not a physical or electrical removal. - * - * Use this module at your own risk, you have been warned! - * - * Enabling PCI devices is left as an exercise for the reader... - * - */ -#include <linux/kernel.h> #include <linux/module.h> -#include <linux/pci.h> -#include <linux/pci_hotplug.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> #include <linux/init.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/workqueue.h> +#include <linux/pci.h> +#include <linux/device.h> #include "../pci.h" -#if !defined(MODULE) - #define MY_NAME "fakephp" -#else - #define MY_NAME THIS_MODULE->name -#endif - -#define dbg(format, arg...) \ - do { \ - if (debug) \ - printk(KERN_DEBUG "%s: " format, \ - MY_NAME , ## arg); \ - } while (0) -#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME , ## arg) -#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME , ## arg) - -#define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>" -#define DRIVER_DESC "Fake PCI Hot Plug Controller Driver" - -struct dummy_slot { - struct list_head node; - struct hotplug_slot *slot; - struct pci_dev *dev; - struct work_struct remove_work; - unsigned long removed; +struct legacy_slot { + struct kobject kobj; + struct pci_dev *dev; + struct list_head list; }; -static int debug; -static int dup_slots; -static LIST_HEAD(slot_list); -static struct workqueue_struct *dummyphp_wq; - -static void pci_rescan_worker(struct work_struct *work); -static DECLARE_WORK(pci_rescan_work, pci_rescan_worker); - -static int enable_slot (struct hotplug_slot *slot); -static int disable_slot (struct hotplug_slot *slot); +static LIST_HEAD(legacy_list); -static struct hotplug_slot_ops dummy_hotplug_slot_ops = { - .owner = THIS_MODULE, - .enable_slot = enable_slot, - .disable_slot = disable_slot, -}; - -static void dummy_release(struct hotplug_slot *slot) +static ssize_t legacy_show(struct kobject *kobj, struct attribute *attr, + char *buf) { - struct dummy_slot *dslot = slot->private; - - list_del(&dslot->node); - kfree(dslot->slot->info); - kfree(dslot->slot); - pci_dev_put(dslot->dev); - kfree(dslot); + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); + strcpy(buf, "1\n"); + return 2; } -#define SLOT_NAME_SIZE 8 - -static int add_slot(struct pci_dev *dev) +static void remove_callback(void *data) { - struct dummy_slot *dslot; - struct hotplug_slot *slot; - char name[SLOT_NAME_SIZE]; - int retval = -ENOMEM; - static int count = 1; - - slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL); - if (!slot) - goto error; - - slot->info = kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL); - if (!slot->info) - goto error_slot; - - slot->info->power_status = 1; - slot->info->max_bus_speed = PCI_SPEED_UNKNOWN; - slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN; - - dslot = kzalloc(sizeof(struct dummy_slot), GFP_KERNEL); - if (!dslot) - goto error_info; - - if (dup_slots) - snprintf(name, SLOT_NAME_SIZE, "fake"); - else - snprintf(name, SLOT_NAME_SIZE, "fake%d", count++); - dbg("slot->name = %s\n", name); - slot->ops = &dummy_hotplug_slot_ops; - slot->release = &dummy_release; - slot->private = dslot; - - retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn), name); - if (retval) { - err("pci_hp_register failed with error %d\n", retval); - goto error_dslot; - } - - dbg("slot->name = %s\n", hotplug_slot_name(slot)); - dslot->slot = slot; - dslot->dev = pci_dev_get(dev); - list_add (&dslot->node, &slot_list); - return retval; - -error_dslot: - kfree(dslot); -error_info: - kfree(slot->info); -error_slot: - kfree(slot); -error: - return retval; + pci_remove_bus_device((struct pci_dev *)data); } -static int __init pci_scan_buses(void) +static ssize_t legacy_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) { - struct pci_dev *dev = NULL; - int lastslot = 0; + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); + unsigned long val; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - if (PCI_FUNC(dev->devfn) > 0 && - lastslot == PCI_SLOT(dev->devfn)) - continue; - lastslot = PCI_SLOT(dev->devfn); - add_slot(dev); - } + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; - return 0; + if (val) + pci_rescan_bus(slot->dev->bus); + else + sysfs_schedule_callback(&slot->dev->dev.kobj, remove_callback, + slot->dev, THIS_MODULE); + return len; } -static void remove_slot(struct dummy_slot *dslot) -{ - int retval; - - dbg("removing slot %s\n", hotplug_slot_name(dslot->slot)); - retval = pci_hp_deregister(dslot->slot); - if (retval) - err("Problem unregistering a slot %s\n", - hotplug_slot_name(dslot->slot)); -} +static struct attribute *legacy_attrs[] = { + &(struct attribute){ .name = "power", .mode = 0644 }, + NULL, +}; -/* called from the single-threaded workqueue handler to remove a slot */ -static void remove_slot_worker(struct work_struct *work) +static void legacy_release(struct kobject *kobj) { - struct dummy_slot *dslot = - container_of(work, struct dummy_slot, remove_work); - remove_slot(dslot); -} + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); -/** - * pci_rescan_slot - Rescan slot - * @temp: Device template. Should be set: bus and devfn. - * - * Tries hard not to re-enable already existing devices; - * also handles scanning of subfunctions. - */ -static int pci_rescan_slot(struct pci_dev *temp) -{ - struct pci_bus *bus = temp->bus; - struct pci_dev *dev; - int func; - u8 hdr_type; - int count = 0; - - if (!pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) { - temp->hdr_type = hdr_type & 0x7f; - if ((dev = pci_get_slot(bus, temp->devfn)) != NULL) - pci_dev_put(dev); - else { - dev = pci_scan_single_device(bus, temp->devfn); - if (dev) { - dbg("New device on %s function %x:%x\n", - bus->name, temp->devfn >> 3, - temp->devfn & 7); - count++; - } - } - /* multifunction device? */ - if (!(hdr_type & 0x80)) - return count; - - /* continue scanning for other functions */ - for (func = 1, temp->devfn++; func < 8; func++, temp->devfn++) { - if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) - continue; - temp->hdr_type = hdr_type & 0x7f; - - if ((dev = pci_get_slot(bus, temp->devfn)) != NULL) - pci_dev_put(dev); - else { - dev = pci_scan_single_device(bus, temp->devfn); - if (dev) { - dbg("New device on %s function %x:%x\n", - bus->name, temp->devfn >> 3, - temp->devfn & 7); - count++; - } - } - } - } - - return count; + pci_dev_put(slot->dev); + kfree(slot); } +static struct kobj_type legacy_ktype = { + .sysfs_ops = &(struct sysfs_ops){ + .store = legacy_store, .show = legacy_show + }, + .release = &legacy_release, + .default_attrs = legacy_attrs, +}; -/** - * pci_rescan_bus - Rescan PCI bus - * @bus: the PCI bus to rescan - * - * Call pci_rescan_slot for each possible function of the bus. - */ -static void pci_rescan_bus(const struct pci_bus *bus) +static int legacy_add_slot(struct pci_dev *pdev) { - unsigned int devfn; - struct pci_dev *dev; - int retval; - int found = 0; - dev = alloc_pci_dev(); - if (!dev) - return; + struct legacy_slot *slot = kzalloc(sizeof(*slot), GFP_KERNEL); - dev->bus = (struct pci_bus*)bus; - dev->sysdata = bus->sysdata; - for (devfn = 0; devfn < 0x100; devfn += 8) { - dev->devfn = devfn; - found += pci_rescan_slot(dev); - } - - if (found) { - pci_bus_assign_resources(bus); - list_for_each_entry(dev, &bus->devices, bus_list) { - /* Skip already-added devices */ - if (dev->is_added) - continue; - retval = pci_bus_add_device(dev); - if (retval) - dev_err(&dev->dev, - "Error adding device, continuing\n"); - else - add_slot(dev); - } - pci_bus_add_devices(bus); - } - kfree(dev); -} + if (!slot) + return -ENOMEM; -/* recursively scan all buses */ -static void pci_rescan_buses(const struct list_head *list) -{ - const struct list_head *l; - list_for_each(l,list) { - const struct pci_bus *b = pci_bus_b(l); - pci_rescan_bus(b); - pci_rescan_buses(&b->children); + if (kobject_init_and_add(&slot->kobj, &legacy_ktype, + &pci_slots_kset->kobj, "%s", + dev_name(&pdev->dev))) { + dev_warn(&pdev->dev, "Failed to created legacy fake slot\n"); + return -EINVAL; } -} + slot->dev = pci_dev_get(pdev); -/* initiate rescan of all pci buses */ -static inline void pci_rescan(void) { - pci_rescan_buses(&pci_root_buses); -} - -/* called from the single-threaded workqueue handler to rescan all pci buses */ -static void pci_rescan_worker(struct work_struct *work) -{ - pci_rescan(); -} + list_add(&slot->list, &legacy_list); -static int enable_slot(struct hotplug_slot *hotplug_slot) -{ - /* mis-use enable_slot for rescanning of the pci bus */ - cancel_work_sync(&pci_rescan_work); - queue_work(dummyphp_wq, &pci_rescan_work); return 0; } -static int disable_slot(struct hotplug_slot *slot) +static int legacy_notify(struct notifier_block *nb, + unsigned long action, void *data) { - struct dummy_slot *dslot; - struct pci_dev *dev; - int func; - - if (!slot) - return -ENODEV; - dslot = slot->private; - - dbg("%s - physical_slot = %s\n", __func__, hotplug_slot_name(slot)); + struct pci_dev *pdev = to_pci_dev(data); - for (func = 7; func >= 0; func--) { - dev = pci_get_slot(dslot->dev->bus, dslot->dev->devfn + func); - if (!dev) - continue; + if (action == BUS_NOTIFY_ADD_DEVICE) { + legacy_add_slot(pdev); + } else if (action == BUS_NOTIFY_DEL_DEVICE) { + struct legacy_slot *slot; - if (test_and_set_bit(0, &dslot->removed)) { - dbg("Slot already scheduled for removal\n"); - pci_dev_put(dev); - return -ENODEV; - } + list_for_each_entry(slot, &legacy_list, list) + if (slot->dev == pdev) + goto found; - /* remove the device from the pci core */ - pci_remove_bus_device(dev); - - /* queue work item to blow away this sysfs entry and other - * parts. - */ - INIT_WORK(&dslot->remove_work, remove_slot_worker); - queue_work(dummyphp_wq, &dslot->remove_work); - - pci_dev_put(dev); + dev_warn(&pdev->dev, "Missing legacy fake slot?"); + return -ENODEV; +found: + kobject_del(&slot->kobj); + list_del(&slot->list); + kobject_put(&slot->kobj); } + return 0; } -static void cleanup_slots (void) -{ - struct list_head *tmp; - struct list_head *next; - struct dummy_slot *dslot; - - destroy_workqueue(dummyphp_wq); - list_for_each_safe (tmp, next, &slot_list) { - dslot = list_entry (tmp, struct dummy_slot, node); - remove_slot(dslot); - } - -} +static struct notifier_block legacy_notifier = { + .notifier_call = legacy_notify +}; -static int __init dummyphp_init(void) +static int __init init_legacy(void) { - info(DRIVER_DESC "\n"); + struct pci_dev *pdev = NULL; - dummyphp_wq = create_singlethread_workqueue(MY_NAME); - if (!dummyphp_wq) - return -ENOMEM; + /* Add existing devices */ + while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) + legacy_add_slot(pdev); - return pci_scan_buses(); + /* Be alerted of any new ones */ + bus_register_notifier(&pci_bus_type, &legacy_notifier); + return 0; } +module_init(init_legacy); - -static void __exit dummyphp_exit(void) +static void __exit remove_legacy(void) { - cleanup_slots(); + struct legacy_slot *slot, *tmp; + + bus_unregister_notifier(&pci_bus_type, &legacy_notifier); + + list_for_each_entry_safe(slot, tmp, &legacy_list, list) { + list_del(&slot->list); + kobject_del(&slot->kobj); + kobject_put(&slot->kobj); + } } +module_exit(remove_legacy); -module_init(dummyphp_init); -module_exit(dummyphp_exit); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_AUTHOR("Trent Piepho <xyzzy@speakeasy.org>"); +MODULE_DESCRIPTION("Legacy version of the fakephp interface"); MODULE_LICENSE("GPL"); -module_param(debug, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(debug, "Debugging mode enabled or not"); -module_param(dup_slots, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(dup_slots, "Force duplicate slot names for debugging"); diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 39ae37589fda..0a368547e633 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -46,10 +46,10 @@ extern int pciehp_force; extern struct workqueue_struct *pciehp_wq; #define dbg(format, arg...) \ - do { \ - if (pciehp_debug) \ - printk("%s: " format, MY_NAME , ## arg); \ - } while (0) +do { \ + if (pciehp_debug) \ + printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \ +} while (0) #define err(format, arg...) \ printk(KERN_ERR "%s: " format, MY_NAME , ## arg) #define info(format, arg...) \ @@ -60,7 +60,7 @@ extern struct workqueue_struct *pciehp_wq; #define ctrl_dbg(ctrl, format, arg...) \ do { \ if (pciehp_debug) \ - dev_printk(, &ctrl->pcie->device, \ + dev_printk(KERN_DEBUG, &ctrl->pcie->device, \ format, ## arg); \ } while (0) #define ctrl_err(ctrl, format, arg...) \ @@ -108,10 +108,11 @@ struct controller { u32 slot_cap; u8 cap_base; struct timer_list poll_timer; - int cmd_busy; + unsigned int cmd_busy:1; unsigned int no_cmd_complete:1; unsigned int link_active_reporting:1; unsigned int notification_enabled:1; + unsigned int power_fault_detected; }; #define INT_BUTTON_IGNORE 0 diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index 438d795f9fe3..96048010e7d9 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -67,37 +67,27 @@ static int __init parse_detect_mode(void) return PCIEHP_DETECT_DEFAULT; } -static struct pcie_port_service_id __initdata port_pci_ids[] = { - { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_ANY_PORT, - .service_type = PCIE_PORT_SERVICE_HP, - .driver_data = 0, - }, { /* end: all zeroes */ } -}; - static int __initdata dup_slot_id; static int __initdata acpi_slot_detected; static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots); /* Dummy driver for dumplicate name detection */ -static int __init dummy_probe(struct pcie_device *dev, - const struct pcie_port_service_id *id) +static int __init dummy_probe(struct pcie_device *dev) { int pos; u32 slot_cap; struct slot *slot, *tmp; struct pci_dev *pdev = dev->port; struct pci_bus *pbus = pdev->subordinate; - if (!(slot = kzalloc(sizeof(*slot), GFP_KERNEL))) - return -ENOMEM; /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */ if (pciehp_get_hp_hw_control_from_firmware(pdev)) return -ENODEV; if (!(pos = pci_find_capability(pdev, PCI_CAP_ID_EXP))) return -ENODEV; pci_read_config_dword(pdev, pos + PCI_EXP_SLTCAP, &slot_cap); + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) + return -ENOMEM; slot->number = slot_cap >> 19; list_for_each_entry(tmp, &dummy_slots, slot_list) { if (tmp->number == slot->number) @@ -111,7 +101,8 @@ static int __init dummy_probe(struct pcie_device *dev, static struct pcie_port_service_driver __initdata dummy_driver = { .name = "pciehp_dummy", - .id_table = port_pci_ids, + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_HP, .probe = dummy_probe, }; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 681e3912b821..fb254b2454de 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -401,7 +401,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe return 0; } -static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_id *id) +static int pciehp_probe(struct pcie_device *dev) { int rc; struct controller *ctrl; @@ -475,7 +475,7 @@ static void pciehp_remove (struct pcie_device *dev) } #ifdef CONFIG_PM -static int pciehp_suspend (struct pcie_device *dev, pm_message_t state) +static int pciehp_suspend (struct pcie_device *dev) { dev_info(&dev->device, "%s ENTRY\n", __func__); return 0; @@ -503,20 +503,12 @@ static int pciehp_resume (struct pcie_device *dev) } return 0; } -#endif - -static struct pcie_port_service_id port_pci_ids[] = { { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_ANY_PORT, - .service_type = PCIE_PORT_SERVICE_HP, - .driver_data = 0, - }, { /* end: all zeroes */ } -}; +#endif /* PM */ static struct pcie_port_service_driver hpdriver_portdrv = { .name = PCIE_MODULE_NAME, - .id_table = &port_pci_ids[0], + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_HP, .probe = pciehp_probe, .remove = pciehp_remove, diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 7a16c6897bb9..07bd32151146 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -548,23 +548,21 @@ static int hpc_power_on_slot(struct slot * slot) slot_cmd = POWER_ON; cmd_mask = PCI_EXP_SLTCTL_PCC; - /* Enable detection that we turned off at slot power-off time */ if (!pciehp_poll_mode) { - slot_cmd |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); - cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); + /* Enable power fault detection turned off at power off time */ + slot_cmd |= PCI_EXP_SLTCTL_PFDE; + cmd_mask |= PCI_EXP_SLTCTL_PFDE; } retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); - if (retval) { ctrl_err(ctrl, "Write %x command failed!\n", slot_cmd); - return -1; + return retval; } ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); + ctrl->power_fault_detected = 0; return retval; } @@ -621,18 +619,10 @@ static int hpc_power_off_slot(struct slot * slot) slot_cmd = POWER_OFF; cmd_mask = PCI_EXP_SLTCTL_PCC; - /* - * If we get MRL or presence detect interrupts now, the isr - * will notice the sticky power-fault bit too and issue power - * indicator change commands. This will lead to an endless loop - * of command completions, since the power-fault bit remains on - * till the slot is powered on again. - */ if (!pciehp_poll_mode) { - slot_cmd &= ~(PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); - cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); + /* Disable power fault detection */ + slot_cmd &= ~PCI_EXP_SLTCTL_PFDE; + cmd_mask |= PCI_EXP_SLTCTL_PFDE; } retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); @@ -672,10 +662,11 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) detected &= (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC); + detected &= ~intr_loc; intr_loc |= detected; if (!intr_loc) return IRQ_NONE; - if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, detected)) { + if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, intr_loc)) { ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n", __func__); return IRQ_NONE; @@ -709,9 +700,10 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) pciehp_handle_presence_change(p_slot); /* Check Power Fault Detected */ - if (intr_loc & PCI_EXP_SLTSTA_PFD) + if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) { + ctrl->power_fault_detected = 1; pciehp_handle_power_fault(p_slot); - + } return IRQ_HANDLED; } diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index 6aba0b6cf2e0..974e924ca96d 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -48,10 +48,10 @@ extern int shpchp_debug; extern struct workqueue_struct *shpchp_wq; #define dbg(format, arg...) \ - do { \ - if (shpchp_debug) \ - printk("%s: " format, MY_NAME , ## arg); \ - } while (0) +do { \ + if (shpchp_debug) \ + printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \ +} while (0) #define err(format, arg...) \ printk(KERN_ERR "%s: " format, MY_NAME , ## arg) #define info(format, arg...) \ @@ -62,7 +62,7 @@ extern struct workqueue_struct *shpchp_wq; #define ctrl_dbg(ctrl, format, arg...) \ do { \ if (shpchp_debug) \ - dev_printk(, &ctrl->pci_dev->dev, \ + dev_printk(KERN_DEBUG, &ctrl->pci_dev->dev, \ format, ## arg); \ } while (0) #define ctrl_err(ctrl, format, arg...) \ diff --git a/drivers/pci/hotplug/shpchp_pci.c b/drivers/pci/hotplug/shpchp_pci.c index 138f161becc0..aa315e52529b 100644 --- a/drivers/pci/hotplug/shpchp_pci.c +++ b/drivers/pci/hotplug/shpchp_pci.c @@ -137,7 +137,7 @@ int __ref shpchp_configure_device(struct slot *p_slot) busnr)) break; } - if (busnr >= end) { + if (busnr > end) { ctrl_err(ctrl, "No free bus for hot-added bridge\n"); pci_dev_put(dev); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 49402c399232..9dbd5066acaf 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1782,7 +1782,7 @@ static inline void iommu_prepare_isa(void) ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); if (ret) - printk("IOMMU: Failed to create 0-64M identity map, " + printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " "floppy might not work\n"); } diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c new file mode 100644 index 000000000000..7227efc760db --- /dev/null +++ b/drivers/pci/iov.c @@ -0,0 +1,680 @@ +/* + * drivers/pci/iov.c + * + * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com> + * + * PCI Express I/O Virtualization (IOV) support. + * Single Root IOV 1.0 + */ + +#include <linux/pci.h> +#include <linux/mutex.h> +#include <linux/string.h> +#include <linux/delay.h> +#include "pci.h" + +#define VIRTFN_ID_LEN 16 + +static inline u8 virtfn_bus(struct pci_dev *dev, int id) +{ + return dev->bus->number + ((dev->devfn + dev->sriov->offset + + dev->sriov->stride * id) >> 8); +} + +static inline u8 virtfn_devfn(struct pci_dev *dev, int id) +{ + return (dev->devfn + dev->sriov->offset + + dev->sriov->stride * id) & 0xff; +} + +static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) +{ + int rc; + struct pci_bus *child; + + if (bus->number == busnr) + return bus; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + if (child) + return child; + + child = pci_add_new_bus(bus, NULL, busnr); + if (!child) + return NULL; + + child->subordinate = busnr; + child->dev.parent = bus->bridge; + rc = pci_bus_add_child(child); + if (rc) { + pci_remove_bus(child); + return NULL; + } + + return child; +} + +static void virtfn_remove_bus(struct pci_bus *bus, int busnr) +{ + struct pci_bus *child; + + if (bus->number == busnr) + return; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + BUG_ON(!child); + + if (list_empty(&child->devices)) + pci_remove_bus(child); +} + +static int virtfn_add(struct pci_dev *dev, int id, int reset) +{ + int i; + int rc; + u64 size; + char buf[VIRTFN_ID_LEN]; + struct pci_dev *virtfn; + struct resource *res; + struct pci_sriov *iov = dev->sriov; + + virtfn = alloc_pci_dev(); + if (!virtfn) + return -ENOMEM; + + mutex_lock(&iov->dev->sriov->lock); + virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id)); + if (!virtfn->bus) { + kfree(virtfn); + mutex_unlock(&iov->dev->sriov->lock); + return -ENOMEM; + } + virtfn->devfn = virtfn_devfn(dev, id); + virtfn->vendor = dev->vendor; + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device); + pci_setup_device(virtfn); + virtfn->dev.parent = dev->dev.parent; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + if (!res->parent) + continue; + virtfn->resource[i].name = pci_name(virtfn); + virtfn->resource[i].flags = res->flags; + size = resource_size(res); + do_div(size, iov->total); + virtfn->resource[i].start = res->start + size * id; + virtfn->resource[i].end = virtfn->resource[i].start + size - 1; + rc = request_resource(res, &virtfn->resource[i]); + BUG_ON(rc); + } + + if (reset) + pci_execute_reset_function(virtfn); + + pci_device_add(virtfn, virtfn->bus); + mutex_unlock(&iov->dev->sriov->lock); + + virtfn->physfn = pci_dev_get(dev); + virtfn->is_virtfn = 1; + + rc = pci_bus_add_device(virtfn); + if (rc) + goto failed1; + sprintf(buf, "virtfn%u", id); + rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); + if (rc) + goto failed1; + rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn"); + if (rc) + goto failed2; + + kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); + + return 0; + +failed2: + sysfs_remove_link(&dev->dev.kobj, buf); +failed1: + pci_dev_put(dev); + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + return rc; +} + +static void virtfn_remove(struct pci_dev *dev, int id, int reset) +{ + char buf[VIRTFN_ID_LEN]; + struct pci_bus *bus; + struct pci_dev *virtfn; + struct pci_sriov *iov = dev->sriov; + + bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id)); + if (!bus) + return; + + virtfn = pci_get_slot(bus, virtfn_devfn(dev, id)); + if (!virtfn) + return; + + pci_dev_put(virtfn); + + if (reset) { + device_release_driver(&virtfn->dev); + pci_execute_reset_function(virtfn); + } + + sprintf(buf, "virtfn%u", id); + sysfs_remove_link(&dev->dev.kobj, buf); + sysfs_remove_link(&virtfn->dev.kobj, "physfn"); + + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + pci_dev_put(dev); +} + +static int sriov_migration(struct pci_dev *dev) +{ + u16 status; + struct pci_sriov *iov = dev->sriov; + + if (!iov->nr_virtfn) + return 0; + + if (!(iov->cap & PCI_SRIOV_CAP_VFM)) + return 0; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_STATUS, &status); + if (!(status & PCI_SRIOV_STATUS_VFM)) + return 0; + + schedule_work(&iov->mtask); + + return 1; +} + +static void sriov_migration_task(struct work_struct *work) +{ + int i; + u8 state; + u16 status; + struct pci_sriov *iov = container_of(work, struct pci_sriov, mtask); + + for (i = iov->initial; i < iov->nr_virtfn; i++) { + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_MI) { + writeb(PCI_SRIOV_VFM_AV, iov->mstate + i); + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_AV) + virtfn_add(iov->self, i, 1); + } else if (state == PCI_SRIOV_VFM_MO) { + virtfn_remove(iov->self, i, 1); + writeb(PCI_SRIOV_VFM_UA, iov->mstate + i); + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_AV) + virtfn_add(iov->self, i, 0); + } + } + + pci_read_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, &status); + status &= ~PCI_SRIOV_STATUS_VFM; + pci_write_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, status); +} + +static int sriov_enable_migration(struct pci_dev *dev, int nr_virtfn) +{ + int bir; + u32 table; + resource_size_t pa; + struct pci_sriov *iov = dev->sriov; + + if (nr_virtfn <= iov->initial) + return 0; + + pci_read_config_dword(dev, iov->pos + PCI_SRIOV_VFM, &table); + bir = PCI_SRIOV_VFM_BIR(table); + if (bir > PCI_STD_RESOURCE_END) + return -EIO; + + table = PCI_SRIOV_VFM_OFFSET(table); + if (table + nr_virtfn > pci_resource_len(dev, bir)) + return -EIO; + + pa = pci_resource_start(dev, bir) + table; + iov->mstate = ioremap(pa, nr_virtfn); + if (!iov->mstate) + return -ENOMEM; + + INIT_WORK(&iov->mtask, sriov_migration_task); + + iov->ctrl |= PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR; + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + + return 0; +} + +static void sriov_disable_migration(struct pci_dev *dev) +{ + struct pci_sriov *iov = dev->sriov; + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + + cancel_work_sync(&iov->mtask); + iounmap(iov->mstate); +} + +static int sriov_enable(struct pci_dev *dev, int nr_virtfn) +{ + int rc; + int i, j; + int nres; + u16 offset, stride, initial; + struct resource *res; + struct pci_dev *pdev; + struct pci_sriov *iov = dev->sriov; + + if (!nr_virtfn) + return 0; + + if (iov->nr_virtfn) + return -EINVAL; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial); + if (initial > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total))) + return -EIO; + + if (nr_virtfn < 0 || nr_virtfn > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial))) + return -EINVAL; + + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride); + if (!offset || (nr_virtfn > 1 && !stride)) + return -EIO; + + nres = 0; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + if (res->parent) + nres++; + } + if (nres != iov->nres) { + dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n"); + return -ENOMEM; + } + + iov->offset = offset; + iov->stride = stride; + + if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) { + dev_err(&dev->dev, "SR-IOV: bus number out of range\n"); + return -ENOMEM; + } + + if (iov->link != dev->devfn) { + pdev = pci_get_slot(dev->bus, iov->link); + if (!pdev) + return -ENODEV; + + pci_dev_put(pdev); + + if (!pdev->is_physfn) + return -ENODEV; + + rc = sysfs_create_link(&dev->dev.kobj, + &pdev->dev.kobj, "dep_link"); + if (rc) + return rc; + } + + iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + msleep(100); + pci_unblock_user_cfg_access(dev); + + iov->initial = initial; + if (nr_virtfn < initial) + initial = nr_virtfn; + + for (i = 0; i < initial; i++) { + rc = virtfn_add(dev, i, 0); + if (rc) + goto failed; + } + + if (iov->cap & PCI_SRIOV_CAP_VFM) { + rc = sriov_enable_migration(dev, nr_virtfn); + if (rc) + goto failed; + } + + kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE); + iov->nr_virtfn = nr_virtfn; + + return 0; + +failed: + for (j = 0; j < i; j++) + virtfn_remove(dev, j, 0); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + + return rc; +} + +static void sriov_disable(struct pci_dev *dev) +{ + int i; + struct pci_sriov *iov = dev->sriov; + + if (!iov->nr_virtfn) + return; + + if (iov->cap & PCI_SRIOV_CAP_VFM) + sriov_disable_migration(dev); + + for (i = 0; i < iov->nr_virtfn; i++) + virtfn_remove(dev, i, 0); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + + iov->nr_virtfn = 0; +} + +static int sriov_init(struct pci_dev *dev, int pos) +{ + int i; + int rc; + int nres; + u32 pgsz; + u16 ctrl, total, offset, stride; + struct pci_sriov *iov; + struct resource *res; + struct pci_dev *pdev; + + if (dev->pcie_type != PCI_EXP_TYPE_RC_END && + dev->pcie_type != PCI_EXP_TYPE_ENDPOINT) + return -ENODEV; + + pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl); + if (ctrl & PCI_SRIOV_CTRL_VFE) { + pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0); + ssleep(1); + } + + pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total); + if (!total) + return 0; + + ctrl = 0; + list_for_each_entry(pdev, &dev->bus->devices, bus_list) + if (pdev->is_physfn) + goto found; + + pdev = NULL; + if (pci_ari_enabled(dev->bus)) + ctrl |= PCI_SRIOV_CTRL_ARI; + +found: + pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl); + pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride); + if (!offset || (total > 1 && !stride)) + return -EIO; + + pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz); + i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0; + pgsz &= ~((1 << i) - 1); + if (!pgsz) + return -EIO; + + pgsz &= ~(pgsz - 1); + pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz); + + nres = 0; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + i += __pci_read_base(dev, pci_bar_unknown, res, + pos + PCI_SRIOV_BAR + i * 4); + if (!res->flags) + continue; + if (resource_size(res) & (PAGE_SIZE - 1)) { + rc = -EIO; + goto failed; + } + res->end = res->start + resource_size(res) * total - 1; + nres++; + } + + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) { + rc = -ENOMEM; + goto failed; + } + + iov->pos = pos; + iov->nres = nres; + iov->ctrl = ctrl; + iov->total = total; + iov->offset = offset; + iov->stride = stride; + iov->pgsz = pgsz; + iov->self = dev; + pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap); + pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link); + + if (pdev) + iov->dev = pci_dev_get(pdev); + else { + iov->dev = dev; + mutex_init(&iov->lock); + } + + dev->sriov = iov; + dev->is_physfn = 1; + + return 0; + +failed: + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + res->flags = 0; + } + + return rc; +} + +static void sriov_release(struct pci_dev *dev) +{ + BUG_ON(dev->sriov->nr_virtfn); + + if (dev == dev->sriov->dev) + mutex_destroy(&dev->sriov->lock); + else + pci_dev_put(dev->sriov->dev); + + kfree(dev->sriov); + dev->sriov = NULL; +} + +static void sriov_restore_state(struct pci_dev *dev) +{ + int i; + u16 ctrl; + struct pci_sriov *iov = dev->sriov; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl); + if (ctrl & PCI_SRIOV_CTRL_VFE) + return; + + for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) + pci_update_resource(dev, i); + + pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + if (iov->ctrl & PCI_SRIOV_CTRL_VFE) + msleep(100); +} + +/** + * pci_iov_init - initialize the IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +int pci_iov_init(struct pci_dev *dev) +{ + int pos; + + if (!dev->is_pcie) + return -ENODEV; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); + if (pos) + return sriov_init(dev, pos); + + return -ENODEV; +} + +/** + * pci_iov_release - release resources used by the IOV capability + * @dev: the PCI device + */ +void pci_iov_release(struct pci_dev *dev) +{ + if (dev->is_physfn) + sriov_release(dev); +} + +/** + * pci_iov_resource_bar - get position of the SR-IOV BAR + * @dev: the PCI device + * @resno: the resource number + * @type: the BAR type to be filled in + * + * Returns position of the BAR encapsulated in the SR-IOV capability. + */ +int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END) + return 0; + + BUG_ON(!dev->is_physfn); + + *type = pci_bar_unknown; + + return dev->sriov->pos + PCI_SRIOV_BAR + + 4 * (resno - PCI_IOV_RESOURCES); +} + +/** + * pci_restore_iov_state - restore the state of the IOV capability + * @dev: the PCI device + */ +void pci_restore_iov_state(struct pci_dev *dev) +{ + if (dev->is_physfn) + sriov_restore_state(dev); +} + +/** + * pci_iov_bus_range - find bus range used by Virtual Function + * @bus: the PCI bus + * + * Returns max number of buses (exclude current one) used by Virtual + * Functions. + */ +int pci_iov_bus_range(struct pci_bus *bus) +{ + int max = 0; + u8 busnr; + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (!dev->is_physfn) + continue; + busnr = virtfn_bus(dev, dev->sriov->total - 1); + if (busnr > max) + max = busnr; + } + + return max ? max - bus->number : 0; +} + +/** + * pci_enable_sriov - enable the SR-IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + might_sleep(); + + if (!dev->is_physfn) + return -ENODEV; + + return sriov_enable(dev, nr_virtfn); +} +EXPORT_SYMBOL_GPL(pci_enable_sriov); + +/** + * pci_disable_sriov - disable the SR-IOV capability + * @dev: the PCI device + */ +void pci_disable_sriov(struct pci_dev *dev) +{ + might_sleep(); + + if (!dev->is_physfn) + return; + + sriov_disable(dev); +} +EXPORT_SYMBOL_GPL(pci_disable_sriov); + +/** + * pci_sriov_migration - notify SR-IOV core of Virtual Function Migration + * @dev: the PCI device + * + * Returns IRQ_HANDLED if the IRQ is handled, or IRQ_NONE if not. + * + * Physical Function driver is responsible to register IRQ handler using + * VF Migration Interrupt Message Number, and call this function when the + * interrupt is generated by the hardware. + */ +irqreturn_t pci_sriov_migration(struct pci_dev *dev) +{ + if (!dev->is_physfn) + return IRQ_NONE; + + return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE; +} +EXPORT_SYMBOL_GPL(pci_sriov_migration); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index baba2eb5367d..6f2e6295e773 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -27,48 +27,53 @@ static int pci_msi_enable = 1; /* Arch hooks */ -int __attribute__ ((weak)) -arch_msi_check_device(struct pci_dev *dev, int nvec, int type) +#ifndef arch_msi_check_device +int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) { return 0; } +#endif -int __attribute__ ((weak)) -arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry) -{ - return 0; -} - -int __attribute__ ((weak)) -arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +#ifndef arch_setup_msi_irqs +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { struct msi_desc *entry; int ret; + /* + * If an architecture wants to support multiple MSI, it needs to + * override arch_setup_msi_irqs() + */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + list_for_each_entry(entry, &dev->msi_list, list) { ret = arch_setup_msi_irq(dev, entry); - if (ret) + if (ret < 0) return ret; + if (ret > 0) + return -ENOSPC; } return 0; } +#endif -void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq) -{ - return; -} - -void __attribute__ ((weak)) -arch_teardown_msi_irqs(struct pci_dev *dev) +#ifndef arch_teardown_msi_irqs +void arch_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq != 0) - arch_teardown_msi_irq(entry->irq); + int i, nvec; + if (entry->irq == 0) + continue; + nvec = 1 << entry->msi_attrib.multiple; + for (i = 0; i < nvec; i++) + arch_teardown_msi_irq(entry->irq + i); } } +#endif static void __msi_set_enable(struct pci_dev *dev, int pos, int enable) { @@ -111,27 +116,14 @@ static inline __attribute_const__ u32 msi_mask(unsigned x) return (1 << (1 << x)) - 1; } -static void msix_flush_writes(struct irq_desc *desc) +static inline __attribute_const__ u32 msi_capable_mask(u16 control) { - struct msi_desc *entry; + return msi_mask((control >> 1) & 7); +} - entry = get_irq_desc_msi(desc); - BUG_ON(!entry || !entry->dev); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - /* nothing to do */ - break; - case PCI_CAP_ID_MSIX: - { - int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; - readl(entry->mask_base + offset); - break; - } - default: - BUG(); - break; - } +static inline __attribute_const__ u32 msi_enabled_mask(u16 control) +{ + return msi_mask((control >> 4) & 7); } /* @@ -143,49 +135,71 @@ static void msix_flush_writes(struct irq_desc *desc) * Returns 1 if it succeeded in masking the interrupt and 0 if the device * doesn't support MSI masking. */ -static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) +static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) { - struct msi_desc *entry; + u32 mask_bits = desc->masked; - entry = get_irq_desc_msi(desc); - BUG_ON(!entry || !entry->dev); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - if (entry->msi_attrib.maskbit) { - int pos; - u32 mask_bits; - - pos = (long)entry->mask_base; - pci_read_config_dword(entry->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(entry->dev, pos, mask_bits); - } else { - return 0; - } - break; - case PCI_CAP_ID_MSIX: - { - int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; - writel(flag, entry->mask_base + offset); - readl(entry->mask_base + offset); - break; - } - default: - BUG(); - break; + if (!desc->msi_attrib.maskbit) + return; + + mask_bits &= ~mask; + mask_bits |= flag; + pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits); + desc->masked = mask_bits; +} + +/* + * This internal function does not flush PCI writes to the device. + * All users must ensure that they read from the device before either + * assuming that the device state is up to date, or returning out of this + * file. This saves a few milliseconds when initialising devices with lots + * of MSI-X interrupts. + */ +static void msix_mask_irq(struct msi_desc *desc, u32 flag) +{ + u32 mask_bits = desc->masked; + unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; + mask_bits &= ~1; + mask_bits |= flag; + writel(mask_bits, desc->mask_base + offset); + desc->masked = mask_bits; +} + +static void msi_set_mask_bit(unsigned irq, u32 flag) +{ + struct msi_desc *desc = get_irq_msi(irq); + + if (desc->msi_attrib.is_msix) { + msix_mask_irq(desc, flag); + readl(desc->mask_base); /* Flush write to device */ + } else { + unsigned offset = irq - desc->dev->irq; + msi_mask_irq(desc, 1 << offset, flag << offset); } - entry->msi_attrib.masked = !!flag; - return 1; +} + +void mask_msi_irq(unsigned int irq) +{ + msi_set_mask_bit(irq, 1); +} + +void unmask_msi_irq(unsigned int irq) +{ + msi_set_mask_bit(irq, 0); } void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); - switch(entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - { + if (entry->msi_attrib.is_msix) { + void __iomem *base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + + msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); + msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); + msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); + } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; u16 data; @@ -201,21 +215,6 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) pci_read_config_word(dev, msi_data_reg(pos, 0), &data); } msg->data = data; - break; - } - case PCI_CAP_ID_MSIX: - { - void __iomem *base; - base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - - msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); - break; - } - default: - BUG(); } } @@ -229,11 +228,25 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg) void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - { + if (entry->msi_attrib.is_msix) { + void __iomem *base; + base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + + writel(msg->address_lo, + base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); + writel(msg->address_hi, + base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); + writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET); + } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; + u16 msgctl; + + pci_read_config_word(dev, msi_control_reg(pos), &msgctl); + msgctl &= ~PCI_MSI_FLAGS_QSIZE; + msgctl |= entry->msi_attrib.multiple << 4; + pci_write_config_word(dev, msi_control_reg(pos), msgctl); pci_write_config_dword(dev, msi_lower_address_reg(pos), msg->address_lo); @@ -246,23 +259,6 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) pci_write_config_word(dev, msi_data_reg(pos, 0), msg->data); } - break; - } - case PCI_CAP_ID_MSIX: - { - void __iomem *base; - base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - - writel(msg->address_lo, - base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - writel(msg->address_hi, - base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET); - break; - } - default: - BUG(); } entry->msg = *msg; } @@ -274,37 +270,18 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg) write_msi_msg_desc(desc, msg); } -void mask_msi_irq(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - msi_set_mask_bits(desc, 1, 1); - msix_flush_writes(desc); -} - -void unmask_msi_irq(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - msi_set_mask_bits(desc, 1, 0); - msix_flush_writes(desc); -} - static int msi_free_irqs(struct pci_dev* dev); -static struct msi_desc* alloc_msi_entry(void) +static struct msi_desc *alloc_msi_entry(struct pci_dev *dev) { - struct msi_desc *entry; - - entry = kzalloc(sizeof(struct msi_desc), GFP_KERNEL); - if (!entry) + struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) return NULL; - INIT_LIST_HEAD(&entry->list); - entry->irq = 0; - entry->dev = NULL; + INIT_LIST_HEAD(&desc->list); + desc->dev = dev; - return entry; + return desc; } static void pci_intx_for_msi(struct pci_dev *dev, int enable) @@ -328,15 +305,11 @@ static void __pci_restore_msi_state(struct pci_dev *dev) pci_intx_for_msi(dev, 0); msi_set_enable(dev, 0); write_msi_msg(dev->irq, &entry->msg); - if (entry->msi_attrib.maskbit) { - struct irq_desc *desc = irq_to_desc(dev->irq); - msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask, - entry->msi_attrib.masked); - } pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); + msi_mask_irq(entry, msi_capable_mask(control), entry->masked); control &= ~PCI_MSI_FLAGS_QSIZE; - control |= PCI_MSI_FLAGS_ENABLE; + control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE; pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); } @@ -354,9 +327,8 @@ static void __pci_restore_msix_state(struct pci_dev *dev) msix_set_enable(dev, 0); list_for_each_entry(entry, &dev->msi_list, list) { - struct irq_desc *desc = irq_to_desc(entry->irq); write_msi_msg(entry->irq, &entry->msg); - msi_set_mask_bits(desc, 1, entry->msi_attrib.masked); + msix_mask_irq(entry, entry->masked); } BUG_ON(list_empty(&dev->msi_list)); @@ -378,52 +350,48 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state); /** * msi_capability_init - configure device's MSI capability structure * @dev: pointer to the pci_dev data structure of MSI device function + * @nvec: number of interrupts to allocate * - * Setup the MSI capability structure of device function with a single - * MSI irq, regardless of device function is capable of handling - * multiple messages. A return of zero indicates the successful setup - * of an entry zero with the new MSI irq or non-zero for otherwise. - **/ -static int msi_capability_init(struct pci_dev *dev) + * Setup the MSI capability structure of the device with the requested + * number of interrupts. A return value of zero indicates the successful + * setup of an entry with the new MSI irq. A negative return value indicates + * an error, and a positive return value indicates the number of interrupts + * which could have been allocated. + */ +static int msi_capability_init(struct pci_dev *dev, int nvec) { struct msi_desc *entry; int pos, ret; u16 control; + unsigned mask; msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */ pos = pci_find_capability(dev, PCI_CAP_ID_MSI); pci_read_config_word(dev, msi_control_reg(pos), &control); /* MSI Entry Initialization */ - entry = alloc_msi_entry(); + entry = alloc_msi_entry(dev); if (!entry) return -ENOMEM; - entry->msi_attrib.type = PCI_CAP_ID_MSI; + entry->msi_attrib.is_msix = 0; entry->msi_attrib.is_64 = is_64bit_address(control); entry->msi_attrib.entry_nr = 0; entry->msi_attrib.maskbit = is_mask_bit_support(control); - entry->msi_attrib.masked = 1; entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ entry->msi_attrib.pos = pos; - entry->dev = dev; - if (entry->msi_attrib.maskbit) { - unsigned int base, maskbits, temp; - - base = msi_mask_bits_reg(pos, entry->msi_attrib.is_64); - entry->mask_base = (void __iomem *)(long)base; - - /* All MSIs are unmasked by default, Mask them all */ - pci_read_config_dword(dev, base, &maskbits); - temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1); - maskbits |= temp; - pci_write_config_dword(dev, base, maskbits); - entry->msi_attrib.maskbits_mask = temp; - } + + entry->mask_pos = msi_mask_bits_reg(pos, entry->msi_attrib.is_64); + /* All MSIs are unmasked by default, Mask them all */ + if (entry->msi_attrib.maskbit) + pci_read_config_dword(dev, entry->mask_pos, &entry->masked); + mask = msi_capable_mask(control); + msi_mask_irq(entry, mask, mask); + list_add_tail(&entry->list, &dev->msi_list); /* Configure MSI capability structure */ - ret = arch_setup_msi_irqs(dev, 1, PCI_CAP_ID_MSI); + ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); if (ret) { msi_free_irqs(dev); return ret; @@ -476,26 +444,28 @@ static int msix_capability_init(struct pci_dev *dev, /* MSI-X Table Initialization */ for (i = 0; i < nvec; i++) { - entry = alloc_msi_entry(); + entry = alloc_msi_entry(dev); if (!entry) break; j = entries[i].entry; - entry->msi_attrib.type = PCI_CAP_ID_MSIX; + entry->msi_attrib.is_msix = 1; entry->msi_attrib.is_64 = 1; entry->msi_attrib.entry_nr = j; - entry->msi_attrib.maskbit = 1; - entry->msi_attrib.masked = 1; entry->msi_attrib.default_irq = dev->irq; entry->msi_attrib.pos = pos; - entry->dev = dev; entry->mask_base = base; + entry->masked = readl(base + j * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + msix_mask_irq(entry, 1); list_add_tail(&entry->list, &dev->msi_list); } ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); - if (ret) { + if (ret < 0) { + /* If we had some success report the number of irqs + * we succeeded in setting up. */ int avail = 0; list_for_each_entry(entry, &dev->msi_list, list) { if (entry->irq != 0) { @@ -503,14 +473,13 @@ static int msix_capability_init(struct pci_dev *dev, } } - msi_free_irqs(dev); + if (avail != 0) + ret = avail; + } - /* If we had some success report the number of irqs - * we succeeded in setting up. - */ - if (avail == 0) - avail = ret; - return avail; + if (ret) { + msi_free_irqs(dev); + return ret; } i = 0; @@ -575,39 +544,54 @@ static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type) } /** - * pci_enable_msi - configure device's MSI capability structure - * @dev: pointer to the pci_dev data structure of MSI device function + * pci_enable_msi_block - configure device's MSI capability structure + * @dev: device to configure + * @nvec: number of interrupts to configure * - * Setup the MSI capability structure of device function with - * a single MSI irq upon its software driver call to request for - * MSI mode enabled on its hardware device function. A return of zero - * indicates the successful setup of an entry zero with the new MSI - * irq or non-zero for otherwise. - **/ -int pci_enable_msi(struct pci_dev* dev) + * Allocate IRQs for a device with the MSI capability. + * This function returns a negative errno if an error occurs. If it + * is unable to allocate the number of interrupts requested, it returns + * the number of interrupts it might be able to allocate. If it successfully + * allocates at least the number of interrupts requested, it returns 0 and + * updates the @dev's irq member to the lowest new interrupt number; the + * other interrupt numbers allocated to this device are consecutive. + */ +int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) { - int status; + int status, pos, maxvec; + u16 msgctl; - status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI); + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + if (!pos) + return -EINVAL; + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); + maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); + if (nvec > maxvec) + return maxvec; + + status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI); if (status) return status; WARN_ON(!!dev->msi_enabled); - /* Check whether driver already requested for MSI-X irqs */ + /* Check whether driver already requested MSI-X irqs */ if (dev->msix_enabled) { dev_info(&dev->dev, "can't enable MSI " "(MSI-X already enabled)\n"); return -EINVAL; } - status = msi_capability_init(dev); + + status = msi_capability_init(dev, nvec); return status; } -EXPORT_SYMBOL(pci_enable_msi); +EXPORT_SYMBOL(pci_enable_msi_block); -void pci_msi_shutdown(struct pci_dev* dev) +void pci_msi_shutdown(struct pci_dev *dev) { - struct msi_desc *entry; + struct msi_desc *desc; + u32 mask; + u16 ctrl; if (!pci_msi_enable || !dev || !dev->msi_enabled) return; @@ -617,19 +601,15 @@ void pci_msi_shutdown(struct pci_dev* dev) dev->msi_enabled = 0; BUG_ON(list_empty(&dev->msi_list)); - entry = list_entry(dev->msi_list.next, struct msi_desc, list); - /* Return the the pci reset with msi irqs unmasked */ - if (entry->msi_attrib.maskbit) { - u32 mask = entry->msi_attrib.maskbits_mask; - struct irq_desc *desc = irq_to_desc(dev->irq); - msi_set_mask_bits(desc, mask, ~mask); - } - if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) - return; + desc = list_first_entry(&dev->msi_list, struct msi_desc, list); + pci_read_config_word(dev, desc->msi_attrib.pos + PCI_MSI_FLAGS, &ctrl); + mask = msi_capable_mask(ctrl); + msi_mask_irq(desc, mask, ~mask); /* Restore dev->irq to its default pin-assertion irq */ - dev->irq = entry->msi_attrib.default_irq; + dev->irq = desc->msi_attrib.default_irq; } + void pci_disable_msi(struct pci_dev* dev) { struct msi_desc *entry; @@ -640,7 +620,7 @@ void pci_disable_msi(struct pci_dev* dev) pci_msi_shutdown(dev); entry = list_entry(dev->msi_list.next, struct msi_desc, list); - if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) + if (entry->msi_attrib.is_msix) return; msi_free_irqs(dev); @@ -652,14 +632,18 @@ static int msi_free_irqs(struct pci_dev* dev) struct msi_desc *entry, *tmp; list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq) - BUG_ON(irq_has_action(entry->irq)); + int i, nvec; + if (!entry->irq) + continue; + nvec = 1 << entry->msi_attrib.multiple; + for (i = 0; i < nvec; i++) + BUG_ON(irq_has_action(entry->irq + i)); } arch_teardown_msi_irqs(dev); list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) { - if (entry->msi_attrib.type == PCI_CAP_ID_MSIX) { + if (entry->msi_attrib.is_msix) { writel(1, entry->mask_base + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); @@ -675,6 +659,23 @@ static int msi_free_irqs(struct pci_dev* dev) } /** + * pci_msix_table_size - return the number of device's MSI-X table entries + * @dev: pointer to the pci_dev data structure of MSI-X device function + */ +int pci_msix_table_size(struct pci_dev *dev) +{ + int pos; + u16 control; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + if (!pos) + return 0; + + pci_read_config_word(dev, msi_control_reg(pos), &control); + return multi_msix_capable(control); +} + +/** * pci_enable_msix - configure device's MSI-X capability structure * @dev: pointer to the pci_dev data structure of MSI-X device function * @entries: pointer to an array of MSI-X entries @@ -691,9 +692,8 @@ static int msi_free_irqs(struct pci_dev* dev) **/ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) { - int status, pos, nr_entries; + int status, nr_entries; int i, j; - u16 control; if (!entries) return -EINVAL; @@ -702,9 +702,7 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) if (status) return status; - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - pci_read_config_word(dev, msi_control_reg(pos), &control); - nr_entries = multi_msix_capable(control); + nr_entries = pci_msix_table_size(dev); if (nvec > nr_entries) return -EINVAL; diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index 3898f5237144..71f4df2ef654 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -20,14 +20,8 @@ #define msi_mask_bits_reg(base, is64bit) \ ( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4) #define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE -#define multi_msi_capable(control) \ - (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1)) -#define multi_msi_enable(control, num) \ - control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE); #define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT)) #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT)) -#define msi_enable(control, num) multi_msi_enable(control, num); \ - control |= PCI_MSI_FLAGS_ENABLE #define msix_table_offset_reg(base) (base + 0x04) #define msix_pba_offset_reg(base) (base + 0x08) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index deea8a187eb8..fac5eddcefd2 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -18,221 +18,6 @@ #include <linux/pci-acpi.h> #include "pci.h" -struct acpi_osc_data { - acpi_handle handle; - u32 support_set; - u32 control_set; - u32 control_query; - int is_queried; - struct list_head sibiling; -}; -static LIST_HEAD(acpi_osc_data_list); - -struct acpi_osc_args { - u32 capbuf[3]; -}; - -static DEFINE_MUTEX(pci_acpi_lock); - -static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle) -{ - struct acpi_osc_data *data; - - list_for_each_entry(data, &acpi_osc_data_list, sibiling) { - if (data->handle == handle) - return data; - } - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return NULL; - INIT_LIST_HEAD(&data->sibiling); - data->handle = handle; - list_add_tail(&data->sibiling, &acpi_osc_data_list); - return data; -} - -static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, - 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; - -static acpi_status acpi_run_osc(acpi_handle handle, - struct acpi_osc_args *osc_args, u32 *retval) -{ - acpi_status status; - struct acpi_object_list input; - union acpi_object in_params[4]; - struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *out_obj; - u32 errors, flags = osc_args->capbuf[OSC_QUERY_TYPE]; - - /* Setting up input parameters */ - input.count = 4; - input.pointer = in_params; - in_params[0].type = ACPI_TYPE_BUFFER; - in_params[0].buffer.length = 16; - in_params[0].buffer.pointer = OSC_UUID; - in_params[1].type = ACPI_TYPE_INTEGER; - in_params[1].integer.value = 1; - in_params[2].type = ACPI_TYPE_INTEGER; - in_params[2].integer.value = 3; - in_params[3].type = ACPI_TYPE_BUFFER; - in_params[3].buffer.length = 12; - in_params[3].buffer.pointer = (u8 *)osc_args->capbuf; - - status = acpi_evaluate_object(handle, "_OSC", &input, &output); - if (ACPI_FAILURE(status)) - return status; - - if (!output.length) - return AE_NULL_OBJECT; - - out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) { - printk(KERN_DEBUG "Evaluate _OSC returns wrong type\n"); - status = AE_TYPE; - goto out_kfree; - } - /* Need to ignore the bit0 in result code */ - errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) { - if (errors & OSC_REQUEST_ERROR) - printk(KERN_DEBUG "_OSC request fails\n"); - if (errors & OSC_INVALID_UUID_ERROR) - printk(KERN_DEBUG "_OSC invalid UUID\n"); - if (errors & OSC_INVALID_REVISION_ERROR) - printk(KERN_DEBUG "_OSC invalid revision\n"); - if (errors & OSC_CAPABILITIES_MASK_ERROR) { - if (flags & OSC_QUERY_ENABLE) - goto out_success; - printk(KERN_DEBUG "_OSC FW not grant req. control\n"); - status = AE_SUPPORT; - goto out_kfree; - } - status = AE_ERROR; - goto out_kfree; - } -out_success: - *retval = *((u32 *)(out_obj->buffer.pointer + 8)); - status = AE_OK; - -out_kfree: - kfree(output.pointer); - return status; -} - -static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data) -{ - acpi_status status; - u32 support_set, result; - struct acpi_osc_args osc_args; - - /* do _OSC query for all possible controls */ - support_set = osc_data->support_set | (flags & OSC_SUPPORT_MASKS); - osc_args.capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set; - osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; - - status = acpi_run_osc(osc_data->handle, &osc_args, &result); - if (ACPI_SUCCESS(status)) { - osc_data->support_set = support_set; - osc_data->control_query = result; - osc_data->is_queried = 1; - } - - return status; -} - -/* - * pci_acpi_osc_support: Invoke _OSC indicating support for the given feature - * @flags: Bitmask of flags to support - * - * See the ACPI spec for the definition of the flags - */ -int pci_acpi_osc_support(acpi_handle handle, u32 flags) -{ - acpi_status status; - acpi_handle tmp; - struct acpi_osc_data *osc_data; - int rc = 0; - - status = acpi_get_handle(handle, "_OSC", &tmp); - if (ACPI_FAILURE(status)) - return -ENOTTY; - - mutex_lock(&pci_acpi_lock); - osc_data = acpi_get_osc_data(handle); - if (!osc_data) { - printk(KERN_ERR "acpi osc data array is full\n"); - rc = -ENOMEM; - goto out; - } - - __acpi_query_osc(flags, osc_data); -out: - mutex_unlock(&pci_acpi_lock); - return rc; -} - -/** - * pci_osc_control_set - commit requested control to Firmware - * @handle: acpi_handle for the target ACPI object - * @flags: driver's requested control bits - * - * Attempt to take control from Firmware on requested control bits. - **/ -acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) -{ - acpi_status status; - u32 control_req, control_set, result; - acpi_handle tmp; - struct acpi_osc_data *osc_data; - struct acpi_osc_args osc_args; - - status = acpi_get_handle(handle, "_OSC", &tmp); - if (ACPI_FAILURE(status)) - return status; - - mutex_lock(&pci_acpi_lock); - osc_data = acpi_get_osc_data(handle); - if (!osc_data) { - printk(KERN_ERR "acpi osc data array is full\n"); - status = AE_ERROR; - goto out; - } - - control_req = (flags & OSC_CONTROL_MASKS); - if (!control_req) { - status = AE_TYPE; - goto out; - } - - /* No need to evaluate _OSC if the control was already granted. */ - if ((osc_data->control_set & control_req) == control_req) - goto out; - - if (!osc_data->is_queried) { - status = __acpi_query_osc(osc_data->support_set, osc_data); - if (ACPI_FAILURE(status)) - goto out; - } - - if ((osc_data->control_query & control_req) != control_req) { - status = AE_SUPPORT; - goto out; - } - - control_set = osc_data->control_set | control_req; - osc_args.capbuf[OSC_QUERY_TYPE] = 0; - osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set; - osc_args.capbuf[OSC_CONTROL_TYPE] = control_set; - status = acpi_run_osc(handle, &osc_args, &result); - if (ACPI_SUCCESS(status)) - osc_data->control_set = result; -out: - mutex_unlock(&pci_acpi_lock); - return status; -} -EXPORT_SYMBOL(pci_osc_control_set); - /* * _SxD returns the D-state with the highest power * (lowest D-state number) supported in the S-state "x". diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 267de88551c9..c0cbbb5a245e 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -99,6 +99,52 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count) } static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id); +/** + * store_remove_id - remove a PCI device ID from this driver + * @driver: target device driver + * @buf: buffer for scanning device ID data + * @count: input size + * + * Removes a dynamic pci device ID to this driver. + */ +static ssize_t +store_remove_id(struct device_driver *driver, const char *buf, size_t count) +{ + struct pci_dynid *dynid, *n; + struct pci_driver *pdrv = to_pci_driver(driver); + __u32 vendor, device, subvendor = PCI_ANY_ID, + subdevice = PCI_ANY_ID, class = 0, class_mask = 0; + int fields = 0; + int retval = -ENODEV; + + fields = sscanf(buf, "%x %x %x %x %x %x", + &vendor, &device, &subvendor, &subdevice, + &class, &class_mask); + if (fields < 2) + return -EINVAL; + + spin_lock(&pdrv->dynids.lock); + list_for_each_entry_safe(dynid, n, &pdrv->dynids.list, node) { + struct pci_device_id *id = &dynid->id; + if ((id->vendor == vendor) && + (id->device == device) && + (subvendor == PCI_ANY_ID || id->subvendor == subvendor) && + (subdevice == PCI_ANY_ID || id->subdevice == subdevice) && + !((id->class ^ class) & class_mask)) { + list_del(&dynid->node); + kfree(dynid); + retval = 0; + break; + } + } + spin_unlock(&pdrv->dynids.lock); + + if (retval) + return retval; + return count; +} +static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id); + static void pci_free_dynids(struct pci_driver *drv) { @@ -125,6 +171,20 @@ static void pci_remove_newid_file(struct pci_driver *drv) { driver_remove_file(&drv->driver, &driver_attr_new_id); } + +static int +pci_create_removeid_file(struct pci_driver *drv) +{ + int error = 0; + if (drv->probe != NULL) + error = driver_create_file(&drv->driver,&driver_attr_remove_id); + return error; +} + +static void pci_remove_removeid_file(struct pci_driver *drv) +{ + driver_remove_file(&drv->driver, &driver_attr_remove_id); +} #else /* !CONFIG_HOTPLUG */ static inline void pci_free_dynids(struct pci_driver *drv) {} static inline int pci_create_newid_file(struct pci_driver *drv) @@ -132,6 +192,11 @@ static inline int pci_create_newid_file(struct pci_driver *drv) return 0; } static inline void pci_remove_newid_file(struct pci_driver *drv) {} +static inline int pci_create_removeid_file(struct pci_driver *drv) +{ + return 0; +} +static inline void pci_remove_removeid_file(struct pci_driver *drv) {} #endif /** @@ -899,13 +964,23 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, /* register with core */ error = driver_register(&drv->driver); if (error) - return error; + goto out; error = pci_create_newid_file(drv); if (error) - driver_unregister(&drv->driver); + goto out_newid; + error = pci_create_removeid_file(drv); + if (error) + goto out_removeid; +out: return error; + +out_removeid: + pci_remove_newid_file(drv); +out_newid: + driver_unregister(&drv->driver); + goto out; } /** @@ -921,6 +996,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, void pci_unregister_driver(struct pci_driver *drv) { + pci_remove_removeid_file(drv); pci_remove_newid_file(drv); driver_unregister(&drv->driver); pci_free_dynids(drv); @@ -1020,6 +1096,7 @@ struct bus_type pci_bus_type = { .remove = pci_device_remove, .shutdown = pci_device_shutdown, .dev_attrs = pci_dev_attrs, + .bus_attrs = pci_bus_attrs, .pm = PCI_PM_OPS_PTR, }; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index dfc4e0ddf241..e9a8706a6401 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -219,6 +219,83 @@ msi_bus_store(struct device *dev, struct device_attribute *attr, return count; } +#ifdef CONFIG_HOTPLUG +static DEFINE_MUTEX(pci_remove_rescan_mutex); +static ssize_t bus_rescan_store(struct bus_type *bus, const char *buf, + size_t count) +{ + unsigned long val; + struct pci_bus *b = NULL; + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (val) { + mutex_lock(&pci_remove_rescan_mutex); + while ((b = pci_find_next_bus(b)) != NULL) + pci_rescan_bus(b); + mutex_unlock(&pci_remove_rescan_mutex); + } + return count; +} + +struct bus_attribute pci_bus_attrs[] = { + __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, bus_rescan_store), + __ATTR_NULL +}; + +static ssize_t +dev_rescan_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + struct pci_dev *pdev = to_pci_dev(dev); + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (val) { + mutex_lock(&pci_remove_rescan_mutex); + pci_rescan_bus(pdev->bus); + mutex_unlock(&pci_remove_rescan_mutex); + } + return count; +} + +static void remove_callback(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + mutex_lock(&pci_remove_rescan_mutex); + pci_remove_bus_device(pdev); + mutex_unlock(&pci_remove_rescan_mutex); +} + +static ssize_t +remove_store(struct device *dev, struct device_attribute *dummy, + const char *buf, size_t count) +{ + int ret = 0; + unsigned long val; + struct pci_dev *pdev = to_pci_dev(dev); + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (pci_is_root_bus(pdev->bus)) + return -EBUSY; + + /* An attribute cannot be unregistered by one of its own methods, + * so we have to use this roundabout approach. + */ + if (val) + ret = device_schedule_callback(dev, remove_callback); + if (ret) + count = ret; + return count; +} +#endif + struct device_attribute pci_dev_attrs[] = { __ATTR_RO(resource), __ATTR_RO(vendor), @@ -237,10 +314,25 @@ struct device_attribute pci_dev_attrs[] = { __ATTR(broken_parity_status,(S_IRUGO|S_IWUSR), broken_parity_status_show,broken_parity_status_store), __ATTR(msi_bus, 0644, msi_bus_show, msi_bus_store), +#ifdef CONFIG_HOTPLUG + __ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store), + __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store), +#endif __ATTR_NULL, }; static ssize_t +boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return sprintf(buf, "%u\n", + !!(pdev->resource[PCI_ROM_RESOURCE].flags & + IORESOURCE_ROM_SHADOW)); +} +struct device_attribute vga_attr = __ATTR_RO(boot_vga); + +static ssize_t pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -493,6 +585,19 @@ pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr, } /** + * pci_adjust_legacy_attr - adjustment of legacy file attributes + * @b: bus to create files under + * @mmap_type: I/O port or memory + * + * Stub implementation. Can be overridden by arch if necessary. + */ +void __weak +pci_adjust_legacy_attr(struct pci_bus *b, enum pci_mmap_state mmap_type) +{ + return; +} + +/** * pci_create_legacy_files - create legacy I/O port and memory files * @b: bus to create files under * @@ -518,6 +623,7 @@ void pci_create_legacy_files(struct pci_bus *b) b->legacy_io->read = pci_read_legacy_io; b->legacy_io->write = pci_write_legacy_io; b->legacy_io->mmap = pci_mmap_legacy_io; + pci_adjust_legacy_attr(b, pci_mmap_io); error = device_create_bin_file(&b->dev, b->legacy_io); if (error) goto legacy_io_err; @@ -528,6 +634,7 @@ void pci_create_legacy_files(struct pci_bus *b) b->legacy_mem->size = 1024*1024; b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR; b->legacy_mem->mmap = pci_mmap_legacy_mem; + pci_adjust_legacy_attr(b, pci_mmap_mem); error = device_create_bin_file(&b->dev, b->legacy_mem); if (error) goto legacy_mem_err; @@ -719,8 +826,8 @@ static int pci_create_resource_files(struct pci_dev *pdev) return 0; } #else /* !HAVE_PCI_MMAP */ -static inline int pci_create_resource_files(struct pci_dev *dev) { return 0; } -static inline void pci_remove_resource_files(struct pci_dev *dev) { return; } +int __weak pci_create_resource_files(struct pci_dev *dev) { return 0; } +void __weak pci_remove_resource_files(struct pci_dev *dev) { return; } #endif /* HAVE_PCI_MMAP */ /** @@ -884,18 +991,27 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev) pdev->rom_attr = attr; } + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) { + retval = device_create_file(&pdev->dev, &vga_attr); + if (retval) + goto err_rom_file; + } + /* add platform-specific attributes */ retval = pcibios_add_platform_entries(pdev); if (retval) - goto err_rom_file; + goto err_vga_file; /* add sysfs entries for various capabilities */ retval = pci_create_capabilities_sysfs(pdev); if (retval) - goto err_rom_file; + goto err_vga_file; return 0; +err_vga_file: + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + device_remove_file(&pdev->dev, &vga_attr); err_rom_file: if (rom_size) { sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0195066251e5..fe7ac2cea7c9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -20,6 +20,8 @@ #include <linux/pm_wakeup.h> #include <linux/interrupt.h> #include <asm/dma.h> /* isa_dma_bridge_buggy */ +#include <linux/device.h> +#include <asm/setup.h> #include "pci.h" unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT; @@ -677,6 +679,8 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) EXPORT_SYMBOL(pci_choose_state); +#define PCI_EXP_SAVE_REGS 7 + static int pci_save_pcie_state(struct pci_dev *dev) { int pos, i = 0; @@ -689,7 +693,7 @@ static int pci_save_pcie_state(struct pci_dev *dev) save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP); if (!save_state) { - dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__); + dev_err(&dev->dev, "buffer not found in %s\n", __func__); return -ENOMEM; } cap = (u16 *)&save_state->data[0]; @@ -698,6 +702,9 @@ static int pci_save_pcie_state(struct pci_dev *dev) pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]); pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]); pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]); return 0; } @@ -718,6 +725,9 @@ static void pci_restore_pcie_state(struct pci_dev *dev) pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]); pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]); pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]); } @@ -732,7 +742,7 @@ static int pci_save_pcix_state(struct pci_dev *dev) save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX); if (!save_state) { - dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__); + dev_err(&dev->dev, "buffer not found in %s\n", __func__); return -ENOMEM; } @@ -805,6 +815,7 @@ pci_restore_state(struct pci_dev *dev) } pci_restore_pcix_state(dev); pci_restore_msi_state(dev); + pci_restore_iov_state(dev); return 0; } @@ -1401,7 +1412,8 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) { int error; - error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, 4 * sizeof(u16)); + error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, + PCI_EXP_SAVE_REGS * sizeof(u16)); if (error) dev_err(&dev->dev, "unable to preallocate PCI Express save buffer\n"); @@ -1472,7 +1484,7 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge) if (!pin) return -1; - while (dev->bus->self) { + while (dev->bus->parent) { pin = pci_swizzle_interrupt_pin(dev, pin); dev = dev->bus->self; } @@ -1492,7 +1504,7 @@ u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp) { u8 pin = *pinp; - while (dev->bus->self) { + while (dev->bus->parent) { pin = pci_swizzle_interrupt_pin(dev, pin); dev = dev->bus->self; } @@ -2016,18 +2028,24 @@ static int __pcie_flr(struct pci_dev *dev, int probe) pci_block_user_cfg_access(dev); /* Wait for Transaction Pending bit clean */ + pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); + if (!(status & PCI_EXP_DEVSTA_TRPND)) + goto transaction_done; + msleep(100); pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); - if (status & PCI_EXP_DEVSTA_TRPND) { - dev_info(&dev->dev, "Busy after 100ms while trying to reset; " + if (!(status & PCI_EXP_DEVSTA_TRPND)) + goto transaction_done; + + dev_info(&dev->dev, "Busy after 100ms while trying to reset; " "sleeping for 1 second\n"); - ssleep(1); - pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); - if (status & PCI_EXP_DEVSTA_TRPND) - dev_info(&dev->dev, "Still busy after 1s; " + ssleep(1); + pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); + if (status & PCI_EXP_DEVSTA_TRPND) + dev_info(&dev->dev, "Still busy after 1s; " "proceeding with reset anyway\n"); - } +transaction_done: pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); mdelay(100); @@ -2054,18 +2072,24 @@ static int __pci_af_flr(struct pci_dev *dev, int probe) pci_block_user_cfg_access(dev); /* Wait for Transaction Pending bit clean */ + pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); + if (!(status & PCI_AF_STATUS_TP)) + goto transaction_done; + msleep(100); pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); - if (status & PCI_AF_STATUS_TP) { - dev_info(&dev->dev, "Busy after 100ms while trying to" - " reset; sleeping for 1 second\n"); - ssleep(1); - pci_read_config_byte(dev, - cappos + PCI_AF_STATUS, &status); - if (status & PCI_AF_STATUS_TP) - dev_info(&dev->dev, "Still busy after 1s; " - "proceeding with reset anyway\n"); - } + if (!(status & PCI_AF_STATUS_TP)) + goto transaction_done; + + dev_info(&dev->dev, "Busy after 100ms while trying to" + " reset; sleeping for 1 second\n"); + ssleep(1); + pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); + if (status & PCI_AF_STATUS_TP) + dev_info(&dev->dev, "Still busy after 1s; " + "proceeding with reset anyway\n"); + +transaction_done: pci_write_config_byte(dev, cappos + PCI_AF_CTRL, PCI_AF_CTRL_FLR); mdelay(100); @@ -2334,18 +2358,140 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags) */ int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) { + int reg; + if (resno < PCI_ROM_RESOURCE) { *type = pci_bar_unknown; return PCI_BASE_ADDRESS_0 + 4 * resno; } else if (resno == PCI_ROM_RESOURCE) { *type = pci_bar_mem32; return dev->rom_base_reg; + } else if (resno < PCI_BRIDGE_RESOURCES) { + /* device specific resource */ + reg = pci_iov_resource_bar(dev, resno, type); + if (reg) + return reg; } dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno); return 0; } +#define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE +static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0}; +spinlock_t resource_alignment_lock = SPIN_LOCK_UNLOCKED; + +/** + * pci_specified_resource_alignment - get resource alignment specified by user. + * @dev: the PCI device to get + * + * RETURNS: Resource alignment if it is specified. + * Zero if it is not specified. + */ +resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) +{ + int seg, bus, slot, func, align_order, count; + resource_size_t align = 0; + char *p; + + spin_lock(&resource_alignment_lock); + p = resource_alignment_param; + while (*p) { + count = 0; + if (sscanf(p, "%d%n", &align_order, &count) == 1 && + p[count] == '@') { + p += count + 1; + } else { + align_order = -1; + } + if (sscanf(p, "%x:%x:%x.%x%n", + &seg, &bus, &slot, &func, &count) != 4) { + seg = 0; + if (sscanf(p, "%x:%x.%x%n", + &bus, &slot, &func, &count) != 3) { + /* Invalid format */ + printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n", + p); + break; + } + } + p += count; + if (seg == pci_domain_nr(dev->bus) && + bus == dev->bus->number && + slot == PCI_SLOT(dev->devfn) && + func == PCI_FUNC(dev->devfn)) { + if (align_order == -1) { + align = PAGE_SIZE; + } else { + align = 1 << align_order; + } + /* Found */ + break; + } + if (*p != ';' && *p != ',') { + /* End of param or invalid format */ + break; + } + p++; + } + spin_unlock(&resource_alignment_lock); + return align; +} + +/** + * pci_is_reassigndev - check if specified PCI is target device to reassign + * @dev: the PCI device to check + * + * RETURNS: non-zero for PCI device is a target device to reassign, + * or zero is not. + */ +int pci_is_reassigndev(struct pci_dev *dev) +{ + return (pci_specified_resource_alignment(dev) != 0); +} + +ssize_t pci_set_resource_alignment_param(const char *buf, size_t count) +{ + if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1) + count = RESOURCE_ALIGNMENT_PARAM_SIZE - 1; + spin_lock(&resource_alignment_lock); + strncpy(resource_alignment_param, buf, count); + resource_alignment_param[count] = '\0'; + spin_unlock(&resource_alignment_lock); + return count; +} + +ssize_t pci_get_resource_alignment_param(char *buf, size_t size) +{ + size_t count; + spin_lock(&resource_alignment_lock); + count = snprintf(buf, size, "%s", resource_alignment_param); + spin_unlock(&resource_alignment_lock); + return count; +} + +static ssize_t pci_resource_alignment_show(struct bus_type *bus, char *buf) +{ + return pci_get_resource_alignment_param(buf, PAGE_SIZE); +} + +static ssize_t pci_resource_alignment_store(struct bus_type *bus, + const char *buf, size_t count) +{ + return pci_set_resource_alignment_param(buf, count); +} + +BUS_ATTR(resource_alignment, 0644, pci_resource_alignment_show, + pci_resource_alignment_store); + +static int __init pci_resource_alignment_sysfs_init(void) +{ + return bus_create_file(&pci_bus_type, + &bus_attr_resource_alignment); +} + +late_initcall(pci_resource_alignment_sysfs_init); + static void __devinit pci_no_domains(void) { #ifdef CONFIG_PCI_DOMAINS @@ -2394,6 +2540,9 @@ static int __init pci_setup(char *str) pci_cardbus_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "cbmemsize=", 10)) { pci_cardbus_mem_size = memparse(str + 10, &str); + } else if (!strncmp(str, "resource_alignment=", 19)) { + pci_set_resource_alignment_param(str + 19, + strlen(str + 19)); } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 149fff65891f..d03f6b99f292 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1,6 +1,8 @@ #ifndef DRIVERS_PCI_H #define DRIVERS_PCI_H +#include <linux/workqueue.h> + #define PCI_CFG_SPACE_SIZE 256 #define PCI_CFG_SPACE_EXP_SIZE 4096 @@ -135,6 +137,12 @@ extern int pcie_mch_quirk; extern struct device_attribute pci_dev_attrs[]; extern struct device_attribute dev_attr_cpuaffinity; extern struct device_attribute dev_attr_cpulistaffinity; +#ifdef CONFIG_HOTPLUG +extern struct bus_attribute pci_bus_attrs[]; +#else +#define pci_bus_attrs NULL +#endif + /** * pci_match_one_device - Tell if a PCI device structure has a matching @@ -177,6 +185,7 @@ enum pci_bar_type { pci_bar_mem64, /* A 64-bit memory BAR */ }; +extern int pci_setup_device(struct pci_dev *dev); extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg); extern int pci_resource_bar(struct pci_dev *dev, int resno, @@ -194,4 +203,60 @@ static inline int pci_ari_enabled(struct pci_bus *bus) return bus->self && bus->self->ari_enabled; } +#ifdef CONFIG_PCI_QUIRKS +extern int pci_is_reassigndev(struct pci_dev *dev); +resource_size_t pci_specified_resource_alignment(struct pci_dev *dev); +extern void pci_disable_bridge_window(struct pci_dev *dev); +#endif + +/* Single Root I/O Virtualization */ +struct pci_sriov { + int pos; /* capability position */ + int nres; /* number of resources */ + u32 cap; /* SR-IOV Capabilities */ + u16 ctrl; /* SR-IOV Control */ + u16 total; /* total VFs associated with the PF */ + u16 initial; /* initial VFs associated with the PF */ + u16 nr_virtfn; /* number of VFs available */ + u16 offset; /* first VF Routing ID offset */ + u16 stride; /* following VF stride */ + u32 pgsz; /* page size for BAR alignment */ + u8 link; /* Function Dependency Link */ + struct pci_dev *dev; /* lowest numbered PF */ + struct pci_dev *self; /* this PF */ + struct mutex lock; /* lock for VF bus */ + struct work_struct mtask; /* VF Migration task */ + u8 __iomem *mstate; /* VF Migration State Array */ +}; + +#ifdef CONFIG_PCI_IOV +extern int pci_iov_init(struct pci_dev *dev); +extern void pci_iov_release(struct pci_dev *dev); +extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type); +extern void pci_restore_iov_state(struct pci_dev *dev); +extern int pci_iov_bus_range(struct pci_bus *bus); +#else +static inline int pci_iov_init(struct pci_dev *dev) +{ + return -ENODEV; +} +static inline void pci_iov_release(struct pci_dev *dev) + +{ +} +static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + return 0; +} +static inline void pci_restore_iov_state(struct pci_dev *dev) +{ +} +static inline int pci_iov_bus_range(struct pci_bus *bus) +{ + return 0; +} +#endif /* CONFIG_PCI_IOV */ + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index e390707661dd..32ade5af927e 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -38,30 +38,13 @@ MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); -static int __devinit aer_probe (struct pcie_device *dev, - const struct pcie_port_service_id *id ); +static int __devinit aer_probe (struct pcie_device *dev); static void aer_remove(struct pcie_device *dev); -static int aer_suspend(struct pcie_device *dev, pm_message_t state) -{return 0;} -static int aer_resume(struct pcie_device *dev) {return 0;} static pci_ers_result_t aer_error_detected(struct pci_dev *dev, enum pci_channel_state error); static void aer_error_resume(struct pci_dev *dev); static pci_ers_result_t aer_root_reset(struct pci_dev *dev); -/* - * PCI Express bus's AER Root service driver data structure - */ -static struct pcie_port_service_id aer_id[] = { - { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_RC_PORT, - .service_type = PCIE_PORT_SERVICE_AER, - }, - { /* end: all zeroes */ } -}; - static struct pci_error_handlers aer_error_handlers = { .error_detected = aer_error_detected, .resume = aer_error_resume, @@ -69,14 +52,12 @@ static struct pci_error_handlers aer_error_handlers = { static struct pcie_port_service_driver aerdriver = { .name = "aer", - .id_table = &aer_id[0], + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_AER, .probe = aer_probe, .remove = aer_remove, - .suspend = aer_suspend, - .resume = aer_resume, - .err_handler = &aer_error_handlers, .reset_link = aer_root_reset, @@ -207,8 +188,7 @@ static void aer_remove(struct pcie_device *dev) * * Invoked when PCI Express bus loads AER service driver. **/ -static int __devinit aer_probe (struct pcie_device *dev, - const struct pcie_port_service_id *id ) +static int __devinit aer_probe (struct pcie_device *dev) { int status; struct aer_rpc *rpc; diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index ebce26c37049..8edb2f300e8f 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -38,7 +38,7 @@ int aer_osc_setup(struct pcie_device *pciedev) handle = acpi_find_root_bridge_handle(pdev); if (handle) { - status = pci_osc_control_set(handle, + status = acpi_pci_osc_control_set(handle, OSC_PCI_EXPRESS_AER_CONTROL | OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 382575007382..307452f30035 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -351,21 +351,21 @@ static int find_aer_service_iter(struct device *device, void *data) { struct device_driver *driver; struct pcie_port_service_driver *service_driver; - struct pcie_device *pcie_dev; struct find_aer_service_data *result; result = (struct find_aer_service_data *) data; if (device->bus == &pcie_port_bus_type) { - pcie_dev = to_pcie_device(device); - if (pcie_dev->id.port_type == PCIE_SW_DOWNSTREAM_PORT) + struct pcie_port_data *port_data; + + port_data = pci_get_drvdata(to_pcie_device(device)->port); + if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT) result->is_downstream = 1; driver = device->driver; if (driver) { service_driver = to_service_driver(driver); - if (service_driver->id_table->service_type == - PCIE_PORT_SERVICE_AER) { + if (service_driver->service == PCIE_PORT_SERVICE_AER) { result->aer_driver = service_driver; return 1; } diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 2529f3f2ea5a..17ad53868f9f 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -25,19 +25,21 @@ #define PCIE_CAPABILITIES_REG 0x2 #define PCIE_SLOT_CAPABILITIES_REG 0x14 #define PCIE_PORT_DEVICE_MAXSERVICES 4 +#define PCIE_PORT_MSI_VECTOR_MASK 0x1f +/* + * According to the PCI Express Base Specification 2.0, the indices of the MSI-X + * table entires used by port services must not exceed 31 + */ +#define PCIE_PORT_MAX_MSIX_ENTRIES 32 #define get_descriptor_id(type, service) (((type - 4) << 4) | service) -struct pcie_port_device_ext { - int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ -}; - extern struct bus_type pcie_port_bus_type; extern int pcie_port_device_probe(struct pci_dev *dev); extern int pcie_port_device_register(struct pci_dev *dev); #ifdef CONFIG_PM -extern int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state); -extern int pcie_port_device_resume(struct pci_dev *dev); +extern int pcie_port_device_suspend(struct device *dev); +extern int pcie_port_device_resume(struct device *dev); #endif extern void pcie_port_device_remove(struct pci_dev *dev); extern int __must_check pcie_port_bus_register(void); diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index eec89b767f9f..ef3a4eeaebb4 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -26,20 +26,22 @@ EXPORT_SYMBOL_GPL(pcie_port_bus_type); static int pcie_port_bus_match(struct device *dev, struct device_driver *drv) { struct pcie_device *pciedev; + struct pcie_port_data *port_data; struct pcie_port_service_driver *driver; if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type) return 0; - + pciedev = to_pcie_device(dev); driver = to_service_driver(drv); - if ( (driver->id_table->vendor != PCI_ANY_ID && - driver->id_table->vendor != pciedev->id.vendor) || - (driver->id_table->device != PCI_ANY_ID && - driver->id_table->device != pciedev->id.device) || - (driver->id_table->port_type != PCIE_ANY_PORT && - driver->id_table->port_type != pciedev->id.port_type) || - driver->id_table->service_type != pciedev->id.service_type ) + + if (driver->service != pciedev->service) + return 0; + + port_data = pci_get_drvdata(pciedev->port); + + if (driver->port_type != PCIE_ANY_PORT + && driver->port_type != port_data->port_type) return 0; return 1; diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 8b3f8c18032f..e39982503863 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -15,10 +15,9 @@ #include <linux/slab.h> #include <linux/pcieport_if.h> +#include "../pci.h" #include "portdrv.h" -extern int pcie_mch_quirk; /* MSI-quirk Indicator */ - /** * release_pcie_device - free PCI Express port service device structure * @dev: Port service device to release @@ -31,26 +30,150 @@ static void release_pcie_device(struct device *dev) kfree(to_pcie_device(dev)); } -static int is_msi_quirked(struct pci_dev *dev) +/** + * pcie_port_msix_add_entry - add entry to given array of MSI-X entries + * @entries: Array of MSI-X entries + * @new_entry: Index of the entry to add to the array + * @nr_entries: Number of entries aleady in the array + * + * Return value: Position of the added entry in the array + */ +static int pcie_port_msix_add_entry( + struct msix_entry *entries, int new_entry, int nr_entries) { - int port_type, quirk = 0; + int j; + + for (j = 0; j < nr_entries; j++) + if (entries[j].entry == new_entry) + return j; + + entries[j].entry = new_entry; + return j; +} + +/** + * pcie_port_enable_msix - try to set up MSI-X as interrupt mode for given port + * @dev: PCI Express port to handle + * @vectors: Array of interrupt vectors to populate + * @mask: Bitmask of port capabilities returned by get_port_device_capability() + * + * Return value: 0 on success, error code on failure + */ +static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask) +{ + struct msix_entry *msix_entries; + int idx[PCIE_PORT_DEVICE_MAXSERVICES]; + int nr_entries, status, pos, i, nvec; u16 reg16; + u32 reg32; - pci_read_config_word(dev, - pci_find_capability(dev, PCI_CAP_ID_EXP) + - PCIE_CAPABILITIES_REG, ®16); - port_type = (reg16 >> 4) & PORT_TYPE_MASK; - switch(port_type) { - case PCIE_RC_PORT: - if (pcie_mch_quirk == 1) - quirk = 1; - break; - case PCIE_SW_UPSTREAM_PORT: - case PCIE_SW_DOWNSTREAM_PORT: - default: - break; + nr_entries = pci_msix_table_size(dev); + if (!nr_entries) + return -EINVAL; + if (nr_entries > PCIE_PORT_MAX_MSIX_ENTRIES) + nr_entries = PCIE_PORT_MAX_MSIX_ENTRIES; + + msix_entries = kzalloc(sizeof(*msix_entries) * nr_entries, GFP_KERNEL); + if (!msix_entries) + return -ENOMEM; + + /* + * Allocate as many entries as the port wants, so that we can check + * which of them will be useful. Moreover, if nr_entries is correctly + * equal to the number of entries this port actually uses, we'll happily + * go through without any tricks. + */ + for (i = 0; i < nr_entries; i++) + msix_entries[i].entry = i; + + status = pci_enable_msix(dev, msix_entries, nr_entries); + if (status) + goto Exit; + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + idx[i] = -1; + status = -EIO; + nvec = 0; + + if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) { + int entry; + + /* + * The code below follows the PCI Express Base Specification 2.0 + * stating in Section 6.1.6 that "PME and Hot-Plug Event + * interrupts (when both are implemented) always share the same + * MSI or MSI-X vector, as indicated by the Interrupt Message + * Number field in the PCI Express Capabilities register", where + * according to Section 7.8.2 of the specification "For MSI-X, + * the value in this field indicates which MSI-X Table entry is + * used to generate the interrupt message." + */ + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + pci_read_config_word(dev, pos + PCIE_CAPABILITIES_REG, ®16); + entry = (reg16 >> 9) & PCIE_PORT_MSI_VECTOR_MASK; + if (entry >= nr_entries) + goto Error; + + i = pcie_port_msix_add_entry(msix_entries, entry, nvec); + if (i == nvec) + nvec++; + + idx[PCIE_PORT_SERVICE_PME_SHIFT] = i; + idx[PCIE_PORT_SERVICE_HP_SHIFT] = i; + } + + if (mask & PCIE_PORT_SERVICE_AER) { + int entry; + + /* + * The code below follows Section 7.10.10 of the PCI Express + * Base Specification 2.0 stating that bits 31-27 of the Root + * Error Status Register contain a value indicating which of the + * MSI/MSI-X vectors assigned to the port is going to be used + * for AER, where "For MSI-X, the value in this register + * indicates which MSI-X Table entry is used to generate the + * interrupt message." + */ + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, ®32); + entry = reg32 >> 27; + if (entry >= nr_entries) + goto Error; + + i = pcie_port_msix_add_entry(msix_entries, entry, nvec); + if (i == nvec) + nvec++; + + idx[PCIE_PORT_SERVICE_AER_SHIFT] = i; } - return quirk; + + /* + * If nvec is equal to the allocated number of entries, we can just use + * what we have. Otherwise, the port has some extra entries not for the + * services we know and we need to work around that. + */ + if (nvec == nr_entries) { + status = 0; + } else { + /* Drop the temporary MSI-X setup */ + pci_disable_msix(dev); + + /* Now allocate the MSI-X vectors for real */ + status = pci_enable_msix(dev, msix_entries, nvec); + if (status) + goto Exit; + } + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + vectors[i] = idx[i] >= 0 ? msix_entries[idx[i]].vector : -1; + + Exit: + kfree(msix_entries); + return status; + + Error: + pci_disable_msix(dev); + goto Exit; } /** @@ -64,47 +187,32 @@ static int is_msi_quirked(struct pci_dev *dev) */ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) { - int i, pos, nvec, status = -EINVAL; - int interrupt_mode = PCIE_PORT_INTx_MODE; + struct pcie_port_data *port_data = pci_get_drvdata(dev); + int irq, interrupt_mode = PCIE_PORT_NO_IRQ; + int i; - /* Set INTx as default */ - for (i = 0, nvec = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { - if (mask & (1 << i)) - nvec++; - vectors[i] = dev->irq; - } - /* Check MSI quirk */ - if (is_msi_quirked(dev)) - return interrupt_mode; - - /* Select MSI-X over MSI if supported */ - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - if (pos) { - struct msix_entry msix_entries[PCIE_PORT_DEVICE_MAXSERVICES] = - {{0, 0}, {0, 1}, {0, 2}, {0, 3}}; - status = pci_enable_msix(dev, msix_entries, nvec); - if (!status) { - int j = 0; - - interrupt_mode = PCIE_PORT_MSIX_MODE; - for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { - if (mask & (1 << i)) - vectors[i] = msix_entries[j++].vector; - } - } - } - if (status) { - pos = pci_find_capability(dev, PCI_CAP_ID_MSI); - if (pos) { - status = pci_enable_msi(dev); - if (!status) { - interrupt_mode = PCIE_PORT_MSI_MODE; - for (i = 0;i < PCIE_PORT_DEVICE_MAXSERVICES;i++) - vectors[i] = dev->irq; - } - } - } + if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk) + goto Fallback; + + /* Try to use MSI-X if supported */ + if (!pcie_port_enable_msix(dev, vectors, mask)) + return PCIE_PORT_MSIX_MODE; + + /* We're not going to use MSI-X, so try MSI and fall back to INTx */ + if (!pci_enable_msi(dev)) + interrupt_mode = PCIE_PORT_MSI_MODE; + + Fallback: + if (interrupt_mode == PCIE_PORT_NO_IRQ && dev->pin) + interrupt_mode = PCIE_PORT_INTx_MODE; + + irq = interrupt_mode != PCIE_PORT_NO_IRQ ? dev->irq : -1; + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + vectors[i] = irq; + + vectors[PCIE_PORT_SERVICE_VC_SHIFT] = -1; + return interrupt_mode; } @@ -132,13 +240,11 @@ static int get_port_device_capability(struct pci_dev *dev) pos + PCIE_SLOT_CAPABILITIES_REG, ®32); if (reg32 & SLOT_HP_CAPABLE_MASK) services |= PCIE_PORT_SERVICE_HP; - } - /* PME Capable - root port capability */ - if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT) - services |= PCIE_PORT_SERVICE_PME; - + } + /* AER capable */ if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR)) services |= PCIE_PORT_SERVICE_AER; + /* VC support */ if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC)) services |= PCIE_PORT_SERVICE_VC; @@ -152,20 +258,17 @@ static int get_port_device_capability(struct pci_dev *dev) * @port_type: Type of the port * @service_type: Type of service to associate with the service device * @irq: Interrupt vector to associate with the service device - * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI) */ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, - int port_type, int service_type, int irq, int irq_mode) + int service_type, int irq) { + struct pcie_port_data *port_data = pci_get_drvdata(parent); struct device *device; + int port_type = port_data->port_type; dev->port = parent; - dev->interrupt_mode = irq_mode; dev->irq = irq; - dev->id.vendor = parent->vendor; - dev->id.device = parent->device; - dev->id.port_type = port_type; - dev->id.service_type = (1 << service_type); + dev->service = service_type; /* Initialize generic device interface */ device = &dev->device; @@ -185,10 +288,9 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, * @port_type: Type of the port * @service_type: Type of service to associate with the service device * @irq: Interrupt vector to associate with the service device - * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI) */ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, - int port_type, int service_type, int irq, int irq_mode) + int service_type, int irq) { struct pcie_device *device; @@ -196,7 +298,7 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, if (!device) return NULL; - pcie_device_init(parent, device, port_type, service_type, irq,irq_mode); + pcie_device_init(parent, device, service_type, irq); return device; } @@ -230,63 +332,90 @@ int pcie_port_device_probe(struct pci_dev *dev) */ int pcie_port_device_register(struct pci_dev *dev) { - struct pcie_port_device_ext *p_ext; - int status, type, capabilities, irq_mode, i; + struct pcie_port_data *port_data; + int status, capabilities, irq_mode, i, nr_serv; int vectors[PCIE_PORT_DEVICE_MAXSERVICES]; u16 reg16; - /* Allocate port device extension */ - if (!(p_ext = kmalloc(sizeof(struct pcie_port_device_ext), GFP_KERNEL))) + port_data = kzalloc(sizeof(*port_data), GFP_KERNEL); + if (!port_data) return -ENOMEM; - - pci_set_drvdata(dev, p_ext); + pci_set_drvdata(dev, port_data); /* Get port type */ pci_read_config_word(dev, pci_find_capability(dev, PCI_CAP_ID_EXP) + PCIE_CAPABILITIES_REG, ®16); - type = (reg16 >> 4) & PORT_TYPE_MASK; + port_data->port_type = (reg16 >> 4) & PORT_TYPE_MASK; - /* Now get port services */ capabilities = get_port_device_capability(dev); + /* Root ports are capable of generating PME too */ + if (port_data->port_type == PCIE_RC_PORT) + capabilities |= PCIE_PORT_SERVICE_PME; + irq_mode = assign_interrupt_mode(dev, vectors, capabilities); - p_ext->interrupt_mode = irq_mode; + if (irq_mode == PCIE_PORT_NO_IRQ) { + /* + * Don't use service devices that require interrupts if there is + * no way to generate them. + */ + if (!(capabilities & PCIE_PORT_SERVICE_VC)) { + status = -ENODEV; + goto Error; + } + capabilities = PCIE_PORT_SERVICE_VC; + } + port_data->port_irq_mode = irq_mode; + + status = pci_enable_device(dev); + if (status) + goto Error; + pci_set_master(dev); /* Allocate child services if any */ - for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { + for (i = 0, nr_serv = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { struct pcie_device *child; + int service = 1 << i; + + if (!(capabilities & service)) + continue; - if (capabilities & (1 << i)) { - child = alloc_pcie_device( - dev, /* parent */ - type, /* port type */ - i, /* service type */ - vectors[i], /* irq */ - irq_mode /* interrupt mode */); - if (child) { - status = device_register(&child->device); - if (status) { - kfree(child); - continue; - } - get_device(&child->device); - } + child = alloc_pcie_device(dev, service, vectors[i]); + if (!child) + continue; + + status = device_register(&child->device); + if (status) { + kfree(child); + continue; } + + get_device(&child->device); + nr_serv++; + } + if (!nr_serv) { + pci_disable_device(dev); + status = -ENODEV; + goto Error; } + return 0; + + Error: + kfree(port_data); + return status; } #ifdef CONFIG_PM static int suspend_iter(struct device *dev, void *data) { struct pcie_port_service_driver *service_driver; - pm_message_t state = * (pm_message_t *) data; if ((dev->bus == &pcie_port_bus_type) && (dev->driver)) { service_driver = to_service_driver(dev->driver); if (service_driver->suspend) - service_driver->suspend(to_pcie_device(dev), state); + service_driver->suspend(to_pcie_device(dev)); } return 0; } @@ -294,11 +423,10 @@ static int suspend_iter(struct device *dev, void *data) /** * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle - * @state: Representation of system power management transition in progress */ -int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state) +int pcie_port_device_suspend(struct device *dev) { - return device_for_each_child(&dev->dev, &state, suspend_iter); + return device_for_each_child(dev, NULL, suspend_iter); } static int resume_iter(struct device *dev, void *data) @@ -318,24 +446,17 @@ static int resume_iter(struct device *dev, void *data) * pcie_port_device_suspend - resume port services associated with a PCIe port * @dev: PCI Express port to handle */ -int pcie_port_device_resume(struct pci_dev *dev) +int pcie_port_device_resume(struct device *dev) { - return device_for_each_child(&dev->dev, NULL, resume_iter); + return device_for_each_child(dev, NULL, resume_iter); } -#endif +#endif /* PM */ static int remove_iter(struct device *dev, void *data) { - struct pcie_port_service_driver *service_driver; - if (dev->bus == &pcie_port_bus_type) { - if (dev->driver) { - service_driver = to_service_driver(dev->driver); - if (service_driver->remove) - service_driver->remove(to_pcie_device(dev)); - } - *(unsigned long*)data = (unsigned long)dev; - return 1; + put_device(dev); + device_unregister(dev); } return 0; } @@ -349,25 +470,21 @@ static int remove_iter(struct device *dev, void *data) */ void pcie_port_device_remove(struct pci_dev *dev) { - struct device *device; - unsigned long device_addr; - int interrupt_mode = PCIE_PORT_INTx_MODE; - int status; + struct pcie_port_data *port_data = pci_get_drvdata(dev); - do { - status = device_for_each_child(&dev->dev, &device_addr, remove_iter); - if (status) { - device = (struct device*)device_addr; - interrupt_mode = (to_pcie_device(device))->interrupt_mode; - put_device(device); - device_unregister(device); - } - } while (status); - /* Switch to INTx by default if MSI enabled */ - if (interrupt_mode == PCIE_PORT_MSIX_MODE) + device_for_each_child(&dev->dev, NULL, remove_iter); + pci_disable_device(dev); + + switch (port_data->port_irq_mode) { + case PCIE_PORT_MSIX_MODE: pci_disable_msix(dev); - else if (interrupt_mode == PCIE_PORT_MSI_MODE) + break; + case PCIE_PORT_MSI_MODE: pci_disable_msi(dev); + break; + } + + kfree(port_data); } /** @@ -392,7 +509,7 @@ static int pcie_port_probe_service(struct device *dev) return -ENODEV; pciedev = to_pcie_device(dev); - status = driver->probe(pciedev, driver->id_table); + status = driver->probe(pciedev); if (!status) { dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n", driver->name); diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 5ea566e20b37..b924e2463f85 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -32,11 +32,6 @@ MODULE_LICENSE("GPL"); /* global data */ static const char device_name[] = "pcieport-driver"; -static int pcie_portdrv_save_config(struct pci_dev *dev) -{ - return pci_save_state(dev); -} - static int pcie_portdrv_restore_config(struct pci_dev *dev) { int retval; @@ -49,21 +44,21 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev) } #ifdef CONFIG_PM -static int pcie_portdrv_suspend(struct pci_dev *dev, pm_message_t state) -{ - return pcie_port_device_suspend(dev, state); +static struct dev_pm_ops pcie_portdrv_pm_ops = { + .suspend = pcie_port_device_suspend, + .resume = pcie_port_device_resume, + .freeze = pcie_port_device_suspend, + .thaw = pcie_port_device_resume, + .poweroff = pcie_port_device_suspend, + .restore = pcie_port_device_resume, +}; -} +#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) -static int pcie_portdrv_resume(struct pci_dev *dev) -{ - pci_set_master(dev); - return pcie_port_device_resume(dev); -} -#else -#define pcie_portdrv_suspend NULL -#define pcie_portdrv_resume NULL -#endif +#else /* !PM */ + +#define PCIE_PORTDRV_PM_OPS NULL +#endif /* !PM */ /* * pcie_portdrv_probe - Probe PCI-Express port devices @@ -82,20 +77,15 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev, if (status) return status; - if (pci_enable_device(dev) < 0) - return -ENODEV; - - pci_set_master(dev); if (!dev->irq && dev->pin) { dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; " "check vendor BIOS\n", dev->vendor, dev->device); } - if (pcie_port_device_register(dev)) { - pci_disable_device(dev); - return -ENOMEM; - } + status = pcie_port_device_register(dev); + if (status) + return status; - pcie_portdrv_save_config(dev); + pci_save_state(dev); return 0; } @@ -104,7 +94,6 @@ static void pcie_portdrv_remove (struct pci_dev *dev) { pcie_port_device_remove(dev); pci_disable_device(dev); - kfree(pci_get_drvdata(dev)); } static int error_detected_iter(struct device *device, void *data) @@ -278,10 +267,9 @@ static struct pci_driver pcie_portdriver = { .probe = pcie_portdrv_probe, .remove = pcie_portdrv_remove, - .suspend = pcie_portdrv_suspend, - .resume = pcie_portdrv_resume, - .err_handler = &pcie_portdrv_err_handler, + + .driver.pm = PCIE_PORTDRV_PM_OPS, }; static int __init pcie_portdrv_init(void) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 55ec44a27e89..e2f3dd098cfa 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -287,7 +287,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) struct resource *res; int i; - if (!dev) /* It's a host bus, nothing to read */ + if (!child->parent) /* It's a host bus, nothing to read */ return; if (dev->transparent) { @@ -511,21 +511,21 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, /* * If we already got to this bus through a different bridge, - * ignore it. This can happen with the i450NX chipset. + * don't re-add it. This can happen with the i450NX chipset. + * + * However, we continue to descend down the hierarchy and + * scan remaining child buses. */ - if (pci_find_bus(pci_domain_nr(bus), busnr)) { - dev_info(&dev->dev, "bus %04x:%02x already known\n", - pci_domain_nr(bus), busnr); - goto out; + child = pci_find_bus(pci_domain_nr(bus), busnr); + if (!child) { + child = pci_add_new_bus(bus, dev, busnr); + if (!child) + goto out; + child->primary = buses & 0xFF; + child->subordinate = (buses >> 16) & 0xFF; + child->bridge_ctl = bctl; } - child = pci_add_new_bus(bus, dev, busnr); - if (!child) - goto out; - child->primary = buses & 0xFF; - child->subordinate = (buses >> 16) & 0xFF; - child->bridge_ctl = bctl; - cmax = pci_scan_child_bus(child); if (cmax > max) max = cmax; @@ -674,6 +674,19 @@ static void pci_read_irq(struct pci_dev *dev) dev->irq = irq; } +static void set_pcie_port_type(struct pci_dev *pdev) +{ + int pos; + u16 reg16; + + pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (!pos) + return; + pdev->is_pcie = 1; + pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); + pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; +} + #define LEGACY_IO_RESOURCE (IORESOURCE_IO | IORESOURCE_PCI_FIXED) /** @@ -683,12 +696,33 @@ static void pci_read_irq(struct pci_dev *dev) * Initialize the device structure with information about the device's * vendor,class,memory and IO-space addresses,IRQ lines etc. * Called at initialisation of the PCI subsystem and by CardBus services. - * Returns 0 on success and -1 if unknown type of device (not normal, bridge - * or CardBus). + * Returns 0 on success and negative if unknown type of device (not normal, + * bridge or CardBus). */ -static int pci_setup_device(struct pci_dev * dev) +int pci_setup_device(struct pci_dev *dev) { u32 class; + u8 hdr_type; + struct pci_slot *slot; + + if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type)) + return -EIO; + + dev->sysdata = dev->bus->sysdata; + dev->dev.parent = dev->bus->bridge; + dev->dev.bus = &pci_bus_type; + dev->hdr_type = hdr_type & 0x7f; + dev->multifunction = !!(hdr_type & 0x80); + dev->error_state = pci_channel_io_normal; + set_pcie_port_type(dev); + + list_for_each_entry(slot, &dev->bus->slots, list) + if (PCI_SLOT(dev->devfn) == slot->number) + dev->slot = slot; + + /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) + set this higher, assuming the system even supports it. */ + dev->dma_mask = 0xffffffff; dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus), dev->bus->number, PCI_SLOT(dev->devfn), @@ -703,12 +737,14 @@ static int pci_setup_device(struct pci_dev * dev) dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n", dev->vendor, dev->device, class, dev->hdr_type); + /* need to have dev->class ready */ + dev->cfg_size = pci_cfg_space_size(dev); + /* "Unknown power state" */ dev->current_state = PCI_UNKNOWN; /* Early fixups, before probing the BARs */ pci_fixup_device(pci_fixup_early, dev); - class = dev->class >> 8; switch (dev->hdr_type) { /* header type */ case PCI_HEADER_TYPE_NORMAL: /* standard header */ @@ -770,7 +806,7 @@ static int pci_setup_device(struct pci_dev * dev) default: /* unknown header */ dev_err(&dev->dev, "unknown header type %02x, " "ignoring device\n", dev->hdr_type); - return -1; + return -EIO; bad: dev_err(&dev->dev, "ignoring class %02x (doesn't match header " @@ -785,6 +821,7 @@ static int pci_setup_device(struct pci_dev * dev) static void pci_release_capabilities(struct pci_dev *dev) { pci_vpd_release(dev); + pci_iov_release(dev); } /** @@ -803,19 +840,6 @@ static void pci_release_dev(struct device *dev) kfree(pci_dev); } -static void set_pcie_port_type(struct pci_dev *pdev) -{ - int pos; - u16 reg16; - - pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (!pos) - return; - pdev->is_pcie = 1; - pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); - pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; -} - /** * pci_cfg_space_size - get the configuration space size of the PCI device. * @dev: PCI device @@ -847,6 +871,11 @@ int pci_cfg_space_size(struct pci_dev *dev) { int pos; u32 status; + u16 class; + + class = dev->class >> 8; + if (class == PCI_CLASS_BRIDGE_HOST) + return pci_cfg_space_size_ext(dev); pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) { @@ -891,9 +920,7 @@ EXPORT_SYMBOL(alloc_pci_dev); static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) { struct pci_dev *dev; - struct pci_slot *slot; u32 l; - u8 hdr_type; int delay = 1; if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l)) @@ -920,34 +947,16 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) } } - if (pci_bus_read_config_byte(bus, devfn, PCI_HEADER_TYPE, &hdr_type)) - return NULL; - dev = alloc_pci_dev(); if (!dev) return NULL; dev->bus = bus; - dev->sysdata = bus->sysdata; - dev->dev.parent = bus->bridge; - dev->dev.bus = &pci_bus_type; dev->devfn = devfn; - dev->hdr_type = hdr_type & 0x7f; - dev->multifunction = !!(hdr_type & 0x80); dev->vendor = l & 0xffff; dev->device = (l >> 16) & 0xffff; - dev->cfg_size = pci_cfg_space_size(dev); - dev->error_state = pci_channel_io_normal; - set_pcie_port_type(dev); - - list_for_each_entry(slot, &bus->slots, list) - if (PCI_SLOT(devfn) == slot->number) - dev->slot = slot; - /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) - set this higher, assuming the system even supports it. */ - dev->dma_mask = 0xffffffff; - if (pci_setup_device(dev) < 0) { + if (pci_setup_device(dev)) { kfree(dev); return NULL; } @@ -972,6 +981,9 @@ static void pci_init_capabilities(struct pci_dev *dev) /* Alternative Routing-ID Forwarding */ pci_enable_ari(dev); + + /* Single Root I/O Virtualization */ + pci_iov_init(dev); } void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) @@ -1006,6 +1018,12 @@ struct pci_dev *__ref pci_scan_single_device(struct pci_bus *bus, int devfn) { struct pci_dev *dev; + dev = pci_get_slot(bus, devfn); + if (dev) { + pci_dev_put(dev); + return dev; + } + dev = pci_scan_device(bus, devfn); if (!dev) return NULL; @@ -1024,35 +1042,27 @@ EXPORT_SYMBOL(pci_scan_single_device); * Scan a PCI slot on the specified PCI bus for devices, adding * discovered devices to the @bus->devices list. New devices * will not have is_added set. + * + * Returns the number of new devices found. */ int pci_scan_slot(struct pci_bus *bus, int devfn) { - int func, nr = 0; - int scan_all_fns; - - scan_all_fns = pcibios_scan_all_fns(bus, devfn); - - for (func = 0; func < 8; func++, devfn++) { - struct pci_dev *dev; - - dev = pci_scan_single_device(bus, devfn); - if (dev) { - nr++; + int fn, nr = 0; + struct pci_dev *dev; - /* - * If this is a single function device, - * don't scan past the first function. - */ - if (!dev->multifunction) { - if (func > 0) { - dev->multifunction = 1; - } else { - break; - } + dev = pci_scan_single_device(bus, devfn); + if (dev && !dev->is_added) /* new device? */ + nr++; + + if ((dev && dev->multifunction) || + (!dev && pcibios_scan_all_fns(bus, devfn))) { + for (fn = 1; fn < 8; fn++) { + dev = pci_scan_single_device(bus, devfn + fn); + if (dev) { + if (!dev->is_added) + nr++; + dev->multifunction = 1; } - } else { - if (func == 0 && !scan_all_fns) - break; } } @@ -1074,12 +1084,21 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) for (devfn = 0; devfn < 0x100; devfn += 8) pci_scan_slot(bus, devfn); + /* Reserve buses for SR-IOV capability. */ + max += pci_iov_bus_range(bus); + /* * After performing arch-dependent fixup of the bus, look behind * all PCI-to-PCI bridges on this bus. */ - pr_debug("PCI: Fixups for bus %04x:%02x\n", pci_domain_nr(bus), bus->number); - pcibios_fixup_bus(bus); + if (!bus->is_added) { + pr_debug("PCI: Fixups for bus %04x:%02x\n", + pci_domain_nr(bus), bus->number); + pcibios_fixup_bus(bus); + if (pci_is_root_bus(bus)) + bus->is_added = 1; + } + for (pass=0; pass < 2; pass++) list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || @@ -1114,7 +1133,7 @@ struct pci_bus * pci_create_bus(struct device *parent, if (!b) return NULL; - dev = kmalloc(sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev){ kfree(b); return NULL; @@ -1133,7 +1152,6 @@ struct pci_bus * pci_create_bus(struct device *parent, list_add_tail(&b->node, &pci_root_buses); up_write(&pci_bus_sem); - memset(dev, 0, sizeof(*dev)); dev->parent = parent; dev->release = pci_release_bus_bridge_dev; dev_set_name(dev, "pci%04x:%02x", pci_domain_nr(b), bus); @@ -1193,6 +1211,38 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent, EXPORT_SYMBOL(pci_scan_bus_parented); #ifdef CONFIG_HOTPLUG +/** + * pci_rescan_bus - scan a PCI bus for devices. + * @bus: PCI bus to scan + * + * Scan a PCI bus and child buses for new devices, adds them, + * and enables them. + * + * Returns the max number of subordinate bus discovered. + */ +unsigned int __devinit pci_rescan_bus(struct pci_bus *bus) +{ + unsigned int max; + struct pci_dev *dev; + + max = pci_scan_child_bus(bus); + + down_read(&pci_bus_sem); + list_for_each_entry(dev, &bus->devices, bus_list) + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + if (dev->subordinate) + pci_bus_size_bridges(dev->subordinate); + up_read(&pci_bus_sem); + + pci_bus_assign_resources(bus); + pci_enable_bridges(bus); + pci_bus_add_devices(bus); + + return max; +} +EXPORT_SYMBOL_GPL(pci_rescan_bus); + EXPORT_SYMBOL(pci_add_new_bus); EXPORT_SYMBOL(pci_scan_slot); EXPORT_SYMBOL(pci_scan_bridge); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 92b9efe9bcaf..9b2f0d96900d 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -24,6 +24,7 @@ #include <linux/kallsyms.h> #include <linux/dmi.h> #include <linux/pci-aspm.h> +#include <linux/ioport.h> #include "pci.h" int isa_dma_bridge_buggy; @@ -34,6 +35,65 @@ int pcie_mch_quirk; EXPORT_SYMBOL(pcie_mch_quirk); #ifdef CONFIG_PCI_QUIRKS +/* + * This quirk function disables the device and releases resources + * which is specified by kernel's boot parameter 'pci=resource_alignment='. + * It also rounds up size to specified alignment. + * Later on, the kernel will assign page-aligned memory resource back + * to that device. + */ +static void __devinit quirk_resource_alignment(struct pci_dev *dev) +{ + int i; + struct resource *r; + resource_size_t align, size; + + if (!pci_is_reassigndev(dev)) + return; + + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) { + dev_warn(&dev->dev, + "Can't reassign resources to host bridge.\n"); + return; + } + + dev_info(&dev->dev, "Disabling device and release resources.\n"); + pci_disable_device(dev); + + align = pci_specified_resource_alignment(dev); + for (i=0; i < PCI_BRIDGE_RESOURCES; i++) { + r = &dev->resource[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + size = resource_size(r); + if (size < align) { + size = align; + dev_info(&dev->dev, + "Rounding up size of resource #%d to %#llx.\n", + i, (unsigned long long)size); + } + r->end = size - 1; + r->start = 0; + } + /* Need to disable bridge's resource window, + * to enable the kernel to reassign new resource + * window later on. + */ + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) { + r = &dev->resource[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + r->end = resource_size(r) - 1; + r->start = 0; + } + pci_disable_bridge_window(dev); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment); + /* The Mellanox Tavor device gives false positive parity errors * Mark this device with a broken_parity_status, to allow * PCI scanning code to "skip" this now blacklisted device. @@ -1126,10 +1186,15 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev) * its on-board VGA controller */ asus_hides_smbus = 1; } - else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_IG) + else if (dev->device == PCI_DEVICE_ID_INTEL_82801DB_2) switch(dev->subsystem_device) { case 0x00b8: /* Compaq Evo D510 CMT */ case 0x00b9: /* Compaq Evo D510 SFF */ + /* Motherboard doesn't have Host bridge + * subvendor/subdevice IDs and on-board VGA + * controller is disabled if an AGP card is + * inserted, therefore checking USB UHCI + * Controller #1 */ asus_hides_smbus = 1; } else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC) @@ -1154,7 +1219,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82855GM_HB, as DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3, asus_hides_smbus_hostbridge); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG, asus_hides_smbus_hostbridge); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_2, asus_hides_smbus_hostbridge); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, asus_hides_smbus_hostbridge); static void asus_hides_smbus_lpc(struct pci_dev *dev) @@ -1664,9 +1729,13 @@ static void __devinit quirk_netmos(struct pci_dev *dev) * of parallel ports and <S> is the number of serial ports. */ switch (dev->device) { + case PCI_DEVICE_ID_NETMOS_9835: + /* Well, this rule doesn't hold for the following 9835 device */ + if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM && + dev->subsystem_device == 0x0299) + return; case PCI_DEVICE_ID_NETMOS_9735: case PCI_DEVICE_ID_NETMOS_9745: - case PCI_DEVICE_ID_NETMOS_9835: case PCI_DEVICE_ID_NETMOS_9845: case PCI_DEVICE_ID_NETMOS_9855: if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_SERIAL && @@ -2078,6 +2147,92 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15, nvenet_msi_disable); +static int __devinit ht_check_msi_mapping(struct pci_dev *dev) +{ + int pos, ttl = 48; + int found = 0; + + /* check if there is HT MSI cap or enabled on this device */ + pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING); + while (pos && ttl--) { + u8 flags; + + if (found < 1) + found = 1; + if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS, + &flags) == 0) { + if (flags & HT_MSI_FLAGS_ENABLE) { + if (found < 2) { + found = 2; + break; + } + } + } + pos = pci_find_next_ht_capability(dev, pos, + HT_CAPTYPE_MSI_MAPPING); + } + + return found; +} + +static int __devinit host_bridge_with_leaf(struct pci_dev *host_bridge) +{ + struct pci_dev *dev; + int pos; + int i, dev_no; + int found = 0; + + dev_no = host_bridge->devfn >> 3; + for (i = dev_no + 1; i < 0x20; i++) { + dev = pci_get_slot(host_bridge->bus, PCI_DEVFN(i, 0)); + if (!dev) + continue; + + /* found next host bridge ?*/ + pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE); + if (pos != 0) { + pci_dev_put(dev); + break; + } + + if (ht_check_msi_mapping(dev)) { + found = 1; + pci_dev_put(dev); + break; + } + pci_dev_put(dev); + } + + return found; +} + +#define PCI_HT_CAP_SLAVE_CTRL0 4 /* link control */ +#define PCI_HT_CAP_SLAVE_CTRL1 8 /* link control to */ + +static int __devinit is_end_of_ht_chain(struct pci_dev *dev) +{ + int pos, ctrl_off; + int end = 0; + u16 flags, ctrl; + + pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE); + + if (!pos) + goto out; + + pci_read_config_word(dev, pos + PCI_CAP_FLAGS, &flags); + + ctrl_off = ((flags >> 10) & 1) ? + PCI_HT_CAP_SLAVE_CTRL0 : PCI_HT_CAP_SLAVE_CTRL1; + pci_read_config_word(dev, pos + ctrl_off, &ctrl); + + if (ctrl & (1 << 6)) + end = 1; + +out: + return end; +} + static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev) { struct pci_dev *host_bridge; @@ -2102,6 +2257,11 @@ static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev) if (!found) return; + /* don't enable end_device/host_bridge with leaf directly here */ + if (host_bridge == dev && is_end_of_ht_chain(host_bridge) && + host_bridge_with_leaf(host_bridge)) + goto out; + /* root did that ! */ if (msi_ht_cap_enabled(host_bridge)) goto out; @@ -2132,44 +2292,12 @@ static void __devinit ht_disable_msi_mapping(struct pci_dev *dev) } } -static int __devinit ht_check_msi_mapping(struct pci_dev *dev) -{ - int pos, ttl = 48; - int found = 0; - - /* check if there is HT MSI cap or enabled on this device */ - pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING); - while (pos && ttl--) { - u8 flags; - - if (found < 1) - found = 1; - if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS, - &flags) == 0) { - if (flags & HT_MSI_FLAGS_ENABLE) { - if (found < 2) { - found = 2; - break; - } - } - } - pos = pci_find_next_ht_capability(dev, pos, - HT_CAPTYPE_MSI_MAPPING); - } - - return found; -} - -static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev) +static void __devinit __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all) { struct pci_dev *host_bridge; int pos; int found; - /* Enabling HT MSI mapping on this device breaks MCP51 */ - if (dev->device == 0x270) - return; - /* check if there is HT MSI cap or enabled on this device */ found = ht_check_msi_mapping(dev); @@ -2193,7 +2321,10 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev) /* Host bridge is to HT */ if (found == 1) { /* it is not enabled, try to enable it */ - nv_ht_enable_msi_mapping(dev); + if (all) + ht_enable_msi_mapping(dev); + else + nv_ht_enable_msi_mapping(dev); } return; } @@ -2205,8 +2336,20 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev) /* Host bridge is not to HT, disable HT MSI mapping on this device */ ht_disable_msi_mapping(dev); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk); + +static void __devinit nv_msi_ht_cap_quirk_all(struct pci_dev *dev) +{ + return __nv_msi_ht_cap_quirk(dev, 1); +} + +static void __devinit nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev) +{ + return __nv_msi_ht_cap_quirk(dev, 0); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf); + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all); static void __devinit quirk_msi_intx_disable_bug(struct pci_dev *dev) { diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 042e08924421..86503c14ce7e 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -71,6 +71,9 @@ void pci_remove_bus(struct pci_bus *pci_bus) down_write(&pci_bus_sem); list_del(&pci_bus->node); up_write(&pci_bus_sem); + if (!pci_bus->is_added) + return; + pci_remove_legacy_files(pci_bus); device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity); device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity); @@ -92,6 +95,7 @@ EXPORT_SYMBOL(pci_remove_bus); */ void pci_remove_bus_device(struct pci_dev *dev) { + pci_stop_bus_device(dev); if (dev->subordinate) { struct pci_bus *b = dev->subordinate; diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 5af8bd538149..710d4ea69568 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -29,7 +29,7 @@ pci_find_upstream_pcie_bridge(struct pci_dev *pdev) if (pdev->is_pcie) return NULL; while (1) { - if (!pdev->bus->self) + if (!pdev->bus->parent) break; pdev = pdev->bus->self; /* a p2p bridge */ diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 704608945780..334285a8e237 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -27,7 +27,7 @@ #include <linux/slab.h> -static void pbus_assign_resources_sorted(struct pci_bus *bus) +static void pbus_assign_resources_sorted(const struct pci_bus *bus) { struct pci_dev *dev; struct resource *res; @@ -144,6 +144,9 @@ static void pci_setup_bridge(struct pci_bus *bus) struct pci_bus_region region; u32 l, bu, lu, io_upper16; + if (!pci_is_root_bus(bus) && bus->is_added) + return; + dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n", pci_domain_nr(bus), bus->number); @@ -495,7 +498,7 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_size_bridges); -void __ref pci_bus_assign_resources(struct pci_bus *bus) +void __ref pci_bus_assign_resources(const struct pci_bus *bus) { struct pci_bus *b; struct pci_dev *dev; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 32e8d88a4619..3039fcb86afc 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -120,6 +120,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource) return err; } +#ifdef CONFIG_PCI_QUIRKS +void pci_disable_bridge_window(struct pci_dev *dev) +{ + dev_dbg(&dev->dev, "Disabling bridge window.\n"); + + /* MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0); + + /* Prefetchable MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0); + pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0); + pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff); +} +#endif /* CONFIG_PCI_QUIRKS */ + int pci_assign_resource(struct pci_dev *dev, int resno) { struct pci_bus *bus = dev->bus; diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 5a8ccb4f604d..21189447e545 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -1,8 +1,8 @@ /* * drivers/pci/slot.c * Copyright (C) 2006 Matthew Wilcox <matthew@wil.cx> - * Copyright (C) 2006-2008 Hewlett-Packard Development Company, L.P. - * Alex Chiang <achiang@hp.com> + * Copyright (C) 2006-2009 Hewlett-Packard Development Company, L.P. + * Alex Chiang <achiang@hp.com> */ #include <linux/kobject.h> @@ -52,8 +52,8 @@ static void pci_slot_release(struct kobject *kobj) struct pci_dev *dev; struct pci_slot *slot = to_pci_slot(kobj); - pr_debug("%s: releasing pci_slot on %x:%d\n", __func__, - slot->bus->number, slot->number); + dev_dbg(&slot->bus->dev, "dev %02x, released physical slot %s\n", + slot->number, pci_slot_name(slot)); list_for_each_entry(dev, &slot->bus->devices, bus_list) if (PCI_SLOT(dev->devfn) == slot->number) @@ -248,9 +248,8 @@ placeholder: if (PCI_SLOT(dev->devfn) == slot_nr) dev->slot = slot; - /* Don't care if debug printk has a -1 for slot_nr */ - pr_debug("%s: created pci_slot on %04x:%02x:%02x\n", - __func__, pci_domain_nr(parent), parent->number, slot_nr); + dev_dbg(&parent->dev, "dev %02x, created physical slot %s\n", + slot_nr, pci_slot_name(slot)); out: kfree(slot_name); @@ -299,9 +298,8 @@ EXPORT_SYMBOL_GPL(pci_renumber_slot); */ void pci_destroy_slot(struct pci_slot *slot) { - pr_debug("%s: dec refcount to %d on %04x:%02x:%02x\n", __func__, - atomic_read(&slot->kobj.kref.refcount) - 1, - pci_domain_nr(slot->bus), slot->bus->number, slot->number); + dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n", + slot->number, atomic_read(&slot->kobj.kref.refcount) - 1); down_write(&pci_bus_sem); kobject_put(&slot->kobj); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 78199151c00b..d047f846c3ed 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -257,6 +257,40 @@ void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); #endif /* CONFIG_PM_SLEEP */ + +#define OSC_QUERY_TYPE 0 +#define OSC_SUPPORT_TYPE 1 +#define OSC_CONTROL_TYPE 2 +#define OSC_SUPPORT_MASKS 0x1f + +/* _OSC DW0 Definition */ +#define OSC_QUERY_ENABLE 1 +#define OSC_REQUEST_ERROR 2 +#define OSC_INVALID_UUID_ERROR 4 +#define OSC_INVALID_REVISION_ERROR 8 +#define OSC_CAPABILITIES_MASK_ERROR 16 + +/* _OSC DW1 Definition (OS Support Fields) */ +#define OSC_EXT_PCI_CONFIG_SUPPORT 1 +#define OSC_ACTIVE_STATE_PWR_SUPPORT 2 +#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4 +#define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8 +#define OSC_MSI_SUPPORT 16 + +/* _OSC DW1 Definition (OS Control Fields) */ +#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1 +#define OSC_SHPC_NATIVE_HP_CONTROL 2 +#define OSC_PCI_EXPRESS_PME_CONTROL 4 +#define OSC_PCI_EXPRESS_AER_CONTROL 8 +#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16 + +#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \ + OSC_SHPC_NATIVE_HP_CONTROL | \ + OSC_PCI_EXPRESS_PME_CONTROL | \ + OSC_PCI_EXPRESS_AER_CONTROL | \ + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) + +extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); #else /* CONFIG_ACPI */ static inline int early_acpi_boot_init(void) diff --git a/include/linux/msi.h b/include/linux/msi.h index d2b8a1e8ca11..6991ab5b24d1 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -20,20 +20,23 @@ extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); struct msi_desc { struct { - __u8 type : 5; /* {0: unused, 5h:MSI, 11h:MSI-X} */ + __u8 is_msix : 1; + __u8 multiple: 3; /* log2 number of messages */ __u8 maskbit : 1; /* mask-pending bit supported ? */ - __u8 masked : 1; __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ __u8 pos; /* Location of the msi capability */ - __u32 maskbits_mask; /* mask bits mask */ __u16 entry_nr; /* specific enabled entry */ unsigned default_irq; /* default pre-assigned irq */ - }msi_attrib; + } msi_attrib; + u32 masked; /* mask bits */ unsigned int irq; struct list_head list; - void __iomem *mask_base; + union { + void __iomem *mask_base; + u8 mask_pos; + }; struct pci_dev *dev; /* Last set MSI message */ diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 042c166f65d5..092e82e0048c 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -10,72 +10,25 @@ #include <linux/acpi.h> -#define OSC_QUERY_TYPE 0 -#define OSC_SUPPORT_TYPE 1 -#define OSC_CONTROL_TYPE 2 -#define OSC_SUPPORT_MASKS 0x1f - -/* - * _OSC DW0 Definition - */ -#define OSC_QUERY_ENABLE 1 -#define OSC_REQUEST_ERROR 2 -#define OSC_INVALID_UUID_ERROR 4 -#define OSC_INVALID_REVISION_ERROR 8 -#define OSC_CAPABILITIES_MASK_ERROR 16 - -/* - * _OSC DW1 Definition (OS Support Fields) - */ -#define OSC_EXT_PCI_CONFIG_SUPPORT 1 -#define OSC_ACTIVE_STATE_PWR_SUPPORT 2 -#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4 -#define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8 -#define OSC_MSI_SUPPORT 16 - -/* - * _OSC DW1 Definition (OS Control Fields) - */ -#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1 -#define OSC_SHPC_NATIVE_HP_CONTROL 2 -#define OSC_PCI_EXPRESS_PME_CONTROL 4 -#define OSC_PCI_EXPRESS_AER_CONTROL 8 -#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16 - -#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \ - OSC_SHPC_NATIVE_HP_CONTROL | \ - OSC_PCI_EXPRESS_PME_CONTROL | \ - OSC_PCI_EXPRESS_AER_CONTROL | \ - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) - #ifdef CONFIG_ACPI -extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); -int pci_acpi_osc_support(acpi_handle handle, u32 flags); static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { - /* Find root host bridge */ - while (pdev->bus->self) - pdev = pdev->bus->self; - - return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus), - pdev->bus->number); + struct pci_bus *pbus = pdev->bus; + /* Find a PCI root bus */ + while (pbus->parent) + pbus = pbus->parent; + return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus), + pbus->number); } static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) { - int seg = pci_domain_nr(pbus), busnr = pbus->number; - struct pci_dev *bridge = pbus->self; - if (bridge) - return DEVICE_ACPI_HANDLE(&(bridge->dev)); - return acpi_get_pci_rootbridge_handle(seg, busnr); + if (pbus->parent) + return DEVICE_ACPI_HANDLE(&(pbus->self->dev)); + return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus), + pbus->number); } #else -#if !defined(AE_ERROR) -typedef u32 acpi_status; -#define AE_ERROR (acpi_status) (0x0001) -#endif -static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) -{return AE_ERROR;} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { return NULL; } #endif diff --git a/include/linux/pci.h b/include/linux/pci.h index df3644132617..a7fe4bbd7ff1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -52,6 +52,7 @@ #include <asm/atomic.h> #include <linux/device.h> #include <linux/io.h> +#include <linux/irqreturn.h> /* Include the ID list */ #include <linux/pci_ids.h> @@ -93,6 +94,12 @@ enum { /* #6: expansion ROM resource */ PCI_ROM_RESOURCE, + /* device specific resources */ +#ifdef CONFIG_PCI_IOV + PCI_IOV_RESOURCES, + PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1, +#endif + /* resources assigned to buses behind the bridge */ #define PCI_BRIDGE_RESOURCE_NUM 4 @@ -180,6 +187,7 @@ struct pci_cap_saved_state { struct pcie_link_state; struct pci_vpd; +struct pci_sriov; /* * The pci_dev structure is used to describe PCI devices. @@ -257,6 +265,8 @@ struct pci_dev { unsigned int is_managed:1; unsigned int is_pcie:1; unsigned int state_saved:1; + unsigned int is_physfn:1; + unsigned int is_virtfn:1; pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ @@ -270,6 +280,12 @@ struct pci_dev { struct list_head msi_list; #endif struct pci_vpd *vpd; +#ifdef CONFIG_PCI_IOV + union { + struct pci_sriov *sriov; /* SR-IOV capability related */ + struct pci_dev *physfn; /* the PF this VF is associated with */ + }; +#endif }; extern struct pci_dev *alloc_pci_dev(void); @@ -341,6 +357,15 @@ struct pci_bus { #define pci_bus_b(n) list_entry(n, struct pci_bus, node) #define to_pci_bus(n) container_of(n, struct pci_bus, dev) +/* + * Returns true if the pci bus is root (behind host-pci bridge), + * false otherwise + */ +static inline bool pci_is_root_bus(struct pci_bus *pbus) +{ + return !(pbus->parent); +} + #ifdef CONFIG_PCI_MSI static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { @@ -528,7 +553,7 @@ void pcibios_update_irq(struct pci_dev *, int irq); /* Generic PCI functions used internally */ extern struct pci_bus *pci_find_bus(int domain, int busnr); -void pci_bus_add_devices(struct pci_bus *bus); +void pci_bus_add_devices(const struct pci_bus *bus); struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, @@ -702,6 +727,9 @@ int pci_back_from_sleep(struct pci_dev *dev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); +#ifdef CONFIG_HOTPLUG +unsigned int pci_rescan_bus(struct pci_bus *bus); +#endif /* Vital product data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); @@ -709,7 +737,7 @@ ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void int pci_vpd_truncate(struct pci_dev *dev, size_t size); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ -void pci_bus_assign_resources(struct pci_bus *bus); +void pci_bus_assign_resources(const struct pci_bus *bus); void pci_bus_size_bridges(struct pci_bus *bus); int pci_claim_resource(struct pci_dev *, int); void pci_assign_unassigned_resources(void); @@ -790,7 +818,7 @@ struct msix_entry { #ifndef CONFIG_PCI_MSI -static inline int pci_enable_msi(struct pci_dev *dev) +static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) { return -1; } @@ -800,6 +828,10 @@ static inline void pci_msi_shutdown(struct pci_dev *dev) static inline void pci_disable_msi(struct pci_dev *dev) { } +static inline int pci_msix_table_size(struct pci_dev *dev) +{ + return 0; +} static inline int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) { @@ -821,9 +853,10 @@ static inline int pci_msi_enabled(void) return 0; } #else -extern int pci_enable_msi(struct pci_dev *dev); +extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); extern void pci_msi_shutdown(struct pci_dev *dev); extern void pci_disable_msi(struct pci_dev *dev); +extern int pci_msix_table_size(struct pci_dev *dev); extern int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec); extern void pci_msix_shutdown(struct pci_dev *dev); @@ -842,6 +875,8 @@ static inline int pcie_aspm_enabled(void) extern int pcie_aspm_enabled(void); #endif +#define pci_enable_msi(pdev) pci_enable_msi_block(pdev, 1) + #ifdef CONFIG_HT_IRQ /* The functions a driver should call */ int ht_create_irq(struct pci_dev *dev, int idx); @@ -1195,5 +1230,23 @@ int pci_ext_cfg_avail(struct pci_dev *dev); void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); +#ifdef CONFIG_PCI_IOV +extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); +extern void pci_disable_sriov(struct pci_dev *dev); +extern irqreturn_t pci_sriov_migration(struct pci_dev *dev); +#else +static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + return -ENODEV; +} +static inline void pci_disable_sriov(struct pci_dev *dev) +{ +} +static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev) +{ + return IRQ_NONE; +} +#endif + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e5816dd33371..cb14fd260837 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2396,6 +2396,7 @@ #define PCI_DEVICE_ID_INTEL_82801CA_12 0x248c #define PCI_DEVICE_ID_INTEL_82801DB_0 0x24c0 #define PCI_DEVICE_ID_INTEL_82801DB_1 0x24c1 +#define PCI_DEVICE_ID_INTEL_82801DB_2 0x24c2 #define PCI_DEVICE_ID_INTEL_82801DB_3 0x24c3 #define PCI_DEVICE_ID_INTEL_82801DB_5 0x24c5 #define PCI_DEVICE_ID_INTEL_82801DB_6 0x24c6 diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 027815b4635e..e4d08c1b2e0b 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -235,7 +235,7 @@ #define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ #define PCI_PM_CTRL 4 /* PM control and status register */ #define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ -#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */ #define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ #define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ #define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ @@ -375,6 +375,7 @@ #define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ #define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ #define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ +#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ #define PCI_EXP_DEVCAP 4 /* Device capabilities */ @@ -487,6 +488,8 @@ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ +#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ /* Extended Capabilities (PCI-X 2.0 and Express) */ #define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) @@ -498,6 +501,7 @@ #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 #define PCI_EXT_CAP_ID_ARI 14 +#define PCI_EXT_CAP_ID_SRIOV 16 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -615,4 +619,35 @@ #define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ #define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ +/* Single Root I/O Virtualization */ +#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ +#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ +#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ +#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ +#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ +#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ +#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ +#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ +#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ +#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ +#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ +#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ +#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ +#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ +#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ +#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ +#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */ +#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ +#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ +#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ +#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ +#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ +#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ +#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ +#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ +#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ +#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ +#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ +#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ + #endif /* LINUX_PCI_REGS_H */ diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index 6cd91e3f9820..b4c79545330b 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -16,29 +16,30 @@ #define PCIE_ANY_PORT 7 /* Service Type */ -#define PCIE_PORT_SERVICE_PME 1 /* Power Management Event */ -#define PCIE_PORT_SERVICE_AER 2 /* Advanced Error Reporting */ -#define PCIE_PORT_SERVICE_HP 4 /* Native Hotplug */ -#define PCIE_PORT_SERVICE_VC 8 /* Virtual Channel */ +#define PCIE_PORT_SERVICE_PME_SHIFT 0 /* Power Management Event */ +#define PCIE_PORT_SERVICE_PME (1 << PCIE_PORT_SERVICE_PME_SHIFT) +#define PCIE_PORT_SERVICE_AER_SHIFT 1 /* Advanced Error Reporting */ +#define PCIE_PORT_SERVICE_AER (1 << PCIE_PORT_SERVICE_AER_SHIFT) +#define PCIE_PORT_SERVICE_HP_SHIFT 2 /* Native Hotplug */ +#define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) +#define PCIE_PORT_SERVICE_VC_SHIFT 3 /* Virtual Channel */ +#define PCIE_PORT_SERVICE_VC (1 << PCIE_PORT_SERVICE_VC_SHIFT) /* Root/Upstream/Downstream Port's Interrupt Mode */ +#define PCIE_PORT_NO_IRQ (-1) #define PCIE_PORT_INTx_MODE 0 #define PCIE_PORT_MSI_MODE 1 #define PCIE_PORT_MSIX_MODE 2 -struct pcie_port_service_id { - __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/ - __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */ - __u32 class, class_mask; /* (class,subclass,prog-if) triplet */ - __u32 port_type, service_type; /* Port Entity */ - kernel_ulong_t driver_data; +struct pcie_port_data { + int port_type; /* Type of the port */ + int port_irq_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ }; struct pcie_device { int irq; /* Service IRQ/MSI/MSI-X Vector */ - int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ - struct pcie_port_service_id id; /* Service ID */ - struct pci_dev *port; /* Root/Upstream/Downstream Port */ + struct pci_dev *port; /* Root/Upstream/Downstream Port */ + u32 service; /* Port service this device represents */ void *priv_data; /* Service Private Data */ struct device device; /* Generic Device Interface */ }; @@ -56,10 +57,9 @@ static inline void* get_service_data(struct pcie_device *dev) struct pcie_port_service_driver { const char *name; - int (*probe) (struct pcie_device *dev, - const struct pcie_port_service_id *id); + int (*probe) (struct pcie_device *dev); void (*remove) (struct pcie_device *dev); - int (*suspend) (struct pcie_device *dev, pm_message_t state); + int (*suspend) (struct pcie_device *dev); int (*resume) (struct pcie_device *dev); /* Service Error Recovery Handler */ @@ -68,7 +68,9 @@ struct pcie_port_service_driver { /* Link Reset Capability - AER service driver specific */ pci_ers_result_t (*reset_link) (struct pci_dev *dev); - const struct pcie_port_service_id *id_table; + int port_type; /* Type of the port this driver can handle */ + u32 service; /* Port service this device represents */ + struct device_driver driver; }; #define to_service_driver(d) \ |